<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1494164</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2024.1494164</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Energy Research</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A deep reinforcement learning-based approach for cyber resilient demand response optimization</article-title>
<alt-title alt-title-type="left-running-head">Sinha et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2024.1494164">10.3389/fenrg.2024.1494164</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sinha</surname>
<given-names>Ayush</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1122188/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vyas</surname>
<given-names>Ranjana</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Alasali</surname>
<given-names>Feras</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1934289/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Holderbaum</surname>
<given-names>William</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1936281/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vyas</surname>
<given-names>O. P.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of IT</institution>, <institution>Indian Institute of Information Technology</institution>, <addr-line>Allahabad</addr-line>, <country>India</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Electrical Engineering</institution>, <institution>Faculty of Engineering</institution>, <institution>The Hashemite University</institution>, <addr-line>Zarqa</addr-line>, <country>Jordan</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Science, Engineering and Environment</institution>, <institution>University of Salford</institution>, <addr-line>Salford</addr-line>, <country>United Kingdom</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/802762/overview">M. Premkumar</ext-link>, Dayananda Sagar College of Engineering, India</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1371498/overview">Ziming Yan</ext-link>, Nanyang Technological University, Singapore</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1408940/overview">Lefeng Cheng</ext-link>, Guangzhou University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Ayush Sinha, <email>pro.ayush@iiita.ac.in</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1494164</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Sinha, Vyas, Alasali, Holderbaum and Vyas.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Sinha, Vyas, Alasali, Holderbaum and Vyas</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The contemporary smart grid infrastructure, characterized by its bidirectional communication capabilities between prosumers and utility organizations, has revolutionized the efficient execution of fine-grain computational tasks. Ensuring the uninterrupted delivery of power, even in the face of unforeseen contingencies, stands as a paramount concern for utility companies. Peak load forecasting, load balancing, and robust cyberattack detection and prevention mechanisms are integral components in achieving grid reliability. This research endeavors to advance peak load forecasting strategies and demand response optimization at the microgrid level, thereby enhancing grid reliability through the application of Deep Reinforcement Learning (DRL) techniques. Additionally, it investigates the ongoing threat of false data injection attacks. By synergizing these two critical investigations and implementing a novel framework and defense mechanism, this paper proposes a comprehensive approach to fortify the smart grid&#x2019;s reliability and security. The envisioned framework not only refines demand response (DR) optimization but also bolsters the grid&#x2019;s resilience in the face of the ever-evolving cyber threat landscape. The research outcomes showcase the practicality and effectiveness of the proposed framework, substantiated through extensive experimentation conducted on IEEE-3, IEEE-9, IEEE-14, and IEEE-33 bus systems.</p>
</abstract>
<kwd-group>
<kwd>smart grid architecture</kwd>
<kwd>load forecasting</kwd>
<kwd>demand response</kwd>
<kwd>load profiling</kwd>
<kwd>smart grid resilience</kwd>
<kwd>FDI attack</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Smart Grids</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The conventional design of the power network has advanced in sophisticated ways since its inception, when a central framework regulated energy creation and distribution. The advent of innovations for Internet communication in this domain brought a shift toward a more interconnected, intelligent, and dynamic nature of the grid model, known as the Smart Grid (SG). Its fundamental advantage is two-way data communication, through which information can be exchanged between the client (i.e., a smart meter) and the power company, thus making it suitable for carrying out sophisticated power consumption metering (<xref ref-type="bibr" rid="B49">Mohassel et al., 2014</xref>). This allows users to partake in programs that decrease power use when energy costs rise and to sell the power produced at home (e.g., utilizing solar energy installations). This technology can also be leveraged by the power company to enhance the supply and demand of electricity by managing power generation and distribution in real time, enabling power operators and administrators to anticipate periods of high demand and prevent blackouts.</p>
<p>For this, the data collection is done through sophisticated advanced metering infrastructure (AMI) in aggregation with meter data management systems (MDMS). The data collection needs information technology-enabled industrial equipment. From one viewpoint, the power company is utilizing the supervisory control and data acquisition (SCADA) frameworks to deploy machines that continuously sense the energy generation and demand of numerous consumers. This incorporates, for instance, the programmable logic controllers (PLCs) and remote terminal units (RTUs) that are available in the substations spread over the wide area network (WAN) of the smart grid. From another viewpoint, support for the MDMS techniques involves interconnecting these modern resources with outside networks (e.g., the Web) and technical advances (e.g., distributed computing and the cloud) to go through additional information investigation and support demand response (DR).</p>
<p>The growing connection of SCADA systems that used to work separately has increased the number of online security risks (<xref ref-type="bibr" rid="B86">Upadhyay and Sampalli, 2020</xref>). The main reason is that complex attacks are more likely to target multiple nodes in the control network over a long period of time. The presence of these attacks can harm the smart grid infrastructure and risk the accessibility of utility machines, which converts into scenarios responsible for holding the power supply and is likely to introduce power outages in the network (<xref ref-type="bibr" rid="B65">Romanenko et al., 2020</xref>). In a similar aspect, security measures should likewise be inducted to save the accessibility of the power supply in situations like high demand (that may likewise be incited on purpose), thus staying away from blackouts (<xref ref-type="bibr" rid="B44">Lopez et al., 2018</xref>).</p>
<p>In connection with the situation mentioned above, the reliability and security of the SG infrastructure are critical phenomena. It can be investigated by analyzing the resilience of SG (<xref ref-type="bibr" rid="B71">Singh and Govindarasu, 2020</xref>). The authors in (<xref ref-type="bibr" rid="B18">Clark and Zonouz, 2019</xref>) stated that the resilience of the SG focuses on (I) assurance for the full corrective measures of the core functionalities of the SG despite continuous ill-disposed mischievous activities and attacks. As a boundary condition, some non-core functionalities may be affected for the time being. (II) Ensured recovery of the crucial activity of the influenced sub-functionalities inside a predefined cost limit called the resilience limit. So, to analyze the stability of SG in terms of safety and security, it is imperative to not only study cyber security and DR but also explore the interdependence between them and how they contribute towards the resilience measure of SG. For this reason, the protection of SG infrastructure from such undesired actions with mischievous intentions is an emerging research area (<xref ref-type="bibr" rid="B20">Cybersecurity, 2018</xref>), for the government (UsEnergy. U)- (InEnergy), and international agencies like the National Institute of Standards and Technology (NIST) (<xref ref-type="bibr" rid="B20">Cybersecurity, 2018</xref>) and the European Union Agency for Cybersecurity (ENISA) (EuGovernment).</p>
<p>There is a lack of academic research on smart grid reliability as it concerns users, and further investigation on this subject is necessary (<xref ref-type="bibr" rid="B12">Balali et al., 2023</xref>; <xref ref-type="bibr" rid="B14">Bohra and Anvari-Moghaddam, 2022</xref>). To judge the reliability and security aspects of SG, the authors in (<xref ref-type="bibr" rid="B47">Mashal et al., 2023</xref>) proposed three main criteria: (1) The &#x201c;Network System&#x201d; criterion is all about the needs and standards of the communication network system. (2) The &#x201c;Big Data&#x201d; criterion shows the features and traits of handling large amounts of data. (3) The &#x201c;Grid efficiency&#x201d; criterion checks how well the smart grid works, as presented in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Smart grid reliability criteria.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g001.tif"/>
</fig>
<p>In <xref ref-type="bibr" rid="B47">Mashal et al. (2023)</xref>, the authors formulated the problem of evaluating the reliability of smart grids as a Multiple Criteria Decision Making (MCDM) problem in order to investigate the elements that influence it. With the help of expert opinion and an MCDM approach, the authors proposed an overall rank for the criteria and sub-criteria mentioned in <xref ref-type="fig" rid="F1">Figure 1</xref>. <xref ref-type="fig" rid="F2">Figure 2</xref> presents the rank and weightage of the sub-criteria. From this figure, it is evident that for the Big Data handling criterion, Privacy and Analytics are two important sub-criteria; from the Grid Efficiency point of view, Interoperability, Availability and Self-Healing are important sub-criteria; and finally, for the Smart Grid Network System aspect, Network Cyber Security and Delay are highly ranked sub-criteria. The present work is mostly based on the resilience mechanism defined by the NIST report (<xref ref-type="bibr" rid="B66">Ross et al., 2019</xref>). The main contributions of the paper are as follows:<list list-type="simple">
<list-item>
<p>1) Propose a framework for SG with demand response optimization and cyber events handling mechanism.</p>
</list-item>
<list-item>
<p>2) Explore, through this work, the seven sub-criteria shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, namely Privacy, Analytics, Interoperability, Availability, Self-Healing, Cyber Security, and Delay.</p>
</list-item>
<list-item>
<p>3) Present the design and execution of a detection mechanism for cyber events, thus ensuring the security of SG.</p>
</list-item>
<list-item>
<p>4) Address the wellbeing of the critical SG assets by carrying out a DR balance mechanism that allows a crucial energy supply for the whole SG, expediting the expectation of future utilization patterns.</p>
</list-item>
<list-item>
<p>5) Demonstrate, through the proposed framework, how the ideas of DR and cyber security with resilience are intrinsically related.</p>
</list-item>
</list>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Global weights and ranking of the sub-criteria for smart grid reliability.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g002.tif"/>
</fig>
<p>The rest of the paper is structured as follows: <xref ref-type="sec" rid="s2">Section 2</xref> describes the background and related work. It also describes the DR mechanism and the optimal method to ensure its smooth functioning, along with the relationship between DR and SG resilience. It also depicts the necessity of cyber security and its post-attack handling scenarios. Further, <xref ref-type="sec" rid="s3">Section 3</xref> presents the proposed framework as a solution for the problem formulated. To validate the effectiveness of the proposed framework, <xref ref-type="sec" rid="s4">Section 4</xref> describes the data used for the experiment, and further, <xref ref-type="sec" rid="s5">Section 5</xref> shows the usability of the DR and cyber security for the safety and security of SG from the resilience perspective. Finally, Section 6 presents concluding remarks and the future scope of the present work.</p>
</sec>
<sec id="s2">
<title>2 Background work</title>
<p>As per the NIST, the definition of the SG is the power delivery infrastructure based on integrating and amalgamating different smart computing and communication technologies with intelligent services. The ENISA also considers SG as an intelligent energy infrastructure with two-way communication capability for consumers and producers with smart components like Advanced Metering Infrastructure (AMI). Throughout the world, the arrangement and activity of power infrastructure foundations are, by and large, dependent on security and sufficiency necessities. These principles permit the framework construction to withstand dangers to supply consumer requests with a great and negligible disruption throughout a period. Due to environmental change, an increase in the number and seriousness of natural disasters like tempests, droughts, and floods has been observed in many countries. In 2012, the northeastern territories of the USA were impacted by a hurricane that annihilated around 100,000 electrical wires. Around 7 million people were affected by a power cut as a result of this event. As per the authors (<xref ref-type="bibr" rid="B60">Panteli and Mancarella, 2015</xref>), the impact of severe weather events is expected to increase due to higher greenhouse gas concentrations. Such contingencies emphasize the urgency and importance of making the power grid smarter and intelligent enough to withstand these catastrophic circumstances that also impact social life.</p>
<p>Along with natural disasters, cyber security is also one of the major concerns for SG&#x2019;s safe and smooth operation. In (<xref ref-type="bibr" rid="B60">Panteli and Mancarella, 2015</xref>), resilience is defined as &#x201c;<italic>the ability of a system to withstand, absorb, and rapidly recover from an external, high-impact, low-probability devastating event, like an extreme weather event or a cyber attack</italic>&#x201d;. A resilient infrastructure can restore and recover from such a damaging situation within an acceptable time frame. Many researchers have defined the resilience concept from a critical infrastructure perspective. For example, authors in <xref ref-type="bibr" rid="B52">Mousavizadeh et al. (2018)</xref> defined resilience as the ability to recover and restore the system against extreme catastrophic events. The definition has covered both active and passive concepts. One of the important factors to consider while determining the reliability and stability of SG is the consideration of insider attack scenarios, as explained by (<xref ref-type="bibr" rid="B70">Singh et al., 2021</xref>). It is shown that if an attacker already has access to the system as an insider, this access can be used to launch attacks that are more difficult to detect and prevent. Another work by <xref ref-type="bibr" rid="B17">Cheng and Yu (2019)</xref> shows how the AI 2.0, driven by data, will speed up the growth of smart energy and electric power systems (Smart EEPS). In this version of AI, machine learning (ML) is a key method that analyzes large amounts of real and simulated data to predict outcomes, make judgments, and help people make better decisions.</p>
<p>The SG infrastructure&#x2019;s intelligent communication and decision-making system components make its resilience more dependent on the underlying distribution network. For example, in <xref ref-type="bibr" rid="B39">Li et al. (2017)</xref>, in normal mode, the SG may not have any Micro Grid (MG) formation. However, after disturbing events or partial blackouts, the same SG has one or more MG formations based on the resilience measures taken by the operator. With the proper utilization of DR in the MG system, load scheduling can be achieved efficiently by detecting anomalies in the system. Another research work in <xref ref-type="bibr" rid="B26">Fleschutz et al. (2021)</xref> highlights the price-based demand response (PBDR) system that is attributed to good economic and environmental aspects. With the analysis of carbon emissions based on the PBDR system, the authors have established that the PBDR system is good for economic and environmental aspects. The aim of another work, <xref ref-type="bibr" rid="B16">Chen et al. (2015)</xref>, is to suggest a new method for managing power distribution systems during outages. The suggested strategy entails building numerous microgrids that are live-connected to the radial distribution system and powered by distributed generators (DG). This allows for the restoration of critical loads in a timely manner, thus making the SG more resilient (<xref ref-type="fig" rid="F3">Figure 3</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Attack resilient smart grid functioning.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g003.tif"/>
</fig>
<p>In addition to the role of these advancements in day-to-day operator activity, they give greater adaptability to power grid utility in the extreme possibility conditions in which electrical lines are harmed or association with the upstream SG network is disturbed. This issue has constrained network operators to make an inescapable arrangement for the resilient operation of the SG in extreme conditions like technical issues, natural disasters, and man-made issues that cause irrecoverable losses. Hence, the occurrence of severe contingency conditions is a prominent issue. Consequently, advancing an appropriate procedure to decrease the adverse consequences of this issue on the SG network has become vital. Up to now, considerable research has been done in the context of both normal and contingency situations of SG. Therefore, the present section represents the resilience measures of SG as per the NIST (<xref ref-type="bibr" rid="B66">Ross et al., 2019</xref>) framework, which is as follows:<list list-type="simple">
<list-item>
<p>1) Adaptive response</p>
</list-item>
<list-item>
<p>2) Segmentation</p>
</list-item>
<list-item>
<p>3) Redundancy</p>
</list-item>
<list-item>
<p>4) Diversity</p>
</list-item>
<list-item>
<p>5) Deception</p>
</list-item>
</list>
</p>
<p>For a more detailed explanation, the present section is divided into two segments: SG resilience in the context of cyber resilience and of optimized demand response, respectively.</p>
<sec id="s2-1">
<title>2.1 SG resilience: cyber resilience</title>
<p>NIST has published a report with a special focus on cyber resilience, &#x201c;<italic>Developing Cyber Resilient Systems: A Systems Security Engineering Approach</italic>&#x201d;, (<xref ref-type="bibr" rid="B66">Ross et al., 2019</xref>). It includes different verticals: adaptive response, segmentation, redundancy, diversity, and deception. Each of these (<xref ref-type="fig" rid="F4">Figure 4</xref>) has standard procedures and best practices with the objective of making attack-resilient systems. A more detailed explanation of the NIST resilient mechanism is as follows:<list list-type="simple">
<list-item>
<p>1) Adaptive response: This method entails a prompt and suitable response to a cyberattack by changing specific system elements to change their functionality or adjust the resource allocation. The system must continue to function while these changes are implemented.</p>
</list-item>
<list-item>
<p>2) Segmentation: It prioritizes activities and resources based on their importance and reliability to identify and secure the most attractive or susceptible ones. Segmentation can happen either manually or automatically while the system is running.</p>
</list-item>
<list-item>
<p>3) Redundancy: It embraces the existence of numerous, secure instances of critical components, including hardware, data, and functions (referred to as &#x201c;replicas&#x201d;), eliminates single points of failure, and enables the system to continue functioning even after a successful cyber attack. The retention of additional, alternate communication resources is another definition of redundancy. At this point, replicas must stay in sync.</p>
</list-item>
<list-item>
<p>4) Diversity: This strategy uses heterogeneity in terms of architecture, design, or technology to make it more difficult for attackers to take advantage of widespread vulnerabilities.</p>
</list-item>
<list-item>
<p>5) Deception: It is carried out by concealing crucial resources, knowingly disseminating false information, or misdirecting attackers toward decoy copies of the genuine system components. Even when attackers have gained access, it may be able to stop them from seriously harming the system.</p>
</list-item>
</list>
</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>NIST Cyber Resilience technique (<xref ref-type="bibr" rid="B66">Ross et al., 2019</xref>).</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g004.tif"/>
</fig>
<p>Another factor important for the resilience of SG, as analyzed in <xref ref-type="bibr" rid="B2">Adepu et al. (2020)</xref>, is weaknesses in both the network infrastructure and the processes that control the smart grid. It was explained that these overlooked, common vulnerabilities can be effectively used to attack smart grids. This means that they showed that even vulnerabilities that are known to exist can be used to attack smart grids if they are not properly addressed. The authors in <xref ref-type="bibr" rid="B2">Adepu et al. (2020)</xref>, specifically mentioned that distribution systems with multiple energy sources are particularly vulnerable to attack. This is because these systems are more complex and therefore have more potential vulnerabilities. Another important contribution towards the cyber resilience of SG has been mentioned in a special publication of NIST (<xref ref-type="bibr" rid="B66">Ross et al., 2019</xref>) and pointed out that SG are vulnerable to cyber attacks due to their use of heterogeneous communication technologies and their distributed nature. Further work on a similar line is being done by <xref ref-type="bibr" rid="B77">Syrmakesis et al. (2022)</xref> and shows that while preventing or detecting cyber attacks is a well-studied field of research, making SG more resilient against such threats is a challenging task. The article, (<xref ref-type="bibr" rid="B11">Babar et al., 2020</xref>), presents the implementation of a safe demand-side management system in the smart grid. This system utilizes machine learning and IoT techniques to accurately identify dishonest entities within the grid. Authors in <xref ref-type="bibr" rid="B82">Tebekaemi and Wijesekera (2019)</xref>, introduced the secure overlay communication model as a means to distribute the operation and control of smart grids. This model includes a technique for detecting attacks that modify data. In another work as <xref ref-type="bibr" rid="B24">e Sousa et al. 
(2022)</xref>, the authors addressed the identification of load-altering attacks, which have the potential to disrupt network stability, by employing linear matrix inequality optimization techniques. The study conducted by <xref ref-type="bibr" rid="B76">Srivastava and Parida (2022)</xref> focuses on identifying and isolating potential problems in AC microgrids using a machine-learning technique. The ramifications of injecting false data on the functioning of intelligent power distribution networks have been examined in the study conducted by <xref ref-type="bibr" rid="B15">Cao et al. (2022)</xref>. The reasons why deep reinforcement learning (DRL) is better suited to false data injection (FDI) attack detection than traditional methods are briefly as follows:<list list-type="simple">
<list-item>
<p>1) Adapts to Changing Conditions: Smart grids are constantly changing, with different power demands, weather conditions, and operational challenges. Traditional detection methods often rely on fixed thresholds or static patterns, which can struggle to keep up with these changes. DRL, on the other hand, learns and adapts in real-time. It can adjust to new situations as they happen, making it better suited for environments like smart grids where things are always shifting. This adaptability makes DRL more resilient to new, unexpected types of attacks.</p>
</list-item>
<list-item>
<p>2) Tracks Attacks Over Time: FDI attacks usually do not happen all at once; attackers often inject false data in small doses over time to gradually influence the system. DRL is great at handling these kinds of &#x201c;sequential&#x201d; tasks. It considers the long-term impact of each data point, so it is able to detect patterns or small, ongoing changes that might indicate an attack in progress. Traditional methods might miss these subtle, accumulating signs of trouble since they often analyze data in isolated snapshots.</p>
</list-item>
<list-item>
<p>3) Learns Complex Patterns Automatically: The data in smart grids can be pretty complex&#x2014;things like voltage, power flows, and load data are all interrelated, and it is hard to manually define features to capture every subtle pattern or anomaly. DRL uses deep neural networks to automatically learn relevant features from this data. It is like having a model that can see subtle signs of unusual behavior without needing an expert to pre-program every possible pattern. This ability to learn what matters directly from the data gives DRL an edge in spotting the tricky, hidden signs of an FDI attack.</p>
</list-item>
<list-item>
<p>4) Fast, Real-Time Detection: Traditional detection methods can sometimes be slower or less responsive. They may need time to process batches of data or use fixed rules that might not respond fast enough. DRL models are designed to make quick decisions. They&#x2019;re built for real-time detection, so as soon as they spot something suspicious, they can flag it. This is especially important in large smart grids, where attacks need to be detected and stopped quickly to prevent damage.</p>
</list-item>
<list-item>
<p>5) Learns to Recognize Harmful Attacks: Not all FDI attacks are equally harmful. Some might cause minor disruptions, while others could lead to serious issues like blackouts. DRL can be trained to recognize the attacks that are most dangerous. By using a reward system where it &#x201c;learns&#x201d; to avoid actions that lead to instability, DRL models become better at prioritizing serious threats over harmless anomalies. This way, it can focus on detecting the attacks that really matter.</p>
</list-item>
<list-item>
<p>6) Handles New, Evolving Attack Strategies: Traditional methods often rely on known patterns or signatures, making them good at detecting familiar attack types but less effective against new or modified ones. DRL, however, can generalize better to new types of attacks. By training on a wide range of scenarios in a simulated environment, it learns what normal and abnormal behavior look like, even if it has not seen a specific attack before. This makes it more robust and able to catch innovative or evolving attack strategies.</p>
</list-item>
<list-item>
<p>7) Can Keep Improving Over Time: Traditional detection methods are usually static&#x2014;once trained, they do not change unless they&#x2019;re retrained on new data, which can be a lengthy process. DRL can be set up to keep learning continuously, adapting as new data comes in. This ongoing learning process helps it stay effective as the grid evolves, whether due to seasonal changes, new infrastructure, or changing consumer behaviors.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-2">
<title>2.2 SG resilience: optimized demand response</title>
<p>Because of the new advancements in the modernization of the power grid framework, distributed energy resources (DERs) are presently essential in providing DR interest in various conditions. It needs cross-functional arrangements that speed up the coordination of DERs and help the organization administrator optimize SG operations. Notwithstanding the DERs, there are more appealing and moderate choices that make the present SG frameworks more intelligent than traditional networks. One of the popular options is distribution network reconfiguration (DNR). Despite having been presented a while ago, the concept of DNR is now considered to be a versatile solution in the process of modernizing SG frameworks (<xref ref-type="bibr" rid="B9">Arasteh et al., 2018</xref>). The DNR is characterized as the way toward changing the situation with regularly open/shut switches of the distribution network to arrive at an arrangement that enhances the objective while fulfilling all functional planning constraints of the SG without discarding any SG infrastructure network node(s) (<xref ref-type="bibr" rid="B62">Paterakis et al., 2015</xref>). Many researchers have handled the contingency situation, which arises due to disturbances in SG, with a different mechanism. For example, authors in <xref ref-type="bibr" rid="B28">Gholami et al. (2016)</xref> proposed using fuel in plug-in electric vehicles (PEV) as an alternative resource to combat partial blackout situations. Due to the important part that Distributed Generation (DG) and energy storage systems (ESS) play in the power system, many studies have been conducted to find ways to include these DERs in the SG framework under different circumstances. Authors in <xref ref-type="bibr" rid="B58">Nikkhah and Rabiee (2018)</xref> proposed a constraint on voltage stability for effectively managing wind power as an alternate energy resource. Further in another work <xref ref-type="bibr" rid="B63">Pilz et al. 
(2020)</xref>, the authors investigated the impact of false data injection attacks on smart grids and designed a security game to help utility companies choose the best strategies. They concluded that the security game can help utility companies choose the most appropriate monitoring and defense strategies so that false data injection attacks have only a limited, if any, impact on smart energy scheduling. The taxonomy of existing research for the study of resilience with respect to DERs and DNR is presented in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Taxonomy of previous research for the study of resilience with respect to DERs and DNR.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">References number</th>
<th colspan="3" align="center">DERs consideration</th>
<th colspan="3" align="center">DERs allocation</th>
<th rowspan="2" align="center">DR</th>
<th rowspan="2" align="center">Contingency</th>
</tr>
<tr>
<th align="center">DG</th>
<th align="center">ESS</th>
<th align="center">PEV</th>
<th align="center">DG</th>
<th align="center">ESS</th>
<th align="center">PEV</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<xref ref-type="bibr" rid="B4">Ahmadi et al. (2019)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B59">Nikkhah et al. (2020)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B33">Home-Ortiz and Mantovani (2020a)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B27">Gao et al. (2020)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B34">Home-Ortiz and Mantovani (2020b)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B31">Han et al. (2020)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B57">Nikkhah et al. (2021)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B57">Nikkhah et al. (2021)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B28">Gholami et al. (2016)</xref>
</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B56">Nick et al. (2017)</xref>
</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B58">Nikkhah and Rabiee (2018)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B10">Awad et al. (2015)</xref>
</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B88">Vahidinasab (2014)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B64">Rabiee et al. (2018)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B21">Ding et al. (2017)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B41">Lin and Bie (2018)</xref>
</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B68">Sharifi et al. (2017)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B3">Aghaei et al. (2016)</xref>
</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-3">
<title>2.3 Research gap and motivation</title>
<p>Price attacks and energy theft, or the manipulation of energy parameters (voltage, current, and phase angle), are the two main types of cyberattacks that try to disrupt SG-related DR strategies. For instance, attackers can disrupt power distribution systems by posting fake energy prices that are lower than the real ones through the Internet or social networks (<xref ref-type="bibr" rid="B81">Tang et al., 2019</xref>; <xref ref-type="bibr" rid="B80">Tang et al., 2018</xref>; <xref ref-type="bibr" rid="B79">Tang et al., 2019b</xref>). People who receive false information about low electricity prices will probably consume more in response to the apparent opportunity, which will likely cause a sudden (partial) load increase in the power system. Then, the quick rise in demand may lead to a peak load or even an overload on the power grid. Energy theft attacks are another type of attack. In these attacks, one or more customers in the power system are the thieves who try to make money by changing the data sent to the utility companies about either generation or usage (<xref ref-type="bibr" rid="B7">Amin et al., 2015</xref>; <xref ref-type="bibr" rid="B23">Esmalifalak et al., 2014</xref>). Attacks like these could make money for the perpetrators, but the energy companies would lose money. We look at a new kind of framework that combines false pricing attacks and energy parameter manipulation attacks in the context of SG security and reliability.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Attack resilient smart grid reference architecture</title>
<p>For the power grid to work reliably, there needs to be a full and all-encompassing cybersecurity framework that includes attack prevention, attribution (forensics), detection, restoration, and resilience for the smart grid. This framework needs to cover the physical, application, information, and infrastructure domains. Computerized reasoning and AI-based intrusion avoidance and recognition frameworks are the best strategies for cyber event identification, classification, and lessening its effect in SG. These arrangements fabricate a keen, adaptable, secure, resilient, and versatile cyber-physical smart grid infrastructure (<xref ref-type="bibr" rid="B91">Zeadally et al., 2020</xref>). The existing research summary for the architecture attributed to the automatic protection of SG is presented in <xref ref-type="table" rid="T2">Table 2</xref>. <xref ref-type="table" rid="T2">Table 2</xref> encapsulates the coverage of different aspects like fault detection, network reconfiguration, demand response, stability and robustness, and regulatory policy which are integral constituents of the SG architecture.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Previous research for the architecture attributed to automatic protection of smart grid.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">References number</th>
<th align="center">Fault detection</th>
<th align="center">Network Reconfiguration</th>
<th align="center">DR</th>
<th align="center">Stability and robustness</th>
<th align="center">Regulatory policy for RE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<xref ref-type="bibr" rid="B13">Bhattarai et al. (2015)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B30">Habib et al. (2017)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B69">Shih et al. (2017)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B50">Momesso et al. (2020)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B45">Ma et al. (2018)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B40">Liao et al. (2019)</xref>
</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B85">Tummasit et al. (2015)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B67">Sampath Kumar et al. (2018)</xref>
</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B53">Muda and Jena (2017)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B46">Mahat et al. (2011)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B35">Ibrahim et al. (2016)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B55">Nascimento et al. (2020)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B61">Papaspiliotopoulos et al. (2015)</xref>
</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B84">Tielens and Van Hertem (2016)</xref>
</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B8">Arani and El-Saadany (2012)</xref>
</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B5">Alipoor et al. (2014)</xref>
</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B75">Soni et al. (2013)</xref>
</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B93">Zhang and Chi (2015)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B19">Cohenpb and Charles (1985)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B6">Allesina and Tang (2012)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B29">Gribble (2001)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B43">Long et al. (2017)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B51">Morstyn et al. (2018)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B42">Liu et al. (2017)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B38">Korjenic and Bednar (2011)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B78">Szulecki et al. (2015)</xref>
</td>
<td align="center">N</td>
<td align="center">N</td>
<td align="center">Y</td>
<td align="center">Y</td>
<td align="center">N</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Through this work, we proposed a resilient framework, considering the microgrid concept as the backbone of the SG infrastructure. A microgrid is a limited-scale and self-dependent power distribution framework. It comprises RES and ESS and is equipped with facilitated control techniques. Loads inside a microgrid can be upheld by its neighborhood distributed generators persistently, which facilitates the MG to be detached from its upstream or parent node microgrid during blackout events or contingencies (<xref ref-type="bibr" rid="B92">Zhang et al., 2019</xref>; <xref ref-type="bibr" rid="B89">Wang et al., 2015</xref>). These features contribute to maximizing the resilience of SG. A resilient SG framework must be equipped to withstand, expect, and react to extreme or unprecedented events (<xref ref-type="bibr" rid="B90">Wang and Wang, 2015</xref>). Though the self-adequate microgrid offers several benefits to SG, more emphasis must be placed on analyzing resilience from a DR and cybersecurity perspective. The proposed framework as presented in <xref ref-type="fig" rid="F5">Figure 5</xref> represents two components: DR optimization and Cyber event handling and both are explored through a data-driven approach.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Attack resilient smart grid architecture.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g005.tif"/>
</fig>
<p>The proposed framework&#x2019;s operational components are as follows:<list list-type="simple">
<list-item>
<p>1) Data Anomaly is detected and SG operational strategy component (having Cyber Event Detection and Demand Response management) is invoked.</p>
</list-item>
<list-item>
<p>2) The Demand Response module is based on effective and accurate electrical load forecasting.</p>
</list-item>
<list-item>
<p>3) Based on the electrical load forecasting, the profit (DR) optimization is achieved for all stakeholders (producer and consumer). Our previous work (<xref ref-type="bibr" rid="B74">Sinha et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Holderbaum et al., 2023</xref>) supports the electrical load forecasting component with detailed experimentation and validation of the proposed DR mechanism.</p>
</list-item>
<list-item>
<p>4) Cyber Event detection is mainly for power fault detection and classification, followed by the FDI attack detection mechanism.</p>
</list-item>
<list-item>
<p>5) As part of our previous research work, we have done the power fault detection and classification (<xref ref-type="bibr" rid="B73">Sinha et al., 2022</xref>).</p>
</list-item>
<list-item>
<p>6) For the FDI attack detection component, we proposed a Reinforcement Learning-based detection mechanism.</p>
</list-item>
<list-item>
<p>7) Finally, the last component of the proposed framework is supported by our previous work on smart grid restoration mechanism (<xref ref-type="bibr" rid="B72">Sinha et al., 2020</xref>).</p>
</list-item>
</list>
</p>
<p>The next two subsections discuss these two aspects in detail.</p>
<sec id="s3-1">
<title>3.1 Resilience for smart grid demand response</title>
<p>Because of the techno-economic problems of extending the existing distribution infrastructure, DERs could be a successful way to deliver electricity to customers with minimized active power loss and load shedding. However, conventional Distribution Network Reconfiguration (DNR) models fail to adapt to the requirements and constraints introduced by new SG network technologies. Considering the reasons mentioned above, the adoption of an extensive coordinated model that includes an optimal operation model for DR is vital, as it is more likely to bring about the resilient operation of the grid infrastructure (<xref ref-type="table" rid="T1">Table 1</xref>). In the proposed framework, as presented in <xref ref-type="fig" rid="F5">Figure 5</xref>, the resilience aspect in the context of DR has been divided into two parts: (i) efficient and accurate load forecasting and (ii) optimization of profits among multiple stakeholders for distributed microgrid infrastructure. A detailed explanation of both aspects is as follows:</p>
<sec id="s3-1-1">
<title>3.1.1 Efficient and accurate load forecasting</title>
<p>In our previous work on VAR-CNN-LSTM (<xref ref-type="bibr" rid="B74">Sinha et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Holderbaum et al., 2023</xref>), we proposed a model based on a Deep Learning (DL) technique to accurately forecast the electrical load for the next 6 hours. In load forecasting, historical data is treated as time series data, which has linear and non-linear components (<xref ref-type="disp-formula" rid="e1">Equation 1</xref>).<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a linear component at time t, <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a non-linear component at time t, and <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the error component. A hybrid model called VAR-CNN-LSTM is proposed in this work. To handle the linear component, Vector Auto Regression (VAR) is used. Mathematically, a time series following a typical Auto Regression of order &#x2018;p&#x2019; can be formulated as (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>).<disp-formula id="e2">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a constant denoting the intercept, <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are lag coefficients. After making time series data stationary, the VAR model will do the forecasting task and the residual of this is fed as an input to the deep learning part that is CNN-LSTM. The resultant vector from <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> convolutional layer is formulated as (<xref ref-type="disp-formula" rid="e3">Equation 3</xref>).<disp-formula id="e3">
<mml:math id="m9">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<inline-formula id="inf7">
<mml:math id="m10">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents bias for <inline-formula id="inf8">
<mml:math id="m11">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> feature map, <inline-formula id="inf9">
<mml:math id="m12">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is calculated by input <inline-formula id="inf10">
<mml:math id="m13">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> from previous layer, <inline-formula id="inf11">
<mml:math id="m14">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes a Rectified Linear Unit (ReLU)-like activation function (<xref ref-type="bibr" rid="B54">Nair and Hinton, 2010</xref>), and w is the kernel. After the convolutional operation, the LSTM is used at a lower layer, as it effectively stores the temporal information from the features extracted by the CNN layer.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Load profiling at microgrid level</title>
<p>DR scenarios for power distribution are getting attention as energy demand continues to grow. Their importance is set to grow consistently throughout the years before the Smart Grid (SG) foundation. DR programs attempt to support prosumers to use uninterrupted supply and decrease their consumption usage during peak hours, which would eventually support microgrid administrator changing of DR and draw profit by selling the amount of generated power to the SG. Though various research works think of utilizing DR systems, most of them revolve around a model based on single-agent electricity costs as a variable independent of climate. However, we recognize an urgency to analyze and support learning toward working with a multi-agent model that can enhance the DR process when power prices are administered through their respective demands. Our methodology is centered around utilizing price as a sign that will influence the adjustment of demand and subsequently optimize the DR reaction.</p>
<p>We propose using the Asynchronous Advantage Actor-Critic (A3C) model (<xref ref-type="bibr" rid="B48">Mnih et al., 2016</xref>) to create the agent, together with an environment that uses VAR-CNN-LSTM (from our earlier work) to mimic the real-life situation. In our A3C model, a master agent is responsible for decision-making based on the current state of the environment, and worker agents are solely responsible for exploring and asynchronously updating the policy and value networks, which are shared by all worker agents. The list of symbols used for the A3C algorithm is presented in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>List of symbols used for A3C algorithm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Symbol</th>
<th align="left">Meaning</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">States</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Rewards</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Actions</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf15">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">State, action, and reward at time step <inline-formula id="inf16">
<mml:math id="m19">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of one trajectory</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf17">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Return, or discounted future reward; <inline-formula id="inf18">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x221e;</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Discount factor; penalty to uncertainty of future rewards; <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Transition probability of getting to the next state <inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> from the current state <inline-formula id="inf23">
<mml:math id="m26">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with action <inline-formula id="inf24">
<mml:math id="m27">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and reward <inline-formula id="inf25">
<mml:math id="m28">
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf26">
<mml:math id="m29">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">State-value function measures the expected return of state <inline-formula id="inf27">
<mml:math id="m30">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a value function parameterized by <inline-formula id="inf28">
<mml:math id="m31">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf29">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the parameter to the value function</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf30">
<mml:math id="m33">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Deterministic policy; we can also label this as <inline-formula id="inf31">
<mml:math id="m34">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, but using a different letter, <inline-formula id="inf32">
<mml:math id="m35">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, gives a better distinction so that we can easily tell when the policy is stochastic or deterministic without further explanation. Either <inline-formula id="inf33">
<mml:math id="m36">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> or <inline-formula id="inf34">
<mml:math id="m37">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is what a reinforcement learning algorithm aims to learn</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf35">
<mml:math id="m38">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Stochastic policy (agent behavior strategy); <inline-formula id="inf36">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a policy parameterized by <inline-formula id="inf37">
<mml:math id="m40">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf38">
<mml:math id="m41">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Advantage function, <inline-formula id="inf39">
<mml:math id="m42">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; it can be considered as another version of <inline-formula id="inf40">
<mml:math id="m43">
<mml:mrow>
<mml:mi mathvariant="normal">Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-value with lower variance, obtained by subtracting the state value as a baseline</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf41">
<mml:math id="m44">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Action-value function is similar to <inline-formula id="inf42">
<mml:math id="m45">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, but it assesses the expected return of a pair of state and action <inline-formula id="inf43">
<mml:math id="m46">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is an action-value function parameterized by <inline-formula id="inf44">
<mml:math id="m47">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf45">
<mml:math id="m48">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">The value of state <inline-formula id="inf46">
<mml:math id="m49">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> when we follow a policy <inline-formula id="inf47">
<mml:math id="m50">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="double-struck">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf48">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Similar to <inline-formula id="inf49">
<mml:math id="m52">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the value of (state, action) pair when we follow a policy <inline-formula id="inf50">
<mml:math id="m53">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf51">
<mml:math id="m54">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="double-struck">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf52">
<mml:math id="m55">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Number of steps in the n-step method; varies from state to state, and its maximum value is <inline-formula id="inf53">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf54">
<mml:math id="m57">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Parameter of the policy</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Working of the worker network: Worker agents, as in <xref ref-type="statement" rid="Algorithm_3">Algorithm 3</xref> and <xref ref-type="statement" rid="Algorithm_4">4</xref>, are created by the master agent and are responsible for exploring and updating the policy and value networks. The work of the worker agents can be divided into trajectory calculation and updating of the networks.</p>
<sec id="s3-1-2-1">
<title>3.1.2.1 Calculation of trajectory</title>
<p>A trajectory is a path that the agent takes through a state, action and reward space. The length of the trajectory can vary and be set. Consider T to be the trajectory length that is set. It is assumed that every worker agent has a copy of the current state of the environment upon which they explore.<list list-type="simple">
<list-item>
<p>(1) &#x2009;First, the agent observes the current state of the environment at a given time t, <inline-formula id="inf55">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and this is given to the policy network to generate a probability distribution <inline-formula id="inf56">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>(2) We create a categorical probability distribution from the probabilities generated by the policy network, from which a random action is sampled.</p>
</list-item>
<list-item>
<p>(3) Upon taking an action <inline-formula id="inf57">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the agent observes the next state <inline-formula id="inf58">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and reward <inline-formula id="inf59">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>(4) The agent stores (<inline-formula id="inf60">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>;<inline-formula id="inf61">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf62">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) tuple and repeats the above steps until the trajectory reaches length T.</p>
</list-item>
</list>
</p>
<p>This process generates a trajectory for each worker agent, which is different for every agent because of the random actions chosen to explore the environment.</p>
</sec>
<sec id="s3-1-2-2">
<title>3.1.2.2 Updating policy and value networks</title>
<p>Before updating the policy and value networks, the worker agents calculate each tuple&#x2019;s advantage value, each state&#x2019;s target value, and the loss values of the policy and value networks over the entire trajectory. The advantage value is calculated for each tuple in the trajectory using the n-step method (<xref ref-type="disp-formula" rid="e4">Equation 4</xref>).<disp-formula id="e4">
<mml:math id="m66">
<mml:mrow>
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>A</mml:mi>
<mml:mi>d</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mo>&#x2200;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Due to the iterative nature of our process, it is not feasible to utilize the cumulative rewards R(t) at each time step. In order to proceed, it is important to develop a Critic model that can effectively estimate the value function. The agent calculates the total reward <inline-formula id="inf63">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which can be viewed as a sum of flat partial returns (<xref ref-type="disp-formula" rid="e5">Equation 5</xref>).<disp-formula id="e5">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x221e;</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>For any discount factor <inline-formula id="inf64">
<mml:math id="m69">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> [0, 1), we can conceptualize the return <inline-formula id="inf65">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as partially terminating in one step, to a degree of (1 - <inline-formula id="inf66">
<mml:math id="m71">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) and yielding only the first reward, <inline-formula id="inf67">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Additionally, it can be seen as partially terminating after two steps, with a degree of (1 - <inline-formula id="inf68">
<mml:math id="m73">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) &#x2a; <inline-formula id="inf69">
<mml:math id="m74">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, resulting in a return of <inline-formula id="inf70">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and so forth. Finally, before updating the policy and value networks which are represented by <inline-formula id="inf71">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>A</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf72">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>C</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, we calculate the loss of both policy and value networks.</p>
<p>The loss of the value network is calculated by (<xref ref-type="disp-formula" rid="e6">Equation 6</xref>).<disp-formula id="e6">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The loss of the policy network is calculated by (<xref ref-type="disp-formula" rid="e7">Equation 7</xref>).<disp-formula id="e7">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>d</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf73">
<mml:math id="m80">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the advantage function and <inline-formula id="inf74">
<mml:math id="m81">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> is the probability distribution of an action given a state at time <inline-formula id="inf75">
<mml:math id="m82">
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, as given by the policy network.</p>
<p>The policy and value networks are updated as follows, where <inline-formula id="inf76">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the learning rate of the policy network (actor), <inline-formula id="inf77">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the learning rate of the value network (critic). Policy network (<xref ref-type="disp-formula" rid="e8">Equation 8</xref>).<disp-formula id="e8">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">policy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Value network or Critic network (<xref ref-type="disp-formula" rid="e9">Equation 9</xref>).<disp-formula id="e9">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">value</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>After the exploration is done by the worker agents and the networks are updated, the master agent, based on the current environment state, takes the most suitable action to maximize the overall reward.</p>
</sec>
<sec id="s3-1-2-3">
<title>3.1.2.3 Action Space</title>
<p>The steps of the action function are defined as:<list list-type="simple">
<list-item>
<p>(i) Verify that the action is legal.</p>
</list-item>
<list-item>
<p>(ii) Send the history of environment states (including the current environment state) to the LSTM and calculate the next state.</p>
</list-item>
<list-item>
<p>(iii) Compute the new price based on the effect of the action.</p>
</list-item>
<list-item>
<p>(iv) Set the just-calculated price as the price of the next state (the new price).</p>
</list-item>
<list-item>
<p>(v) Based on the demand and supply values of the <italic>next state</italic> and the new price, compute the non-normalized reward.</p>
</list-item>
<list-item>
<p>(vi) Add the next state to the historical record of environment states.</p>
</list-item>
<list-item>
<p>(vii) Return the value of the non-normalized reward and the next state to the agent.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-1-2-4">
<title>3.1.2.4 Reward</title>
<p>Based on <xref ref-type="statement" rid="Algorithm_2">Algorithm 2</xref>, the reward function is formulated with the following goals:<list list-type="simple">
<list-item>
<p>(i) Ensure that demand always exceeds supply so that the producer makes a profit instead of paying consumers to consume electricity.</p>
</list-item>
<list-item>
<p>(ii) Ensure that there is always a buffer present for demand, so that abrupt changes in demand or supply do not substantially decrease the producer&#x2019;s profitability.</p>
</list-item>
<list-item>
<p>(iii) In order to avoid a long-term reduction in demand, make sure that the price of electricity is not extremely high.</p>
</list-item>
</list>
</p>
<p>So, the formula to compute the reward value is:<disp-formula id="equ1">
<mml:math id="m87">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x2a;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>However, the correction is based on the following points:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf78">
<mml:math id="m88">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> If <inline-formula id="inf79">
<mml:math id="m89">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is within a certain limit or bounds.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf80">
<mml:math id="m90">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> If <inline-formula id="inf81">
<mml:math id="m91">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> or <inline-formula id="inf82">
<mml:math id="m92">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is negative.</p>
</list-item>
</list>
</p>
<p>The value of <inline-formula id="inf83">
<mml:math id="m93">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is set to 3 and <inline-formula id="inf84">
<mml:math id="m94">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is set to 2 to satisfy the constraints mentioned above. The variable <inline-formula id="inf85">
<mml:math id="m95">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the non-normalized value, as in <xref ref-type="statement" rid="Algorithm_5">Algorithm 5</xref>, of the price the action has given to the environment. For this function, the non-normalized values of demand and supply are used. Since we are using the demand and supply values of the same time step as that of the <inline-formula id="inf86">
<mml:math id="m96">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, we are therefore using the demand and supply values of the next time step, that is, the one that will be returned alongside the reward, to calculate the reward. Here, the <inline-formula id="inf87">
<mml:math id="m97">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a factor that is simply used to ensure that the <inline-formula id="inf88">
<mml:math id="m98">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> lies within limits or bounds defined earlier, eventually ensuring no exploding or vanishing price problems. This happens by recalculating and modifying the reward to punish the agent for getting too far out of bounds.</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>REINFORCE.<list list-type="simple">
<list-item>
<p>Initialize the policy parameter <inline-formula id="inf89">
<mml:math id="m99">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> at random</p>
</list-item>
<list-item>
<p>Generate one trajectory on policy</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf90">
<mml:math id="m100">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
</list-item>
<list-item>
<p>
<bold>for</bold> <inline-formula id="inf91">
<mml:math id="m101">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> <italic>to</italic> <inline-formula id="inf92">
<mml:math id="m102">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Estimate the return G<sub>t</sub></p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Update policy parameters:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf93">
<mml:math id="m103">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<bold>end</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>
<statement content-type="algorithm" id="Algorithm_2">
<label>Algorithm 2</label>
<p>Reward Function.<list list-type="simple">
<list-item>
<p>Initialize the hyperparameters <inline-formula id="inf94">
<mml:math id="m104">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf95">
<mml:math id="m105">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>correction <inline-formula id="inf96">
<mml:math id="m106">
<mml:mrow>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>if <inline-formula id="inf97">
<mml:math id="m107">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> or</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf98">
<mml:math id="m108">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> or</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf99">
<mml:math id="m109">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>:</p>
</list-item>
<list-item>
<p>&#x2003;correction &#x2190; 0 &#x2212; abs(correction)</p>
</list-item>
<list-item>
<p>reward &#x2190; (<italic>abs</italic>(<italic>demand</italic> &#x2212; <italic>supply</italic>)<sup>3</sup>) &#x2217; (<italic>abs</italic>(<italic>newPrice</italic><sup>2</sup>)) &#x2217; <italic>correction</italic>
</p>
</list-item>
<list-item>
<p>profit <inline-formula id="inf101">
<mml:math id="m111">
<mml:mrow>
<mml:mo>&#x2190;</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2217;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>return reward</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>
<statement content-type="algorithm" id="Algorithm_3">
<label>Algorithm 3</label>
<p>Asynchronous Advantage Actor-Critic (A3C) Offline &#x2b; Online (Episodic).<list list-type="simple">
<list-item>
<p>Global parameters: <inline-formula id="inf102">
<mml:math id="m112">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, w</p>
</list-item>
<list-item>
<p>Initialize thread-specific parameters: <inline-formula id="inf103">
<mml:math id="m113">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf104">
<mml:math id="m114">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>Initialize time step <inline-formula id="inf105">
<mml:math id="m115">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<bold>while</bold> <inline-formula id="inf106">
<mml:math id="m116">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Reset gradient: <inline-formula id="inf107">
<mml:math id="m117">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf108">
<mml:math id="m118">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Synchronize thread-specific parameters with global</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;ones: <inline-formula id="inf109">
<mml:math id="m119">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf110">
<mml:math id="m120">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf111">
<mml:math id="m121">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>start</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and sample a starting state <inline-formula id="inf112">
<mml:math id="m122">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>while</bold> <italic>(S<sub>t</sub> &#x2260; TERMINAL) and</italic> <inline-formula id="inf113">
<mml:math id="m123">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>start</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Pick the action <inline-formula id="inf114">
<mml:math id="m124">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and receive a</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;new reward <inline-formula id="inf115">
<mml:math id="m125">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a new state <inline-formula id="inf116">
<mml:math id="m126">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Update <inline-formula id="inf117">
<mml:math id="m127">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf118">
<mml:math id="m128">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>end</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Initialize the variable that holds the return</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;estimation</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf119">
<mml:math id="m129">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mtable class="array">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>&#x2009;TERMINAL</mml:mtext>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="right">
<mml:mspace width="2em"/>
<mml:mtext>otherwise</mml:mtext>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>for</bold> <inline-formula id="inf120">
<mml:math id="m130">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>start</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf121">
<mml:math id="m131">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; here R is a Monte Carlo (MC) estimate of <inline-formula id="inf122">
<mml:math id="m132">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Accumulate gradients w.r.t. &#x3b8;&#x2032;:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf123">
<mml:math id="m133">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Accumulate gradients w.r.t. w&#x2032;:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf124">
<mml:math id="m134">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>end</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Update asynchronously <inline-formula id="inf125">
<mml:math id="m135">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using <inline-formula id="inf126">
<mml:math id="m136">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and w using <inline-formula id="inf127">
<mml:math id="m137">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<bold>end</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>
<statement content-type="algorithm" id="Algorithm_4">
<label>Algorithm 4</label>
<p>Asynchronous Advantage Actor-Critic (A3C) Online mode Sliding Window.<list list-type="simple">
<list-item>
<p>Global parameters: <inline-formula id="inf128">
<mml:math id="m138">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, w</p>
</list-item>
<list-item>
<p>Initialize thread-specific parameters: <inline-formula id="inf129">
<mml:math id="m139">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf130">
<mml:math id="m140">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>Initialize time step <inline-formula id="inf131">
<mml:math id="m141">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>Initialize deques trajectoryReward, trajectoryState,</p>
</list-item>
<list-item>
<p>&#x2003;trajectoryAction</p>
</list-item>
<list-item>
<p>
<bold>while</bold> <italic>(S<sub>t</sub> &#x2260; TERMINAL) and</italic> <inline-formula id="inf132">
<mml:math id="m142">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>start</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Pick the action <inline-formula id="inf133">
<mml:math id="m143">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and receive a new</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;reward <inline-formula id="inf134">
<mml:math id="m144">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a new state <inline-formula id="inf135">
<mml:math id="m145">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Update <inline-formula id="inf136">
<mml:math id="m146">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf137">
<mml:math id="m147">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;append state to trajectoryState</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;append action to trajectoryAction</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;append reward to trajectoryReward</p>
</list-item>
<list-item>
<p>
<bold>end</bold>
</p>
</list-item>
<list-item>
<p>
<bold>while</bold> <italic>True</italic> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Reset gradient: <inline-formula id="inf138">
<mml:math id="m148">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf139">
<mml:math id="m149">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Synchronize thread-specific parameters with global</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;ones: <inline-formula id="inf140">
<mml:math id="m150">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf141">
<mml:math id="m151">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf142">
<mml:math id="m152">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>start</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and sample a starting state st.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Pick the action <inline-formula id="inf143">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and receive a new</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;reward <inline-formula id="inf144">
<mml:math id="m154">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a new state <inline-formula id="inf145">
<mml:math id="m155">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Update <inline-formula id="inf146">
<mml:math id="m156">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf147">
<mml:math id="m157">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Pop the trajectoryState</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Pop the trajectoryAction</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Pop the trajectoryReward</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Append newState to trajectoryState</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Append newAction to trajectoryAction</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Append newReward to trajectoryReward</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Initialize the variable that holds the return</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;estimation</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf148">
<mml:math id="m158">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mtable class="array">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>&#x2009;TERMINAL</mml:mtext>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="right">
<mml:mtext>otherwise</mml:mtext>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Calculate <inline-formula id="inf149">
<mml:math id="m159">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">predicted</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">target</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and Advantage</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Accumulate gradients w.r.t. &#x3b8;&#x2019;:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf150">
<mml:math id="m160">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2217;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Accumulate gradients w.r.t. w&#x2019;:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf151">
<mml:math id="m161">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">predicted</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">target</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Update asynchronously <inline-formula id="inf152">
<mml:math id="m162">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using <inline-formula id="inf153">
<mml:math id="m163">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and w using <inline-formula id="inf154">
<mml:math id="m164">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>
<bold>end</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>
<statement content-type="algorithm" id="Algorithm_5">
<label>Algorithm 5</label>
<p>Update Price.<list list-type="simple">
<list-item>
<p>Set hyper-parameters:</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf155">
<mml:math id="m165">
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>B</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>B</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<bold>for</bold>&#x2003;<inline-formula id="inf156">
<mml:math id="m166">
<mml:mrow>
<mml:mo>&#x2200;</mml:mo>
<mml:mtext>timestep&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;<bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf157">
<mml:math id="m167">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf158">
<mml:math id="m168">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>B</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>B</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf159">
<mml:math id="m169">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf160">
<mml:math id="m170">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf161">
<mml:math id="m171">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>&#x2217;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;<bold>end</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Resilience for smart grid cyber security</title>
<p>The second aspect of the proposed resilient framework (<xref ref-type="fig" rid="F5">Figure 5</xref>) is cyber event handling with an emphasis on FDI attacks. For this, the work proposes a deep learning-based algorithm to detect an ongoing false data injection (FDI) attack. The proposed method is divided into six sections:</p>
<sec id="s3-2-1">
<title>3.2.1 Simulating cyber attacks</title>
<p>We simulate the attack by dividing the simulation into episodes. In each episode that spans 500 timesteps, the attack starts at a random time step (t). We train our reinforcement learning model on several episodes and use a predetermined reward system to compute the reward and take the appropriate action.</p>
</sec>
<sec id="s3-2-2">
<title>3.2.2 Predict agent action</title>
<p>In each episode, at each step, the neural network model receives the state value and predicts two values: the estimated reward for each action (stop or continue) in that state. Using this prediction, we take the action with the maximum estimated reward and proceed to the next state. The reward system ensures that the model is punished for taking the wrong action at any given time step.</p>
</sec>
<sec id="s3-2-3">
<title>3.2.3 Goal of the reinforcement learning agent</title>
<p>We have two possible states that our system can be in:<list list-type="simple">
<list-item>
<p>1) Normal functioning <inline-formula id="inf162">
<mml:math id="m172">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>2) Under FDI Attack <inline-formula id="inf163">
<mml:math id="m173">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
</list>
</p>
<p>We have two possible actions that the RL agent can take:<list list-type="simple">
<list-item>
<p>1) Continue the normal functioning of the grid (Do not stop the simulation) <inline-formula id="inf164">
<mml:math id="m174">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>2) Stop the simulation <inline-formula id="inf165">
<mml:math id="m175">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
</list>Our objective is to create an agent that identifies attacks as soon as they begin (not sooner, not later) in order to avert severe grid damage. The agent&#x2019;s actions can lead to four different outcomes. They are as follows:<list list-type="simple">
<list-item>
<p>1) The agent terminates the simulation before the attack occurs.</p>
</list-item>
<list-item>
<p>2) The agent terminates the simulation after the attack starts.</p>
</list-item>
<list-item>
<p>3) The agent does not halt the simulation after the attack starts.</p>
</list-item>
<list-item>
<p>4) The agent does not halt the simulation before the attack occurs.</p>
</list-item>
</list>
</p>
<p>Of the above four outcomes, only 2 and 4 are desired, whereas outcomes 1 and 3 are unintended.</p>
</sec>
<sec id="s3-2-4">
<title>3.2.4 Reward system</title>
<p>For each of the above four consequences of our agent&#x2019;s action, we reward it in such a way that we punish the unintended consequences and reward the intended ones. Suppose the current state is defined as <inline-formula id="inf166">
<mml:math id="m176">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the current action as <inline-formula id="inf167">
<mml:math id="m177">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. We can define a possible reward policy as (<xref ref-type="disp-formula" rid="e10">Equations 10</xref>&#x2013;<xref ref-type="disp-formula" rid="e13">13</xref>).<disp-formula id="e10">
<mml:math id="m178">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Reward</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mtext>,&#x2009;if&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
<disp-formula id="e11">
<mml:math id="m179">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Reward</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mtext>,&#x2009;if&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
<disp-formula id="e12">
<mml:math id="m180">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Reward</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mtext>,&#x2009;if&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m181">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Reward</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mtext>,&#x2009;if&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>Where <inline-formula id="inf168">
<mml:math id="m182">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf169">
<mml:math id="m183">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be small positive values to ensure positive reward, <inline-formula id="inf170">
<mml:math id="m184">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf171">
<mml:math id="m185">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are constants that we can fine-tune to improve performance. <inline-formula id="inf172">
<mml:math id="m186">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the state under normal functioning, and <inline-formula id="inf173">
<mml:math id="m187">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the state under FDI attack. At the same time, <inline-formula id="inf174">
<mml:math id="m188">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the action to continue the normal functioning of the grid (do not stop the simulation), and <inline-formula id="inf175">
<mml:math id="m189">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the action to stop the simulation. The start time <inline-formula id="inf176">
<mml:math id="m190">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the timestep when the attack begins. <xref ref-type="disp-formula" rid="e11">Equation 11</xref> is the reward when the agent fails to stop the grid while the attack is happening. In this case, the reward is based on the time elapsed since the attack began <inline-formula id="inf177">
<mml:math id="m191">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. <xref ref-type="disp-formula" rid="e13">Equation 13</xref> is the reward when the agent stops the grid when there is no attack (false positive). We want to punish this consequence, and thus the reward can have a huge negative value when this happens. We use the mean of the noise vector as the reward term at that state to incorporate it into the agent&#x2019;s learning process and impact its decision when a similar observation occurs at a future point in time. Hence, this reward policy should theoretically ensure that our agent learns to avoid unintended actions.</p>
</sec>
<sec id="s3-2-5">
<title>3.2.5 Attack detection learning algorithm</title>
<p>
<statement content-type="algorithm" id="Algorithm_6">
<label>Algorithm 6</label>
<p>Attack Detection Learning Algorithm.<list list-type="simple">
<list-item>
<p>&#x2003;<bold>Input</bold>&#x2003;&#x3d;&#x2003;model,&#x2003;targetModel,&#x2003;params</p>
</list-item>
<list-item>
<p>&#x2003;<bold>procedure</bold>&#x2003;TRAIN(model,&#x2003;targetModel)</p>
</list-item>
<list-item>
<p>&#x2003;<inline-formula id="inf178">
<mml:math id="m192">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>5000</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>1000</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;Initialize&#x2003;Replay&#x2003;Buffer&#x2003;as&#x2003;an&#x2003;empty&#x2003;list</p>
</list-item>
<list-item>
<p>&#x2003;<bold>While</bold>
<inline-formula id="inf179">
<mml:math id="m193">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> do</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf180">
<mml:math id="m194">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>500</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf181">
<mml:math id="m195">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf182">
<mml:math id="m196">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;random&#x2003;number&#x2003;between&#x2003;0&#x2003;and&#x2003;150</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf183">
<mml:math id="m197">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>E</mml:mi>
<mml:mn>14</mml:mn>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf184">
<mml:math id="m198">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf185">
<mml:math id="m199">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>for</bold> <italic>i</italic> <inline-formula id="inf186">
<mml:math id="m200">
<mml:mrow>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;<italic>0&#x2003;to&#x2003;timesteps</italic>&#x2003;<bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<bold>if</bold>&#x2003;<italic>i&#x2003;&#x3d;&#x2003;&#x3d;&#x2003;start</italic> <bold>then</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf187">
<mml:math id="m201">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf188">
<mml:math id="m202">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>E</mml:mi>
<mml:mn>14</mml:mn>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<bold>if</bold>&#x2003;<italic>t</italic>&#x2003;<inline-formula id="inf189">
<mml:math id="m203">
<mml:mrow>
<mml:mo>&#x2264;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <italic>observe</italic> <bold>then</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf190">
<mml:math id="m204">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf191">
<mml:math id="m205">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> &#x2003;random&#x2003;number&#x2003;between&#x2003;0</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;and&#x2003;2</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<bold>else</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf192">
<mml:math id="m206">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf193">
<mml:math id="m207">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<bold>if</bold>&#x2003;<italic>attack&#x2003;has&#x2003;started&#x2003;and&#x2003;action&#x2003;is&#x2003;0(Stop</italic>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<italic>Simulation)</italic>&#x2003;<bold>then</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf194">
<mml:math id="m208">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>500</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;attack&#x2003;has&#x2003;not&#x2003;started&#x2003;and&#x2003;action&#x2003;is</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;1(Continue&#x2003;Simulation)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf195">
<mml:math id="m209">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;attack&#x2003;has&#x2003;not&#x2003;started&#x2003;and&#x2003;action&#x2003;is&#x2003;0(Stop</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Simulation)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf196">
<mml:math id="m210">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>&#x2217;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>noise</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x2003;attack&#x2003;has&#x2003;started&#x2003;and&#x2003;action&#x2003;is&#x2003;0(Stop</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Simulation)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf197">
<mml:math id="m211">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2.5</mml:mn>
<mml:mo>&#x2217;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf198">
<mml:math id="m212">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf199">
<mml:math id="m213">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf200">
<mml:math id="m214">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf201">
<mml:math id="m215">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<bold>if</bold>&#x2003;<inline-formula id="inf202">
<mml:math id="m216">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> <italic>and</italic> <inline-formula id="inf203">
<mml:math id="m217">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;<bold>then</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x5f;&#x5f;</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf204">
<mml:math id="m218">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<bold>if</bold>&#x2003;<inline-formula id="inf205">
<mml:math id="m219">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>500</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> <italic>or Current Size of Replay Buffer</italic> <inline-formula id="inf206">
<mml:math id="m220">
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<italic>buffer</italic>&#x2003;<bold>then</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Remove&#x2003;the&#x2003;oldest&#x2003;entry&#x2003;in&#x2003;the&#x2003;Replay&#x2003;Buffer</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Generate&#x2003;a&#x2003;mini&#x2003;batch&#x2003;of&#x2003;BatchSize&#x2003;entries</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;from&#x2003;the&#x2003;Replay&#x2003;Buffer</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf207">
<mml:math id="m221">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;train&#x2003;the&#x2003;model&#x2003;using&#x2003;X&#x2003;and&#x2003;y&#x2003;generated</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Save&#x2003;the&#x2003;model&#x2003;after&#x2003;every&#x2003;100&#x2003;steps.</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>The steps involved in the training process of the deep reinforcement learning algorithm are mentioned in <xref ref-type="statement" rid="Algorithm_6">Algorithm 6</xref>.</p>
</sec>
<sec id="s3-2-6">
<title>3.2.6 Correlation with NIST framework</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Identify: In the previous sections, we looked at a few possible attacks on SG systems. The False Data Injection (FDI) attack is one of the simplest yet most lethal attacks that can be carried out on smart grid networks. The FDI attack falls under two of the three possible categories of attacks we defined previously (confidentiality of data, integrity of data and commands, and availability of information and electricity). FDI attacks can tamper with the integrity of data and commands. The data that can be tampered with is the critical sensor information essential in making important decisions about electricity production and supply. FDI attacks can also cause problems with the availability of information and electricity. This happens indirectly when the tampered sensor information is used to make predictions or estimations about the amount of power needed to be generated or supplied. Having the wrong estimation can lead to unintended power surges or outages.</p>
</list-item>
<list-item>
<p>&#x2022; Protect: This step involves ensuring that adequate safety and security measures are in place to stop the attack from happening in the first place. In this work, we discussed an additional software barrier running in real-time to protect against the FDI attack. It analyzes the sensor readings at regular intervals and ensures the detection of aberrations in the readings.</p>
</list-item>
<list-item>
<p>&#x2022; Detect: This step involves identifying the attacks as they happen. This is essential since it enables us to respond to the threat and act accordingly. However, as technology advances, it becomes easier for attackers to bypass these detection mechanisms. Hence, having a detection mechanism exclusively for a particular attack makes it much harder for an attacker to bypass it. This work proposes a reinforcement learning-based detection mechanism for the FDI attack in particular. This method constantly tracks the sensor information and utilizes it to estimate the state of the SG system (under attack or normal functioning).</p>
</list-item>
<list-item>
<p>&#x2022; Respond: This is the step where we take the necessary action to deal with the threat of an attack. The proposed framework takes the necessary steps as soon as it notices threats in order to lessen the severity of the damage that the attackers have caused. There are several ways to respond to an attack, ranging from blacklisting the IP address of the attack source by setting up new firewall rules to stopping the system from running for a few minutes while you respond to the threat. This ensures that the amount of damage caused is minimized. In this work, we proposed a reinforcement learning agent-based technique that takes the sensor readings at any point in time as the state of the environment and takes one of two actions: to stop the system from running further or to continue running. By stopping the system from running, we are buying time to respond to the threat posed by the attack and minimize both the physical and financial damage dealt in the process.</p>
</list-item>
<list-item>
<p>&#x2022; Recover: The recovery phase ensures that all the services that were hindered during the attack are restored to their normal functionality. It also involves setting up necessary security measures to prevent attacks from happening further. Many strategies were proposed to deal with the recovery phase. In this work, we do not deal with the recovery phase of the NIST framework. We suggest possible approaches in the future work section of this work.</p>
</list-item>
</list>
</p>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4 Dataset</title>
<sec id="s4-1">
<title>4.1 For load profiling</title>
<p>For training the reinforcement learning-based agents, we tried to mimic the behavior of the power flow analysis of the smart grid. We have used the publicly available dataset called Independent Electricity System Operator (IESO). The IESO data is the collection of various reports released by Ontario&#x2019;s power grid operators. The report contains supply, demand, tariffs, and other relevant parameters. The dataset has values from the time interval 2010-01-01 to 2019-12-20, with data points taken at an interval of 5 minutes. We mainly required hourly electricity consumption by the consumers and the historical tariff price so we could use it to train our LSTM model to better simulate the future parameters of the smart grid, including demand, supply, and other parameters, based on the changes to the tariffs made by the agent.</p>
<p>While performing the correlation analysis, we removed the columns with a negligible correlation with the price and demand values, which reduced the number of columns from 47 to 13. This would make the training more accurate for the LSTM model and efficiently mimic the SG behavior in the agent&#x2019;s actions. Finally, there might be some cases where relevant supply data is unavailable, so we study the latest supply and demand values from IESO and compute the supply column values, keeping in mind that their correlation with other column variables is equal to that of the latest data. This step was essential for the reward computation due to our assumption that in the SG environment, most consumers also behave like prosumers, thus making their demand fluctuate. In this model behavior, the worst situation may arise when the power supply is greater than demand, thus making the agent (producer) pay back to the consumers. This will enhance the importance of the supply value column to make the model appropriate.</p>
<p>In addition, we normalized the dataset to speed up the learning process for the LSTM to simulate the environment and, indirectly, the learning for the policy and critic networks, leading to faster convergence. The environment and the agents all use normalized data, except for the reward function and the logging facilities, the former to give more fine-tuned reward signals to the agent and the latter for debugging purposes.</p>
<p>Dataset normalization is done to make training easier for NN. We use the following formula to normalize the dataset values (<xref ref-type="disp-formula" rid="e14">Equation 14</xref>).<disp-formula id="e14">
<mml:math id="m222">
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">row,col</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">row,col</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">col</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">col</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">col</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
</sec>
<sec id="s4-2">
<title>4.2 For cyber threat detection</title>
<p>The investigation and data generation were completed in a phased manner: the data used for FDI attack detection on the IEEE 9-bus and 14-bus systems were prepared in MATLAB Simulink (<xref ref-type="bibr" rid="B22">Documentation, 2020</xref>) and MATPOWER (<xref ref-type="bibr" rid="B94">Zimmerman et al., 2011</xref>). For the 9-bus system, we deployed six three-phase V-I measurement parts to recreate the PMUs installed in power systems. Likewise, for the 14-bus system, we deployed 11 three-phase V-I measurement parts. For each PMU in the 9-bus system, we recorded 18 distinct electrical quantities, like the magnitude and those related to current and voltage. Similarly, for the 14-bus system, 28 such electrical measurements are recorded.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Power system use case</title>
<sec id="s5-1">
<title>5.1 Achieving resilience through demand response</title>
<sec id="s5-1-1">
<title>5.1.1 Load forecasting</title>
<p>The load forecasting is done using the model proposed in <xref ref-type="bibr" rid="B74">Sinha et al. (2021)</xref>. The experiment has been carried out on IESO data (I. E. S. O. (IESO)) combined with Canada weather data. The data distribution is presented in <xref ref-type="fig" rid="F6">Figures 6</xref>, <xref ref-type="fig" rid="F7">7</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Distribution of the electricity Demand.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Distribution of the electricity Price.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g007.tif"/>
</fig>
<p>The model outcome on the IESO dataset is described in <xref ref-type="fig" rid="F8">Figure 8</xref>. From the result, it is clear that the model has achieved good accuracy and performs well in comparison to other existing models like MLP, HMM, and VAR. For better clarity, we have tested the model on all eleven regions of Ontario, and the RMSE score is calculated and presented for all the regions simultaneously.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Forecast of the electricity Price.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g008.tif"/>
</fig>
</sec>
<sec id="s5-1-2">
<title>5.1.2 Load profiling at microgrid level</title>
<p>We have used <xref ref-type="statement" rid="Algorithm_1">Algorithms 1</xref>, <xref ref-type="statement" rid="Algorithm_2">2</xref>, <xref ref-type="statement" rid="Algorithm_3">3</xref>, <xref ref-type="statement" rid="Algorithm_4">4</xref>, and <xref ref-type="statement" rid="Algorithm_5">5</xref> for optimal profit and demand response optimization in the microgrid environment. The online real-time training of the agent is shown in <xref ref-type="fig" rid="F9">Figures 9</xref>&#x2013;<xref ref-type="fig" rid="F11">11</xref>, and <xref ref-type="fig" rid="F12">Figure 12</xref> elaborates the outcome of the online training of the agent with the model-update method chosen as episodic (i.e., the sliding window approach is not used here). Instead, we adopted the head-start approach based on a pre-trained network, as described earlier. <xref ref-type="fig" rid="F9">Figure 9</xref> shows that the value of the mean profit gathered by the agent is relatively higher than the dataset profit. This would imply that the agent gradually becomes competent in maximizing profits while keeping the price column&#x2019;s value within acceptable bounds or limits.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Profit accumulated plot with the mean, max and min profits values of models and original datasets.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g009.tif"/>
</fig>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Critic loss curve based on our RL model.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g010.tif"/>
</fig>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Policy loss curve based on our RL model.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g011.tif"/>
</fig>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Average advantage.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g012.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F10">Figure 10</xref> shows that the critic loss constantly decreases, implying that the value network can predict the correct value of the states. When compared to offline mode, we can see that after 2000 episodes, the critic loss in online mode is of the order of 0.01, compared to <inline-formula id="inf208">
<mml:math id="m223">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> in offline mode.</p>
<p>
<xref ref-type="fig" rid="F11">Figure 11</xref> shows that in online mode, policy loss becomes almost optimal after 2000 episodes, whereas it takes 5,000 episodes in offline mode for it to become optimal. This implies a better convergence rate to optimal policy in online mode due to the use of pre-trained networks (headstart modifications).</p>
<p>It is clear from <xref ref-type="fig" rid="F12">Figure 12</xref> that the average advantage in online mode is much better than in offline mode. The algorithm also quickly converges (on the input data), implying that in online mode, the agent is taking comparatively better actions than its offline counterpart and is quick to identify the most optimal actions to be taken in a state.</p>
<p>So, it may be inferred that Deep reinforcement learning (DRL) is highly effective for load profiling and demand response optimization because it adapts in real time to changing demand patterns, predicts future needs, and makes sequential decisions to balance load. Unlike traditional methods, DRL can personalize load management for different users, automate demand-side responses, and handle the complexities of renewable energy integration. By continuously learning from real-time data, DRL enables proactive peak shaving, cost reduction, and enhanced grid stability, making the smart grid more resilient, efficient, and responsive.</p>
</sec>
</sec>
<sec id="s5-2">
<title>5.2 Achieving resiliency through multiagent detection mechanism</title>
<p>This section deals with the experimental outcome as achieved using the proposed <xref ref-type="statement" rid="Algorithm_6">Algorithm 6</xref> for the in-progress FDI attack. The experiment was carried out on 3-bus (<xref ref-type="bibr" rid="B1">Abur and Exposito, 2004</xref>), IEEE-9 bus, IEEE-14 bus, and IEEE-33 bus systems (<xref ref-type="fig" rid="F13">Figures 13</xref>, <xref ref-type="fig" rid="F14">14</xref>; <xref ref-type="table" rid="T4">Table 4</xref>). The assessment is conducted on an exemplary 3-bus grid, in conjunction with several standard IEEE grids, including the IEEE 9 bus, IEEE 14 bus, and IEEE 33 bus systems. The existing system state vector, comprising voltage magnitudes and phase angles, is ascertained through the utilization of State Estimation functions inherent to the PANDAPOWER Python library (<xref ref-type="bibr" rid="B83">Thurner et al., 2018</xref>). Simulation endeavors involve the initialization of network configurations to their default values for the simple 3-bus grid, IEEE 9 bus, IEEE 14 bus, and IEEE 33 bus systems, each of which is subjected to a prescribed number of steps during each episode. A false data injection (FDI) attack is introduced at a randomly selected point within an episode and persists for an indeterminate duration. The principal objective entails training our model to promptly terminate the episode upon the commencement of the FDI attack, with temporal precision. To check the convergence of the proposed reinforcement-based learning algorithm, we plot the value of the loss function as the training of the model progresses. The plot is shown in <xref ref-type="fig" rid="F15">Figure 15</xref>.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>3 bus grid.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g013.tif"/>
</fig>
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>IEEE 14 bus grid network.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g014.tif"/>
</fig>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Evaluation results of in progress FDI attack using proposed method.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Bus type</th>
<th align="center">Perfect calls percentage</th>
<th align="center">Good calls percentage</th>
<th align="center">Delayed calls rate</th>
<th align="center">False alarms rate</th>
<th align="center">Detection failure percentage</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">3 Bus Grid</td>
<td align="center">81</td>
<td align="center">96</td>
<td align="center">0.72</td>
<td align="center">1.84</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">IEEE 9 Bus</td>
<td align="center">94</td>
<td align="center">96</td>
<td align="center">0.01</td>
<td align="center">0.50</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">IEEE 14 Bus</td>
<td align="center">88</td>
<td align="center">99</td>
<td align="center">0.50</td>
<td align="center">0.00</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">IEEE 33 Bus</td>
<td align="center">94</td>
<td align="center">92</td>
<td align="center">0.00</td>
<td align="center">4.60</td>
<td align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>Loss curve.</p>
</caption>
<graphic xlink:href="fenrg-12-1494164-g015.tif"/>
</fig>
<p>The evaluation metrics for the proposed RL agent are as follows:<list list-type="simple">
<list-item>
<p>1) Perfect Calls Percentage</p>
</list-item>
<list-item>
<p>2) False Alarm Rate</p>
</list-item>
<list-item>
<p>3) Good Calls Percentage</p>
</list-item>
<list-item>
<p>4) Delayed Calls Rate</p>
</list-item>
<list-item>
<p>5) Detection Failure Percentage</p>
</list-item>
<list-item>
<p>1) Perfect Calls Percentage: The percentage of attacks that our RL agent can detect as soon as they start. Let the number of episodes in which the attack start time step is the same as the attack detection time step be <inline-formula id="inf209">
<mml:math id="m224">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the total number of episodes be T. This can be computed by the equation:</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf210">
<mml:math id="m225">
<mml:mrow>
<mml:mtext>Perfect Calls Percentage</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2217;</mml:mo>
<mml:mn>100</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
<list list-type="simple">
<list-item>
<p>2) Good Calls Percentage: Let us define good calls as detecting attacks within a certain number of time steps after they start. We call this number of time steps the good-calls threshold. Let the number of episodes in which the attack is detected within the threshold number of time steps after it starts be <inline-formula id="inf211">
<mml:math id="m226">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the total number of episodes be T. This can be computed using the equation:</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf212">
<mml:math id="m227">
<mml:mrow>
<mml:mtext>Good Calls Percentage</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2217;</mml:mo>
<mml:mn>100</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
<list list-type="simple">
<list-item>
<p>3) Delayed Calls Rate: Let us define delayed calls as detecting the attack at any time after the attack begins. Let start be the time step when the attack begins, t be the time step when the agent detects it, and the total number of episodes be T. Over all the episodes in which t &gt; start, this can be computed using the equation:</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf213">
<mml:math id="m228">
<mml:mrow>
<mml:mtext>Delayed Calls Rate</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>start</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
<list list-type="simple">
<list-item>
<p>4) False Alarm Rate: False alarms are the calls that occur before the attack begins. We would want to avoid these as much as possible to avoid disruptions in power supplies. Let start be the time step when the attack begins, t be the time step when the agent raises the call, and the total number of episodes be T. Over all the episodes in which t &lt; start, this can be computed using the equation:</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf214">
<mml:math id="m229">
<mml:mrow>
<mml:mtext>False Alarms Rate</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>start</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
<list list-type="simple">
<list-item>
<p>5) Detection Failure Percentage: When our agent is unable to detect that the attack occurred by the end of the episode, we call this a detection failure. Let the number of episodes in which this happens be <inline-formula id="inf215">
<mml:math id="m230">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the total episodes be T. This can be computed using the equation:</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf216">
<mml:math id="m231">
<mml:mrow>
<mml:mtext>Detection Failure Percentage</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2217;</mml:mo>
<mml:mn>100</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
<p>We have used the evaluation metrics to estimate the performance of the proposed RL algorithm for the models trained for the simple 3-bus, IEEE-9, IEEE-14, and IEEE-33 bus systems. We have experimented using 100 episodes for all system grids. The threshold for good calls was set to 10 time steps after the attack began. <xref ref-type="table" rid="T4">Table 4</xref> shows the results of the experiments done on the 3-bus grid and the IEEE 9, 14, and 33 bus systems.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<title>6 Conclusion</title>
<p>The Smart Grid framework is an emerging innovation that carries many advantages to administrators and users, even though it has a few downsides regarding safety and security, which may impact its deployment in real-time applications. Like other frameworks for critical infrastructure, the advancement of SG modern devices toward an exceptionally associated and distributed model raises a few issues for the reliability and safety of the integrated framework. The proposed framework investigates the reliance of DR on the smart grid and shows how the ideas of DR and cyber security with resilience are intrinsically related. The framework first gives the deep learning model for accurately estimating electrical load and price. Consequently, it proposes an optimized demand response strategy in a multi-micro-grid environment using a modified RL-based A3C algorithm (in offline and online modes). The results show that for DR optimization, online mode converges more quickly than offline mode, implying that in online mode, the agent is taking comparatively better actions than its offline counterpart and is quick to identify the most optimal actions to be taken in a state. Further, the framework explored the intricacies of in-progress cyber attacks, especially FDI attacks. It proposed a reinforcement learning-based algorithm for the same, and the experiment is carried out on a simple 3-bus grid and the IEEE-9, IEEE-14, and IEEE-33 bus systems. It is shown with the help of a plot that the loss function minimizes as the model&#x2019;s training progresses. The evaluation metrics for the proposed RL agent for the in-progress FDI attack are Perfect Calls Percentage, False Alarm Rate, Good Calls Percentage, Delayed Calls Rate, and Detection Failure Percentage. Finally, the framework shows the interdependence of DR and cyber security and proposes a solution for reliable smart grid functioning.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://www.ieso.ca/power-data">https://www.ieso.ca/power-data</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>AS: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Software, Validation, Writing&#x2013;original draft, Writing&#x2013;review and editing. RV: Project administration, Supervision, Writing&#x2013;review and editing. FA: Project administration, Supervision, Writing&#x2013;review and editing. WH: Investigation, Methodology, Supervision, Writing&#x2013;original draft, Writing&#x2013;review and editing. OV: Conceptualization, Formal Analysis, Investigation, Methodology, Project administration, Resources, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The work is partially funded by the Department of Science and Technology (DST), and C3i-Hub (Indian Institute of Technology, Kanpur), India for the Risk Averse Resilience Framework for Critical Infrastructure Security (RARCIS) project.</p>
</sec>
<ack>
<p>The English language of this manuscript was reviewed and improved using Grammarly, an AI-based writing assistant tool that helps enhance grammar, clarity, and style (<ext-link ext-link-type="uri" xlink:href="https://app.grammarly.com/">https://app.grammarly.com/</ext-link>).</p>
</ack>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Abur</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Exp&#xf3;sito</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2004</year>). <source>Power system state estimation: theory and implementation</source>. <edition>1st ed</edition>. <publisher-name>CRC press</publisher-name>. <pub-id pub-id-type="doi">10.1201/9780203913673</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adepu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kandasamy</surname>
<given-names>N. K.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mathur</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Attacks on smart grid: power supply interruption and malicious power generation</article-title>. <source>Int. J. Inf. Secur.</source> <volume>19</volume>, <fpage>189</fpage>&#x2013;<lpage>211</lpage>. <pub-id pub-id-type="doi">10.1007/s10207-019-00452-z</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aghaei</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alizadeh</surname>
<given-names>M.-I.</given-names>
</name>
<name>
<surname>Siano</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Heidari</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Contribution of emergency demand response programs in power system reliability</article-title>. <source>Energy</source> <volume>103</volume>, <fpage>688</fpage>&#x2013;<lpage>696</lpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2016.03.031</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmadi</surname>
<given-names>S.-A.</given-names>
</name>
<name>
<surname>Vahidinasab</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ghazizadeh</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Mehran</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Giaouris</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Co-optimising distribution network adequacy and security by simultaneous utilisation of network reconfiguration and distributed energy resources</article-title>. <source>IET Generation, Transm. and Distribution</source> <volume>13</volume> (<issue>20</issue>), <fpage>4747</fpage>&#x2013;<lpage>4755</lpage>. <pub-id pub-id-type="doi">10.1049/iet-gtd.2019.0824</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alipoor</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Miura</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ise</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Power system stabilization using virtual synchronous generator with alternating moment of inertia</article-title>. <source>IEEE J. Emerg. Sel. Top. power Electron.</source> <volume>3</volume> (<issue>2</issue>), <fpage>451</fpage>&#x2013;<lpage>458</lpage>. <pub-id pub-id-type="doi">10.1109/jestpe.2014.2362530</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Allesina</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Stability criteria for complex ecosystems</article-title>. <source>Nature</source> <volume>483</volume> (<issue>7388</issue>), <fpage>205</fpage>&#x2013;<lpage>208</lpage>. <pub-id pub-id-type="doi">10.1038/nature10832</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schwartz</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Cardenas</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Sastry</surname>
<given-names>S. S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Game-theoretic models of electricity theft detection in smart utility networks: providing new capabilities with advanced metering infrastructure</article-title>. <source>IEEE Control Syst. Mag.</source> <volume>35</volume> (<issue>1</issue>), <fpage>66</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1109/MCS.2014.2364711</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arani</surname>
<given-names>M. F. M.</given-names>
</name>
<name>
<surname>El-Saadany</surname>
<given-names>E. F.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Implementing virtual inertia in dfig-based wind power generation</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>28</volume> (<issue>2</issue>), <fpage>1373</fpage>&#x2013;<lpage>1384</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2012.2207972</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arasteh</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Vahidinasab</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Sepasian</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Aghaei</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Stochastic system of systems architecture for adaptive expansion of smart distribution grids</article-title>. <source>IEEE Trans. Industrial Inf.</source> <volume>15</volume> (<issue>1</issue>), <fpage>377</fpage>&#x2013;<lpage>389</lpage>. <pub-id pub-id-type="doi">10.1109/tii.2018.2808268</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Awad</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>El-Fouly</surname>
<given-names>T. H.</given-names>
</name>
<name>
<surname>Salama</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Optimal ess allocation for benefit maximization in distribution networks</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>8</volume> (<issue>4</issue>), <fpage>1668</fpage>&#x2013;<lpage>1678</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2015.2499264</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Babar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tariq</surname>
<given-names>M. U.</given-names>
</name>
<name>
<surname>Jan</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Secure and resilient demand side management engine using machine learning for iot-enabled smart grid</article-title>. <source>Sustain. Cities Soc.</source> <volume>62</volume>, <fpage>102370</fpage>. <pub-id pub-id-type="doi">10.1016/j.scs.2020.102370</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balali</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yunusa-Kaltungo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Edwards</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A systematic review of passive energy consumption optimisation strategy selection for buildings through multiple criteria decision-making techniques</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>171</volume>, <fpage>113013</fpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2022.113013</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bhattarai</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Bak-Jensen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chaudhary</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pillai</surname>
<given-names>J. R.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>An adaptive overcurrent protection in smart distribution grid</article-title>,&#x201d; in <source>2015 IEEE eindhoven PowerTech</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bohra</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Anvari-Moghaddam</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comprehensive review on applications of multicriteria decision-making methods in power and energy systems</article-title>. <source>Int. J. Energy Res.</source> <volume>46</volume> (<issue>4</issue>), <fpage>4088</fpage>&#x2013;<lpage>4118</lpage>. <pub-id pub-id-type="doi">10.1002/er.7517</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lou</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Distributed synchronous detection for false data injection attack in cyber-physical microgrids</article-title>. <source>Int. J. Electr. Power and Energy Syst.</source> <volume>137</volume>, <fpage>107788</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2021.107788</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Resilient distribution system by microgrids formation after natural disasters</article-title>. <source>IEEE Trans. smart grid</source> <volume>7</volume> (<issue>2</issue>), <fpage>958</fpage>&#x2013;<lpage>966</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2015.2429653</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A new generation of ai: a review and perspective on machine learning technologies applied to smart energy and electric power systems</article-title>. <source>Int. J. Energy Res.</source> <volume>43</volume> (<issue>6</issue>), <fpage>1928</fpage>&#x2013;<lpage>1973</lpage>. <pub-id pub-id-type="doi">10.1002/er.4333</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clark</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zonouz</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Cyber-physical resilience: definition and assessment metric</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>10</volume> (<issue>2</issue>), <fpage>1671</fpage>&#x2013;<lpage>1684</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2017.2776279</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cohen</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Newman</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>When will a large complex system be stable?</article-title> <source>J. Theor. Biol.</source> <volume>113</volume>, <fpage>153</fpage>&#x2013;<lpage>156</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cybersecurity</surname>
<given-names>C. I.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Framework for improving critical infrastructure cybersecurity. CSWP, vol. 4162018</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://nvlpubs.nist.gov/nistpubs/CSWP/NIST">https://nvlpubs.nist.gov/nistpubs/CSWP/NIST</ext-link>
</comment>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bie</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A resilient microgrid formation strategy for load restoration considering master-slave distributed generators and topology reconfiguration</article-title>. <source>Appl. energy</source> <volume>199</volume>, <fpage>205</fpage>&#x2013;<lpage>216</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2017.05.012</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Documentation</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Simulation and model-based design</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.mathworks.com/products/simulink.html">https://www.mathworks.com/products/simulink.html</ext-link>.</comment>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Esmalifalak</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Detecting stealthy false data injection using machine learning in smart grid</article-title>. <source>IEEE Syst. J.</source> <volume>11</volume> (<issue>3</issue>), <fpage>1644</fpage>&#x2013;<lpage>1652</lpage>. <pub-id pub-id-type="doi">10.1109/jsyst.2014.2341597</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>e Sousa</surname>
<given-names>&#xc1;.</given-names>
</name>
<name>
<surname>Messai</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Manamanni</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Load-altering attack detection on smart grid using functional observers</article-title>. <source>Int. J. Crit. Infrastructure Prot.</source> <volume>37</volume>, <fpage>100518</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijcip.2022.100518</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="web">
<collab>EuGovernment</collab> <article-title>European Union agency for cybersecurity (enisa), smart grids</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.enisa.europa.eu/topics/critical-information-infrastructures-and-services/smart-grids?tab=details">https://www.enisa.europa.eu/topics/critical-information-infrastructures-and-services/smart-grids?tab&#x3d;details</ext-link>.</comment>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fleschutz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bohlayer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Braun</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Henze</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Murphy</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The effect of price-based demand response on carbon emissions in European electricity markets: the importance of adequate carbon prices</article-title>. <source>Appl. Energy</source> <volume>295</volume>, <fpage>117040</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2021.117040</pub-id> <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.sciencedirect.com/science/article/pii/S0306261921004992">https://www.sciencedirect.com/science/article/pii/S0306261921004992</ext-link>.</comment>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Batch-constrained reinforcement learning for dynamic distribution network reconfiguration</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>11</volume> (<issue>6</issue>), <fpage>5357</fpage>&#x2013;<lpage>5369</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2020.3005270</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gholami</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shekari</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Aminifar</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Shahidehpour</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Microgrid scheduling with uncertainty: the quest for resilience</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>7</volume> (<issue>6</issue>), <fpage>2849</fpage>&#x2013;<lpage>2858</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2016.2598802</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gribble</surname>
<given-names>S. D.</given-names>
</name>
</person-group> (<year>2001</year>). &#x201c;<article-title>Robustness in complex systems</article-title>,&#x201d; in <source>Proceedings eighth workshop on hot topics in operating systems</source> (<publisher-name>IEEE</publisher-name>), <fpage>21</fpage>&#x2013;<lpage>26</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Habib</surname>
<given-names>H. F.</given-names>
</name>
<name>
<surname>Mohamed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>El Hariri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mohammed</surname>
<given-names>O. A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Utilizing supercapacitors for resiliency enhancements and adaptive microgrid protection against communication failures</article-title>. <source>Electr. Power Syst. Res.</source> <volume>145</volume>, <fpage>223</fpage>&#x2013;<lpage>233</lpage>. <pub-id pub-id-type="doi">10.1016/j.epsr.2016.12.027</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Coupled optimization of topology reconfiguration and voltage source converter control for enlarging load margin of AC/DC distribution network</article-title>,&#x201d; in <conf-name>2020 IEEE 4th conference on energy Internet and energy system integration (EI2)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>633</fpage>&#x2013;<lpage>637</lpage>.</citation>
</ref>
<ref id="B32">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Holderbaum</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Alasali</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Model predictive control</article-title>,&#x201d; in <source>Energy forecasting and control methods for energy storage systems in distribution networks: predictive modelling and control techniques</source> (<publisher-name>Springer</publisher-name>), <fpage>129</fpage>&#x2013;<lpage>148</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Home-Ortiz</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Mantovani</surname>
<given-names>J. R. S.</given-names>
</name>
</person-group> (<year>2020a</year>). &#x201c;<article-title>Enhancement of the resilience through microgrids formation and DG allocation with master-slave DG operation</article-title>,&#x201d; in <conf-name>2020 international conference on smart energy systems and technologies (SEST)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Home-Ortiz</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Mantovani</surname>
<given-names>J. R. S.</given-names>
</name>
</person-group> (<year>2020b</year>). &#x201c;<article-title>Resilience enhancing through microgrids formation and distributed generation allocation</article-title>,&#x201d; in <source>2020 IEEE PES innovative smart grid technologies europe (ISGT-Europe)</source> (<publisher-name>IEEE</publisher-name>), <fpage>995</fpage>&#x2013;<lpage>999</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ibrahim</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>El-Khattam</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>ElMesallamy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Talaat</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Adaptive protection coordination scheme for distribution network with distributed generation using ABC</article-title>. <source>J. Electr. Syst. Inf. Technol.</source> <volume>3</volume> (<issue>2</issue>), <fpage>320</fpage>&#x2013;<lpage>332</lpage>. <pub-id pub-id-type="doi">10.1016/j.jesit.2015.11.012</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="web">
<collab>Independent Electricity System Operator (IESO)</collab> <article-title>Power data</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.ieso.ca/power-data">https://www.ieso.ca/power-data</ext-link>.</comment>
</citation>
</ref>
<ref id="B37">
<citation citation-type="web">
<collab>InEnergy</collab> <article-title>India smart grid forum</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.indiasmartgrid.org/">https://www.indiasmartgrid.org/</ext-link>.</comment>
</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Korjenic</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bednar</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Impact of lifestyle on the energy demand of a single family house</article-title>,&#x201d; in <source>Build. Simul., building simulation</source>, <volume>4</volume>, <publisher-name>Springer</publisher-name>, <fpage>89</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1007/s12273-010-0013-4</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shahidehpour</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aminifar</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Alabdulwahab</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Al-Turki</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Networked microgrids for enhancing the power system resilience</article-title>. <source>Proc. IEEE</source> <volume>105</volume> (<issue>7</issue>), <fpage>1289</fpage>&#x2013;<lpage>1310</lpage>. <pub-id pub-id-type="doi">10.1109/jproc.2017.2685558</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Microgrid adaptive current instantaneous trip protection</article-title>,&#x201d; in <conf-name>2019 IEEE innovative smart grid technologies-asia (ISGT asia)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2074</fpage>&#x2013;<lpage>2078</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bie</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Tri-level optimal hardening plan for a resilient distribution system considering reconfiguration and DG islanding</article-title>. <source>Appl. Energy</source> <volume>210</volume>, <fpage>1266</fpage>&#x2013;<lpage>1279</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2017.06.059</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Energy-sharing model with price-based demand response for microgrids of peer-to-peer prosumers</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>32</volume> (<issue>5</issue>), <fpage>3569</fpage>&#x2013;<lpage>3583</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2017.2649558</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Long</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jenkins</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2017</year>). <source>Peer-to-peer energy trading in a community microgrid</source>. <publisher-name>IEEE power and energy society general meeting. IEEE</publisher-name>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rubio</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Alcaraz</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A resilient architecture for the smart grid</article-title>. <source>IEEE Trans. Industrial Inf.</source> <volume>14</volume> (<issue>8</issue>), <fpage>3745</fpage>&#x2013;<lpage>3753</lpage>. <pub-id pub-id-type="doi">10.1109/tii.2018.2826226</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Thorp</surname>
<given-names>J. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>An adaptive directional current protection scheme for distribution network with DG integration based on fault steady-state component</article-title>. <source>Int. J. Electr. Power and Energy Syst.</source> <volume>102</volume>, <fpage>223</fpage>&#x2013;<lpage>234</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2018.04.024</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahat</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Bak-Jensen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bak</surname>
<given-names>C. L.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A simple adaptive overcurrent protection of distribution systems with distributed generation</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>2</volume> (<issue>3</issue>), <fpage>428</fpage>&#x2013;<lpage>437</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2011.2149550</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mashal</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Khashan</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Hijjawi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Alshinwan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The determinants of reliable smart grid from experts&#x2019; perspective</article-title>. <source>Energy Inf.</source> <volume>6</volume> (<issue>1</issue>), <fpage>10</fpage>. <pub-id pub-id-type="doi">10.1186/s42162-023-00266-3</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mnih</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Badia</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Mirza</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Graves</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lillicrap</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Harley</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Asynchronous methods for deep reinforcement learning</article-title>,&#x201d; in <conf-name>International conference on machine learning</conf-name> (<publisher-name>PMLR</publisher-name>), <fpage>1928</fpage>&#x2013;<lpage>1937</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohassel</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Fung</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mohammadi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Raahemifar</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A survey on advanced metering infrastructure</article-title>. <source>Int. J. Electr. Power and Energy Syst.</source> <volume>63</volume>, <fpage>473</fpage>&#x2013;<lpage>484</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2014.06.025</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Momesso</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Bernardes</surname>
<given-names>W. M. S.</given-names>
</name>
<name>
<surname>Asada</surname>
<given-names>E. N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Adaptive directional overcurrent protection considering stability constraint</article-title>. <source>Electr. Power Syst. Res.</source> <volume>181</volume>, <fpage>106190</fpage>. <pub-id pub-id-type="doi">10.1016/j.epsr.2019.106190</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morstyn</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Farrell</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Darby</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>McCulloch</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Using peer-to-peer energy-trading platforms to incentivize prosumers to form federated power plants</article-title>. <source>Nat. Energy</source> <volume>3</volume> (<issue>2</issue>), <fpage>94</fpage>&#x2013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.1038/s41560-017-0075-y</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mousavizadeh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Haghifam</surname>
<given-names>M.-R.</given-names>
</name>
<name>
<surname>Shariatkhah</surname>
<given-names>M.-H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A linear two-stage method for resiliency analysis in distribution systems considering renewable energy and demand response resources</article-title>. <source>Appl. Energy</source> <volume>211</volume>, <fpage>443</fpage>&#x2013;<lpage>460</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2017.11.067</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Muda</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jena</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Sequence currents based adaptive protection approach for DNs with distributed energy resources</article-title>. <source>IET Generation, Transm. and Distribution</source> <volume>11</volume> (<issue>1</issue>), <fpage>154</fpage>&#x2013;<lpage>165</lpage>. <pub-id pub-id-type="doi">10.1049/iet-gtd.2016.0727</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nair</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Rectified linear units improve restricted Boltzmann machines</article-title>. <source>ICML</source>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nascimento</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Brito</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Souza</surname>
<given-names>B. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An adaptive overcurrent protection system applied to distribution systems</article-title>. <source>Comput. and Electr. Eng.</source> <volume>81</volume>, <fpage>106545</fpage>. <pub-id pub-id-type="doi">10.1016/j.compeleceng.2019.106545</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nick</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cherkaoui</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Paolone</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Optimal planning of distributed energy storage systems in active distribution networks embedding grid reconfiguration</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>33</volume> (<issue>2</issue>), <fpage>1577</fpage>&#x2013;<lpage>1590</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2017.2734942</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nikkhah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Allahham</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Royapoor</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bialek</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Giaouris</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>A community-based building-to-building strategy for multi-objective energy management of residential microgrids</article-title>,&#x201d; in <conf-name>2021 12th international renewable engineering conference (IREC)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nikkhah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rabiee</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Voltage stability constrained multi&#x2010;objective optimisation model for long&#x2010;term expansion planning of large&#x2010;scale wind farms</article-title>. <source>IET Generation, Transm. and Distribution</source> <volume>12</volume> (<issue>3</issue>), <fpage>548</fpage>&#x2013;<lpage>555</lpage>. <pub-id pub-id-type="doi">10.1049/iet-gtd.2017.0763</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nikkhah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rabiee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mohseni-Bonab</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Kamwa</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Risk averse energy management strategy in the presence of distributed energy resources considering distribution network reconfiguration: an information gap decision theory approach</article-title>. <source>IET Renew. Power Gener.</source> <volume>14</volume> (<issue>2</issue>), <fpage>305</fpage>&#x2013;<lpage>312</lpage>. <pub-id pub-id-type="doi">10.1049/iet-rpg.2019.0472</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Panteli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mancarella</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The grid: stronger, bigger, smarter? presenting a conceptual framework of power system resilience</article-title>. <source>IEEE Power Energy Mag.</source> <volume>13</volume> (<issue>3</issue>), <fpage>58</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1109/mpe.2015.2397334</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Papaspiliotopoulos</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>Korres</surname>
<given-names>G. N.</given-names>
</name>
<name>
<surname>Kleftakis</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>Hatziargyriou</surname>
<given-names>N. D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Hardware-in-the-loop design and optimal setting of adaptive protection schemes for distribution systems with distributed generation</article-title>. <source>IEEE Trans. Power Deliv.</source> <volume>32</volume> (<issue>1</issue>), <fpage>393</fpage>&#x2013;<lpage>400</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrd.2015.2509784</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paterakis</surname>
<given-names>N. G.</given-names>
</name>
<name>
<surname>Mazza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Santos</surname>
<given-names>S. F.</given-names>
</name>
<name>
<surname>Erdin&#xe7;</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Chicco</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bakirtzis</surname>
<given-names>A. G.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Multi-objective reconfiguration of radial distribution systems using reliability indices</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>31</volume> (<issue>2</issue>), <fpage>1048</fpage>&#x2013;<lpage>1062</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2015.2425801</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pilz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Naeini</surname>
<given-names>F. B.</given-names>
</name>
<name>
<surname>Grammont</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Smagghe</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nebel</surname>
<given-names>J.-C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Security attacks on smart grid scheduling and their defences: a game-theoretic approach</article-title>. <source>Int. J. Inf. Secur.</source> <volume>19</volume>, <fpage>427</fpage>&#x2013;<lpage>443</lpage>. <pub-id pub-id-type="doi">10.1007/s10207-019-00460-z</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rabiee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nikkhah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Soroudi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Information gap decision theory to deal with long-term wind energy planning considering voltage stability</article-title>. <source>Energy</source> <volume>147</volume>, <fpage>451</fpage>&#x2013;<lpage>463</lpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2018.01.061</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Romanenko</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tanjimuddin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Raussi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Aro</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tikka</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Honkapuro</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Taxonomy of security threats in energy systems</article-title>,&#x201d; in <conf-name>2020 17th international conference on the European energy market (EEM)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>7</lpage>.</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ross</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pillitteri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Graubart</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bodeau</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>McQuaid</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Developing cyber resilient systems: a systems security engineering approach</article-title>. <source>Natl. Inst. Stand. Technol. Tech. Rep.</source>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sampath Kumar</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Srinivasan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Reindl</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Adaptive directional overcurrent relaying scheme for meshed distribution networks</article-title>. <source>IET Generation, Transm. and Distribution</source> <volume>12</volume> (<issue>13</issue>), <fpage>3212</fpage>&#x2013;<lpage>3220</lpage>. <pub-id pub-id-type="doi">10.1049/iet-gtd.2017.1279</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharifi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fathi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vahidinasab</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A review on demand-side tools in electricity market</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>72</volume>, <fpage>565</fpage>&#x2013;<lpage>572</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2017.01.020</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shih</surname>
<given-names>M. Y.</given-names>
</name>
<name>
<surname>Conde</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Leonowicz</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Martirano</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>An adaptive overcurrent coordination scheme to improve relay sensitivity and overcome drawbacks due to distributed generation in smart grids</article-title>. <source>IEEE Trans. Industry Appl.</source> <volume>53</volume> (<issue>6</issue>), <fpage>5217</fpage>&#x2013;<lpage>5228</lpage>. <pub-id pub-id-type="doi">10.1109/tia.2017.2717880</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goli</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Subramanian</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Vyas</surname>
<given-names>O. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Insider attack mitigation in a smart metering infrastructure using reputation score and blockchain technology</article-title>. <source>Int. J. Inf. Secur.</source> <volume>21</volume>, <fpage>527</fpage>&#x2013;<lpage>546</lpage>. <pub-id pub-id-type="doi">10.1007/s10207-021-00561-8</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Singh</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Govindarasu</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>A novel architecture for attack-resilient wide-area protection and control system in smart grid</article-title>,&#x201d; in <source>2020 resilience week (RWS)</source> (<publisher-name>IEEE</publisher-name>), <fpage>41</fpage>&#x2013;<lpage>47</lpage>.</citation>
</ref>
<ref id="B72">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chakrabarti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vyas</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Distributed grid restoration based on graph theory</article-title>,&#x201d; in <source>2020 IEEE international symposium on sustainable energy, signal processing and cyber security (iSSSC)</source>, <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B73">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dwivedi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Vyas</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Commissioning random matrix theory and synthetic minority oversampling technique for power system faults detection and classification</article-title>,&#x201d; in <conf-name>International conference on neural information processing</conf-name> (<publisher-name>Springer</publisher-name>), <fpage>518</fpage>&#x2013;<lpage>529</lpage>.</citation>
</ref>
<ref id="B74">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tayal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vyas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vyas</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Operational flexibility with statistical and deep learning model for electricity load forecasting</article-title>,&#x201d; in <source>Accepted in lecture notes in electrical engineering (LNEE)</source>. <publisher-name>Springer</publisher-name>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soni</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Doolla</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chandorkar</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Improvement of transient response in microgrids using virtual inertia</article-title>. <source>IEEE Trans. Power Deliv.</source> <volume>28</volume> (<issue>3</issue>), <fpage>1830</fpage>&#x2013;<lpage>1838</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrd.2013.2264738</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Srivastava</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Parida</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Data driven approach for fault detection and Gaussian process regression based location prognosis in smart AC microgrid</article-title>. <source>Electr. Power Syst. Res.</source> <volume>208</volume>, <fpage>107889</fpage>. <pub-id pub-id-type="doi">10.1016/j.epsr.2022.107889</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Syrmakesis</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Alcaraz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hatziargyriou</surname>
<given-names>N. D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Classifying resilience approaches for protecting smart grids against cyber threats</article-title>. <source>Int. J. Inf. Secur.</source> <volume>21</volume> (<issue>5</issue>), <fpage>1189</fpage>&#x2013;<lpage>1210</lpage>. <pub-id pub-id-type="doi">10.1007/s10207-022-00594-7</pub-id>
</citation>
</ref>
<ref id="B78">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Szulecki</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ancygier</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Szwed</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Energy democratization? Societal aspects of de-carbonization in the German and Polish energy sectors</article-title>,&#x201d; in <source>Societal aspects of de-carbonization in the German and Polish energy sectors</source>.</citation>
</ref>
<ref id="B79">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zio</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2019b</year>). &#x201c;<article-title>A zero-sum Markov defender-attacker game for modeling false pricing in smart grids and its solution by multi-agent reinforcement learning</article-title>,&#x201d; in <conf-name>29th European safety and reliability conference (ESREL2019)</conf-name>.</citation>
</ref>
<ref id="B80">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zio</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ramirez-Marquez</surname>
<given-names>J. E.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Analysis of the vulnerability of smart grids to social network-based attacks</article-title>,&#x201d; in <conf-name>2018 3rd international conference on system reliability and safety (ICSRS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>130</fpage>&#x2013;<lpage>134</lpage>.</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Y. P.</given-names>
</name>
<name>
<surname>Zio</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ramirez-Marquez</surname>
<given-names>J. E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Resilience of smart power grids to false pricing attacks in the social network</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>80491</fpage>&#x2013;<lpage>80505</lpage>. <pub-id pub-id-type="doi">10.1109/access.2019.2923578</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tebekaemi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wijesekera</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Secure overlay communication and control model for decentralized autonomous control of smart micro-grids</article-title>. <source>Sustain. Energy, Grids Netw.</source> <volume>18</volume>, <fpage>100222</fpage>. <pub-id pub-id-type="doi">10.1016/j.segan.2019.100222</pub-id>
</citation>
</ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thurner</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Scheidler</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sch&#xe4;fer</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Menke</surname>
<given-names>J.-H.</given-names>
</name>
<name>
<surname>Dollichon</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Meier</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>pandapower&#x2014;an open-source Python tool for convenient modeling, analysis, and optimization of electric power systems</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>33</volume> (<issue>6</issue>), <fpage>6510</fpage>&#x2013;<lpage>6521</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2018.2829021</pub-id>
</citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tielens</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Van Hertem</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The relevance of inertia in power systems</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>55</volume>, <fpage>999</fpage>&#x2013;<lpage>1009</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2015.11.016</pub-id>
</citation>
</ref>
<ref id="B85">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tummasit</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Premrudeepreechacharn</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tantichayakorn</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Adaptive overcurrent protection considering critical clearing time for a microgrid system</article-title>,&#x201d; in <source>2015 IEEE innovative smart grid technologies-Asia (ISGT ASIA)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Upadhyay</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sampalli</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>SCADA (supervisory control and data acquisition) systems: vulnerability assessment and security recommendations</article-title>. <source>Comput. Secur.</source> <volume>89</volume>, <fpage>101666</fpage>. <pub-id pub-id-type="doi">10.1016/j.cose.2019.101666</pub-id>
</citation>
</ref>
<ref id="B87">
<citation citation-type="web">
<article-title>UsEnergy. U.S. Department of Energy, cybersecurity</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.energy.gov/national-security-safety/cybersecurity">https://www.energy.gov/national-security-safety/cybersecurity</ext-link>
</comment>.</citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vahidinasab</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Optimal distributed energy resources planning in a competitive electricity market: multiobjective optimization and probabilistic design</article-title>. <source>Renew. Energy</source> <volume>66</volume>, <fpage>354</fpage>&#x2013;<lpage>363</lpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2013.12.042</pub-id>
</citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Decentralized energy management system for networked microgrids in grid-connected and islanded modes</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>7</volume> (<issue>2</issue>), <fpage>1097</fpage>&#x2013;<lpage>1105</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2015.2427371</pub-id>
</citation>
</ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Self-healing resilient distribution systems based on sectionalization into microgrids</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>30</volume> (<issue>6</issue>), <fpage>3139</fpage>&#x2013;<lpage>3149</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2015.2389753</pub-id>
</citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeadally</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Adi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Baig</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Harnessing artificial intelligence capabilities to improve cybersecurity</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>23817</fpage>&#x2013;<lpage>23837</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2968045</pub-id>
</citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Dehghanpour</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A learning-based power management method for networked microgrids under incomplete information</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>11</volume> (<issue>2</issue>), <fpage>1193</fpage>&#x2013;<lpage>1204</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2019.2933502</pub-id>
</citation>
</ref>
<ref id="B93">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tse</surname>
<given-names>C. K.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Assessment of robustness of power systems from the perspective of complex networks</article-title>,&#x201d; in <conf-name>2015 IEEE international symposium on circuits and systems (ISCAS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2684</fpage>&#x2013;<lpage>2687</lpage>.</citation>
</ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zimmerman</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Murillo-S&#xe1;nchez</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>MATPOWER: steady-state operations, planning, and analysis tools for power systems research and education</article-title>. <source>IEEE Trans. Power Syst.</source> <volume>26</volume> (<issue>1</issue>), <fpage>12</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1109/tpwrs.2010.2051168</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>