<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Built Environ.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Built Environment</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Built Environ.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2297-3362</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1734945</article-id>
<article-id pub-id-type="doi">10.3389/fbuil.2025.1734945</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Integration of digital twins and machine learning for predictive maintenance using APAR method rules in non-residential buildings</article-title>
<alt-title alt-title-type="left-running-head">Zabadi et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbuil.2025.1734945">10.3389/fbuil.2025.1734945</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zabadi</surname>
<given-names>Haneen Rebhi Yousef</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3343368"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zemedkun</surname>
<given-names>Fitsum Asrat</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Hosamo</surname>
<given-names>Haidar</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2648777"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kraniotis</surname>
<given-names>Dimitrios</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>Department of Built Environment, OsloMet &#x2013; Oslo Metropolitan University</institution>, <city>Oslo</city>, <country country="NO">Norway</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Haidar Hosamo, <email xlink:href="mailto:haidar.hosamo@oslomet.no">haidar.hosamo@oslomet.no</email>
</corresp>
<fn fn-type="equal" id="fn001">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-03">
<day>03</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>11</volume>
<elocation-id>1734945</elocation-id>
<history>
<date date-type="received">
<day>29</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zabadi, Zemedkun, Hosamo and Kraniotis.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zabadi, Zemedkun, Hosamo and Kraniotis</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-03">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Non-residential buildings are major global energy consumers, and HVAC efficiency is therefore critical. Air Handling Units are central to indoor climate control but are prone to operational faults due to complex control dynamics. Current fault detection approaches often suffer from limited interpretability in machine learning models and limited flexibility in purely rule-based methods.</p>
</sec>
<sec>
<title>Methods</title>
<p>We developed a hybrid predictive maintenance framework that combines interpretable fault detection using Air-Handling Unit Performance Assessment Rules, adaptive fault classification and prediction using machine learning, and near-real-time monitoring through a digital twin interface. The framework was deployed on an AHU in a non-residential facility in Grimstad, Norway, using six months of operational data with more than 51,000 logged records.</p>
</sec>
<sec>
<title>Results</title>
<p>The hybrid approach improved fault detection performance across both frequent and rare fault classes, achieving strong F1-scores and high recall for critical fault conditions. The digital twin component, integrated through pyRevit and a web-based dashboard, enabled near-real-time fault visualization and supported maintenance planning.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The results indicate that combining expert-driven rules with machine learning and digital twin technology can deliver a practical, accurate, and scalable solution for predictive maintenance of AHUs. The framework supports the transition from reactive to intelligent building operations and can be adapted to similar non-residential contexts.</p>
</sec>
</abstract>
<kwd-group>
<kwd>predictive maintenance (PdM)</kwd>
<kwd>air handling units (AHUs)</kwd>
<kwd>fault detection and diagnosis (FDD)</kwd>
<kwd>air-handling unit performance assessment rules (APAR)</kwd>
<kwd>machine learning (ML)</kwd>
<kwd>digital twin (DT)</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="22"/>
<table-count count="13"/>
<equation-count count="5"/>
<ref-count count="63"/>
<page-count count="31"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Indoor Environment</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Buildings are among the largest global energy consumers, accounting for 20%&#x2013;40% of total energy use, depending on region and type. Heating, ventilation, and air conditioning (HVAC) systems alone contribute up to 60% of this in non-residential buildings, where continuous operation is common (<xref ref-type="bibr" rid="B24">Kim et al., 2022</xref>; <xref ref-type="bibr" rid="B37">P&#xe9;rez-Lombard et al., 2008</xref>). According to <xref ref-type="bibr" rid="B8">Felgueiras et al. (2016)</xref>, poor maintenance and inefficient operation of HVAC systems can raise energy consumption by up to 50%, highlighting the urgent need for smarter management.</p>
<p>As global energy concerns intensify and regulations tighten, optimizing HVAC efficiency is essential. <xref ref-type="bibr" rid="B11">Hassan et al. (2024)</xref> show that advanced scheduling and control strategies can reduce HVAC energy use by up to 30%, contributing to both cost savings and emissions reduction. These findings align with those of <xref ref-type="bibr" rid="B43">Simpeh et al. (2022)</xref>, who identify HVAC optimization as a key driver of sustainable building performance.</p>
<p>Traditional maintenance remains reactive or time-based, often causing unexpected failures or early component replacement. Predictive maintenance (PdM) addresses this by using near-real-time and historical data to anticipate failures. PdM has been shown to extend equipment life, reduce costs, and improve reliability (<xref ref-type="bibr" rid="B35">Pan, 2023</xref>; <xref ref-type="bibr" rid="B33">Mobley, 2002</xref>).</p>
<p>Within HVAC systems, air handling units (AHUs) play a critical role in regulating airflow, temperature, and indoor air quality. Consequently, inefficiencies in AHUs can have a disproportionate impact on overall system performance (<xref ref-type="bibr" rid="B54">Yu et al., 2014</xref>). AHUs are especially vulnerable to operational faults due to their complex control logic and interdependent components. Numerous fault detection strategies have been proposed, ranging from rule-based systems to data-driven approaches. Rule-based frameworks like the Air-Handling Unit Performance Assessment Rules (APAR) are interpretable and grounded in thermodynamic logic but often produce false positives under variable conditions. In contrast, machine learning (ML) models offer adaptability and high accuracy but are typically opaque and require labeled datasets, which are scarce in practice. This trade-off between interpretability and adaptability highlights a critical research gap.</p>
<p>This study aims to address that gap by developing a hybrid predictive maintenance framework that combines APAR&#x2019;s transparency with machine learning&#x2019;s adaptability. APAR rules generate fault labels, which train Random Forest (RF) and Neural Network (NN) models to generalize detection while maintaining interpretability. The framework also integrates a digital twin (DT), a virtual model of the AHU updated in near-real-time with live sensor data. Implemented using pyRevit and a lightweight web dashboard, it provides visualization, alerts, and diagnostics to support facility management decisions. DT technology plays a transformative role in enabling scalable, near-real-time PdM (<xref ref-type="bibr" rid="B35">Pan, 2023</xref>).</p>
<p>The methodology was tested on an AHU in a non-residential building in Grimstad, Norway, using 6&#xa0;months of operational data. Despite limited instrumentation and system scope, the approach achieved strong diagnostic accuracy and demonstrated the feasibility of integrating rule-based, data-driven, and DT solutions. This research contributes a scalable and transparent framework for PdM, supporting the shift from reactive to intelligent, energy-efficient operation of AHUs.</p>
<p>Despite substantial progress in integrating Artificial Intelligence (AI), Automated Fault Detection and Diagnostics (AFDD), and Digital Twin (DT) frameworks, current HVAC predictive maintenance systems often trade interpretability for adaptability. Studies have demonstrated that while machine learning and neural networks can optimize energy efficiency and automate fault detection, they often lack explainability and transparency in decision processes (<xref ref-type="bibr" rid="B32">Mistry, 2021</xref>). Similarly, digital twins are frequently implemented in offline or semi-manual configurations, limiting their capacity for continuous synchronization and real-time adaptation (<xref ref-type="bibr" rid="B12">Hodavand et al., 2023</xref>). This study advances the state of the art by developing a lightweight, semi-real-time hybrid predictive maintenance framework that merges APAR interpretability with ML adaptability in a DT environment, addressing practical deployment challenges identified in recent reviews (<xref ref-type="bibr" rid="B2">Aghili et al., 2025</xref>).</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Literature review</title>
<p>HVAC systems are essential for indoor environmental quality but are also among the most energy-intensive systems in buildings. Research has increasingly focused on enhancing HVAC efficiency and reliability through intelligent monitoring and maintenance strategies. This literature review explores key developments in DT technology, PdM, and fault detection methods, with a particular focus on rule-based systems like APAR and the role of ML.</p>
<sec id="s2-1">
<label>2.1</label>
<title>Digital twin technology for predictive maintenance</title>
<p>DT technology has emerged as a transformative concept in PdM by enabling a dynamic, bidirectional connection between physical systems and their digital counterparts (<xref ref-type="bibr" rid="B10">Grieves and Vickers, 2017</xref>). Through integration with real-time sensor data, DTs facilitate continuous monitoring, simulation, and diagnostic capabilities, offering a level of responsiveness not achievable through traditional maintenance approaches (<xref ref-type="bibr" rid="B56">Zhong et al., 2023</xref>; <xref ref-type="bibr" rid="B1">Abd Wahab et al., 2024</xref>). The application of DTs in HVAC systems, particularly AHUs, is especially valuable in non-residential buildings where precise environmental control and operational efficiency are essential. By leveraging Building Information Modeling (BIM), Building Management System (BMS), and Internet of Things (IoT) sensors, DTs enable system-wide diagnostics and support simulation-driven decision making (<xref ref-type="bibr" rid="B1">Abd Wahab et al., 2024</xref>; <xref ref-type="bibr" rid="B49">van Dinter et al., 2022</xref>).</p>
<p>Digital twin technology marks another significant advancement in PdM by creating a virtual representation of physical systems for real-time simulation, problem identification, and diagnostics. For example, <xref ref-type="bibr" rid="B44">Tan and Li (2024)</xref> showed that integrating PdM with DT models for AHUs can achieve energy savings of up to 13.2% while maintaining thermal comfort. Similarly, <xref ref-type="bibr" rid="B51">Xie et al. (2023)</xref> demonstrated how a DT that integrates with BIM and BMS enables continuous monitoring and early fault detection in HVAC systems, facilitating more accurate maintenance planning. This process, which involves multi-level data integration and processing, is conceptually illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, which highlights the key components and data flows that characterize a typical DT framework for predictive maintenance. Unlike static rule-based methods, DT-based systems support adaptive learning and feedback loops, allowing them to capture hidden patterns in operational data and anticipate system failures with greater accuracy (<xref ref-type="bibr" rid="B53">Yang et al., 2024</xref>). Research by <xref ref-type="bibr" rid="B13">Hosamo et al. (2022)</xref> has demonstrated the integration of Bayesian networks with BIM-derived spatial data and real-time environmental feedback for both occupant comfort analysis and HVAC failure prediction. Despite these advances, challenges remain in the standardization and definition of DTs in practice. Many so-called DT implementations operate as digital shadows without true real-time synchronization or feedback mechanisms, limiting their utility and comparability across studies (<xref ref-type="bibr" rid="B53">Yang et al., 2024</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>A conceptual diagram of the Digital Twin process for predictive maintenance.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating the workflow from IoT devices to a digital twin platform, leading to hybrid predictive maintenance, then alerts and maintenance scheduling, facility manager review, and facility implementation in a circular process.</alt-text>
</graphic>
</fig>
<p>Another key challenge lies in integrating DTs across multiple system levels, from sensors to building-wide operations. Most implementations focus on component-level diagnostics, particularly in rotating machinery, while building-scale DTs are still under development (<xref ref-type="bibr" rid="B49">van Dinter et al., 2022</xref>). Furthermore, the computational cost of real-time simulation and the demand for high-quality, continuous sensor data remain significant barriers, especially in old buildings that lack modern instrumentation (<xref ref-type="bibr" rid="B56">Zhong et al., 2023</xref>). Efforts to address interoperability through semantic ontologies and structured data models have shown promise but require broader adoption to achieve scalability (<xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>).</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Predictive maintenance strategies in the built environment</title>
<p>PdM is a proactive asset management approach that leverages real-time data to identify signs of equipment degradation and schedule interventions before failures occur. In building operations, especially in HVAC systems such as AHUs, PdM has become essential for enhancing energy efficiency and occupant comfort while reducing operational costs (<xref ref-type="bibr" rid="B9">Gourabpasi and Nik-Bakht, 2024</xref>; <xref ref-type="bibr" rid="B55">Zhang et al., 2024</xref>). This approach stands in contrast to reactive maintenance, which responds to failures after they happen, and preventive maintenance, which relies on scheduled interventions regardless of equipment condition (<xref ref-type="bibr" rid="B33">Mobley, 2002</xref>).</p>
<p>PdM operates by transforming sensor data, such as temperature, pressure, humidity, and vibration, into actionable insights that trigger timely maintenance actions (<xref ref-type="bibr" rid="B17">Jardine et al., 2006</xref>). IoT-enabled sensors form the backbone of this process, supporting continuous condition monitoring and diagnostics (<xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>). The growth of edge analytics and cloud computing has expanded PdM capabilities, making it feasible to scale across multiple systems and locations with real-time data processing and storage capabilities (<xref ref-type="bibr" rid="B38">Pruvost et al., 2023</xref>). As a result, PdM is increasingly used in complex environments such as manufacturing, transport, and smart buildings (<xref ref-type="bibr" rid="B58">Zonta et al., 2020</xref>). A summary of commonly used PdM techniques for AHU systems is presented in <xref ref-type="table" rid="T1">Table 1</xref>. According to <xref ref-type="bibr" rid="B5">Cakir et al. (2021)</xref>, unplanned downtime accounts for 5%&#x2013;20% of productivity losses, underscoring the value of predictive approaches.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Summary of predictive maintenance (including machine learning) techniques.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Application</th>
<th align="left">Strengths</th>
<th align="left">Limitations</th>
<th align="left">Author</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">APAR rule-based system</td>
<td align="left">Rule-based AHU fault detection</td>
<td align="left">Simple to implement<break/>Easily interpretable outcomes</td>
<td align="left">Limited scalability<break/>Does not adapt to dynamic system changes</td>
<td align="left">(<xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>)</td>
</tr>
<tr>
<td align="left">Machine learning (e.g., SVM, ANN, RF)</td>
<td align="left">Fault detection and condition prediction in AHU</td>
<td align="left">Handles complex, non-linear patterns<break/>Improves fault classification accuracy</td>
<td align="left">Requires labeled datasets<break/>Demands significant preprocessing</td>
<td align="left">(<xref ref-type="bibr" rid="B60">Zini and Carcasci, 2024</xref>)</td>
</tr>
<tr>
<td align="left">Digital twin &#x2b; IoT sensors</td>
<td align="left">Real-time monitoring of AHU systems</td>
<td align="left">Enables high-precision tracking, supports simulation</td>
<td align="left">High initial setup cost<break/>Complex system integration</td>
<td align="left">(<xref ref-type="bibr" rid="B44">Tan and Li, 2024</xref>)</td>
</tr>
<tr>
<td align="left">Hybrid models</td>
<td align="left">Integration of APAR and ML</td>
<td align="left">Improves interpretability and adaptability<break/>Supports advanced visualization</td>
<td align="left">More complex to design and maintain</td>
<td align="left">(<xref ref-type="bibr" rid="B59">Prabhu Bam et al., 2024</xref>)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Fault detection and the role of APAR</title>
<p>Fault detection and diagnosis (FDD) is a central component of PdM systems, aiming to identify operational anomalies that indicate equipment malfunctions or inefficiencies. In the context of HVAC systems, and particularly AHUs, FDD frameworks have evolved from simple threshold-based logic to sophisticated rule-based and ML-driven models (<xref ref-type="bibr" rid="B54">Yu et al., 2014</xref>; <xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>; <xref ref-type="bibr" rid="B46">Trojanova et al., 2009</xref>). One of the most influential rule-based methods is the APAR system, developed at NIST, which uses 28 predefined rules derived from thermodynamic and control principles to evaluate AHU performance across various operational modes (<xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>; <xref ref-type="bibr" rid="B39">Schein and Schein, 2006</xref>).</p>
<p>Each rule is structured as an &#x201c;if-then&#x201d; condition that compares actual sensor readings with expected behavior. For example, during heating mode, APAR checks whether the supply air temperature is sufficiently higher than the mixed air temperature as would be expected in normal heating operation. A violation could signal a stuck valve or a sensor malfunction. APAR relies on existing sensor infrastructure, including supply, return, and outdoor air temperatures, as well as damper positions and control signals, making it cost-effective and broadly applicable. The system incorporates filters such as moving averages and time delays to reduce false positives during mode transitions (<xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>).</p>
<p>Despite its utility, APAR has limitations, as it assumes steady-state operation, which reduces its effectiveness during transitions. It may also generate nuisance alarms if not properly calibrated, and it lacks adaptability to changing system behavior over time (<xref ref-type="bibr" rid="B46">Trojanova et al., 2009</xref>; <xref ref-type="bibr" rid="B6">Dey and Dong, 2016</xref>). To address these issues, researchers have extended APAR with statistical methods like Cumulative Sum (CUSUM) and integrated it into DT frameworks to combine interpretability with predictive capabilities (<xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>). These hybrid systems allow APAR to function as a transparent diagnostic layer while benefiting from the adaptability and predictive power of ML models.</p>
<p>This study adopts APAR due to its interpretability, low implementation cost, and compatibility with existing building infrastructure. The structured logic of APAR rules integrates effectively within BIM environments, where real-time sensor data can support automated fault detection. In this work, APAR is used in combination with ML to enhance detection accuracy while retaining the transparency of rule-based diagnostics. This hybrid approach enables practical deployment in existing buildings and creates a scalable framework for more advanced PdM applications.</p>
<p>
<xref ref-type="table" rid="T2">Table 2</xref> summarizes and compares these methods, showing how fault detection strategies have changed over time from rule-based systems like APAR to more advanced and flexible hybrid models.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison of fault detection methods for AHUs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Key features</th>
<th align="left">Strengths</th>
<th align="left">Limitations</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">APAR (rule-based)</td>
<td align="left">28 if-then rules based on control and thermal logic</td>
<td align="left">Easy to implement</td>
<td align="left">Static rules, prone to nuisance alarms, steady state only</td>
</tr>
<tr>
<td align="left">Enhanced APAR (CUSUM)</td>
<td align="left">Mode-based detection and fault size estimation</td>
<td align="left">Quantifies the severity of faults and enhances mode detection</td>
<td align="left">More complex implementation; requires validated parameters</td>
</tr>
<tr>
<td align="left">ML-based FDD</td>
<td align="left">ANN, RF, and Bayesian networks for fault prediction</td>
<td align="left">Adaptive; manages nonlinear systems</td>
<td align="left">Needs large datasets; less transparent</td>
</tr>
<tr>
<td align="left">Hybrid (APAR &#x2b; ML &#x2b; DT)</td>
<td align="left">Combines expert rules, machine learning, and DT</td>
<td align="left">Scalable, interpretable, predictive, real-time</td>
<td align="left">High setup effort, integration complexity</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-4">
<label>2.4</label>
<title>BIM and real-time sensor data integration for predictive maintenance</title>
<p>The integration of real-time data streaming into BIM platforms, such as Autodesk Revit, and web interfaces has become pivotal in modern construction and facility management (<xref ref-type="bibr" rid="B61">Hosamo et al., 2024</xref>; <xref ref-type="bibr" rid="B23">Kazado et al., 2019</xref>; <xref ref-type="bibr" rid="B47">Tuhaise et al., 2023</xref>; <xref ref-type="bibr" rid="B50">Veerendra et al., 2025</xref>), enabling stakeholders to interact with up-to-date information efficiently and support rapid decision making (<xref ref-type="bibr" rid="B19">Johansson et al., 2015</xref>). This real-time visualization capability addresses the complexities involved in handling large and detailed models, allowing dynamic updates of BIM data in response to real-world changes (<xref ref-type="bibr" rid="B19">Johansson et al., 2015</xref>).</p>
<p>Advancements in DT technologies have further enhanced the utility of real-time data in construction (<xref ref-type="bibr" rid="B47">Tuhaise et al., 2023</xref>). DTs facilitate improved decision-making in asset management and implementation by synchronizing BIM models with live data feeds. This synchronization ensures that the virtual representation of a building accurately reflects its physical counterpart, allowing for proactive maintenance and efficient resource allocation (<xref ref-type="bibr" rid="B47">Tuhaise et al., 2023</xref>). Moreover, the centralization and visualization of complex data within BIM platforms aid stakeholders in comprehending various project facets, leading to more informed decisions (<xref ref-type="bibr" rid="B50">Veerendra et al., 2025</xref>).</p>
<p>The incorporation of immersive technologies, such as virtual reality (VR), into BIM workflows has opened new avenues for real-time collaboration and decision-making. Real-world applications demonstrate that immersive VR within the open BIM ecosystem enhances user engagement and understanding of spatial configurations, which is particularly beneficial during the design and construction phases (<xref ref-type="bibr" rid="B18">Johansson and Roup&#xe9;, 2024</xref>). Additionally, frameworks that enable real-time synchronization of BIM data in virtual environments support collaborative decision-making by providing stakeholders with a shared, up-to-date model to reference during discussions (<xref ref-type="bibr" rid="B41">Schiavi et al., 2022</xref>).</p>
<p>Remote collaboration frameworks that integrate real-time data sharing have proven effective in structural condition assessments. These frameworks allow inspectors and engineers to address structural issues proactively, reducing the risk of failures and enhancing safety through facilitating immediate decision-making (<xref ref-type="bibr" rid="B3">Awadallah et al., 2024</xref>). The development of common data environments for DT, extending from building to city levels, underscores the importance of efficient, real-time data collection and transmission in managing complex urban infrastructures (<xref ref-type="bibr" rid="B52">Yan et al., 2025</xref>). Furthermore, the application of DT models in achieving net-zero energy buildings illustrates the role of real-time monitoring and assisted decision-making in sustainability efforts. These models process data continuously to forecast energy consumption and optimize building performance (<xref ref-type="bibr" rid="B28">Liu et al., 2025</xref>).</p>
<p>Recent studies emphasize self-learning and adaptive building systems integrating AI, rules, and DT frameworks to enhance resilience and energy efficiency. Self-learning ANN controllers have improved stability and reduced energy consumption compared to traditional (PID) systems (<xref ref-type="bibr" rid="B36">Parzinger et al., 2020</xref>; <xref ref-type="bibr" rid="B4">Bouabdallaoui et al., 2021</xref>). Reinforcement learning has also enabled HVAC automation capable of continuous optimization under dynamic conditions (<xref ref-type="bibr" rid="B45">Tian et al., 2023</xref>). These findings support the hybrid APAR&#x2013;ML&#x2013;DT framework proposed here by bridging interpretability and adaptability for future adaptive buildings.</p>
</sec>
<sec id="s2-5">
<label>2.5</label>
<title>Research gap</title>
<p>Although significant progress has been made in applying DT technologies and ML for PdM in building systems, key gaps persist in how rule-based fault detection is operationalized in practice (<xref ref-type="bibr" rid="B46">Trojanova et al., 2009</xref>). Much of the existing literature focuses on large-scale DT frameworks or black-box predictive models, yet there is limited integration of transparent, rule-based diagnostic logic, such as APAR or fuzzy inference systems, into PdM workflows for HVAC systems, particularly AHUs (<xref ref-type="bibr" rid="B13">Hosamo et al., 2022</xref>; <xref ref-type="bibr" rid="B9">Gourabpasi and Nik-Bakht, 2024</xref>; <xref ref-type="bibr" rid="B62">Hosamo et al., 2023</xref>; <xref ref-type="bibr" rid="B63">Hosamo and Mazzetto, 2025</xref>).</p>
<p>Existing studies tend to treat fault detection and PdM as separate processes, with few demonstrating how rule-based methods can complement ML models in a cohesive pipeline. Furthermore, while DTs are often presented as theoretical or high-level constructs, the actual implementation of hybrid diagnostics and predictive models using real-time data streams remains rare. Most research does not show how real-time sensor data is actively processed using both deterministic rules and learning algorithms for ongoing system evaluation (<xref ref-type="bibr" rid="B15">Hu et al., 2024</xref>).</p>
<p>There is also a lack of lightweight, reproducible implementations that demonstrate real-time diagnostics and predictions using accessible platforms and web-based environments. Although some systems integrate data-driven maintenance with centralized dashboards, practical examples where fault detection rules are executed in real-time alongside predictive modeling, especially with partial integration into environments like Revit, are underrepresented in the current literature.</p>
<p>To address this gap, this study proposes a hybrid methodology for AHU fault detection and predictive maintenance using both rule-based and data-driven approaches. APAR rules are implemented to detect faults, supported by ML models for failure prediction. These are executed in real-time using a Python environment to process live AHU sensor data. One selected APAR rule is also implemented directly within Revit to enable model-based interaction with live data. The system is extended with a web-based dashboard to visualize sensor values and rule outcomes, forming a lightweight and scalable DT framework that links diagnostics, prediction, and visualization.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology and implementation</title>
<p>This section outlines the methodology adopted to develop a predictive maintenance framework for AHUs using real-time sensor data, rule-based diagnostics, and ML, integrated with a DT platform. The proposed system aims to detect operational faults, classify their types, and anticipate future failures, ultimately supporting energy-efficient and resilient building management. The full workflow is illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, and the sections that follow provide a detailed account of each stage, including data characteristics, preprocessing steps, algorithmic implementation, evaluation strategies, and system integration.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>System architecture of the proposed predictive maintenance framework.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a system for predictive maintenance using AHU data. The process starts with real-time AHU data from I4 Helse, processed via an API, providing temperatures and signals. This data is used for predictive maintenance through APAR, leading to fault detection and classification using RF and ANN. A digital twin platform connected to BMS processes real-time sensor data, detects faults based on APAR rules, predicts faults, and includes a 3D model of AHU.</alt-text>
</graphic>
</fig>
<sec id="s3-1">
<label>3.1</label>
<title>AHU data - real time</title>
<p>This section presents the real-time sensor data acquisition process and its contextual integration into the PdM framework. The data were collected from the AHU installed in the I4Helse building, located in Grimstad, Norway (see <xref ref-type="fig" rid="F3">Figure 3</xref>). The AHU system is continuously monitored through a BMS, which serves as the core platform for real-time sensor logging, control execution, and data access. Data was retrieved from the building via a structured Application Programming Interface (API) provided by the BMS.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Exterior view of the I4Helse building in Grimstad, Norway, where the monitored AHU is installed.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g003.tif">
<alt-text content-type="machine-generated">This figure is intended solely to provide a general exterior view of the I4Helse building in Grimstad, Norway, where the monitored AHU is installed. The detailed aspects mentioned are outside the scope of this figure.</alt-text>
</graphic>
</fig>
<p>A full-scale AHU system was monitored through a network of sensors capturing both environmental and operational variables. These included various air temperatures (outside, return, exhaust, supply, and mixed), control signals (such as valve positions, fan and pump operations), and operational statuses. The dataset comprises a mix of numeric variables (e.g., temperatures in &#xb0;C), logical signals (0/1 values indicating binary states like on/off or open/closed), and continuous control signals (e.g., valve opening percentages). A detailed overview of the dataset features is provided in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Dataset overview.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Feature</th>
<th align="center">Description</th>
<th align="center">Data type</th>
<th align="center">Unit</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Timestamp</td>
<td align="center">The exact time at which the data was recorded</td>
<td align="center">Date and time</td>
<td align="left">&#x200b;</td>
</tr>
<tr>
<td align="center">Outside temp</td>
<td align="center">The measured temperature outside</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Exhaust air temp (actual)</td>
<td align="center">The actual temperature of the exhaust air measured by the system</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Return air temp (actual)</td>
<td align="center">The actual temperature of the return air measured by the system</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Mixed air temp (actual)</td>
<td align="center">The actual temperature of the mixed air measured by the system</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Supply air temp (actual)</td>
<td align="center">The actual temperature of the supply air measured by the system</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Supply air temp (min)</td>
<td align="center">The minimum allowable supply air temperature</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Supply air temp (max)</td>
<td align="center">The maximum allowable supply air temperature</td>
<td align="center">Numeric</td>
<td align="center">&#xb0;C</td>
</tr>
<tr>
<td align="center">Exhaust valve signal</td>
<td align="center">The target state (open/closed) of the exhaust air valve</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Heater pump signal</td>
<td align="center">The target state (on/off) for the heater pump</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Chiller pump signal</td>
<td align="center">The target state (on/off) for the chiller pump</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Outdoor air valve signal</td>
<td align="center">The target state (open/closed) of the outdoor air valve</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Chiller signal</td>
<td align="center">The target state (on/off) of the chiller</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Fresh air fan (actual)</td>
<td align="center">The operational state (on/off) of the fresh air fan</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Exhaust air fan signal</td>
<td align="center">The control signal (0%&#x2013;100%) sent to the exhaust air fan</td>
<td align="center">Signal</td>
<td align="center">%</td>
</tr>
<tr>
<td align="center">Heater valve signal</td>
<td align="center">The control signal (0%&#x2013;100%) sent to the heater valve</td>
<td align="center">Signal</td>
<td align="center">%</td>
</tr>
<tr>
<td align="center">Chiller valve signal</td>
<td align="center">The control signal (0%&#x2013;100%) sent to the chiller valve</td>
<td align="center">Signal</td>
<td align="center">%</td>
</tr>
<tr>
<td align="center">Heat recovery Bypass signal</td>
<td align="center">The control signal (0%&#x2013;100%) sent to the heat recovery bypass</td>
<td align="center">Signal</td>
<td align="center">%</td>
</tr>
<tr>
<td align="center">Exhaust air filter (actual)</td>
<td align="center">The operational state of the exhaust air filter</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">Outdoor air filter (actual)</td>
<td align="center">The operational state of the outdoor air filter</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
<tr>
<td align="center">AHU status (actual)</td>
<td align="center">The operational state (on/off) of the air handling unit (AHU)</td>
<td align="center">Logic</td>
<td align="center">0/1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Sensor readings were recorded at 5-min intervals over the period from 1 March 2021, at 00:00 to 27 August 2021, at 20:20. The resulting dataset contains 51,713 rows and 21 features, each time-stamped to provide a detailed snapshot of the AHU&#x2019;s operational state and environmental context. These data reflect real-world operating conditions, including natural fluctuations and occasional sensor anomalies, offering a reliable foundation for developing and evaluating models for fault detection and predictive maintenance.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Predictive maintenance</title>
<p>This section presents the PdM methodology developed to detect, classify, and forecast operational faults in AHUs. The approach integrates structured data preprocessing, rule-based diagnostics using the APAR framework, and supervised ML models.</p>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Data preprocessing</title>
<p>The preprocessing of raw sensor data is an essential step in building reliable and effective PdM models. In this study, a comprehensive real-world dataset was collected from an operational AHU at the I4Helse building over 6&#xa0;months.</p>
<p>The dataset consisted of 51,713 rows of time-series data, captured at 5-min intervals, and covered 21 distinct features. These features included critical operational variables such as outside air temperature, supply air temperature, mixed air temperature, exhaust air temperature, actuator signals, valve control levels, fan status, and general system states. However, like most sensor-based building datasets, the raw format was incomplete and noisy. Without proper preprocessing, these issues can severely impair model training, introduce bias, and distort both APAR rule evaluations and ML predictions. Therefore, a structured and multi-step data preparation approach was applied, as illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Data preprocessing workflow illustrating the pipeline from raw sensor readings to a fully cleaned and standardized dataset.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g004.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a data preprocessing workflow. Stages: Raw Dataset (initial dataset before cleaning), Data Cleaning (handling missing values and duplicates), Multicollinearity Check (assessing and removing redundant features), Dimensional Reduction (selecting important features for modeling), and Processed Dataset Ready for Modeling (final dataset prepared for analysis).</alt-text>
</graphic>
</fig>
<p>The initial phase of data preprocessing involved a thorough investigation of missing values, which are common in long-term building monitoring systems due to sensor faults, communication errors, or scheduled maintenance. The analysis revealed that 113 rows in the dataset contained one or more missing entries. Crucially, no single feature exhibited more than 112 missing values, as presented in <xref ref-type="table" rid="T4">Table 4</xref>, indicating that the data loss was sparse and distributed. These missing entries were localized in three time windows: at midnight on 1 March 2021; between 08:35 and 14:05 on 22 May 2021; and between 07:20 and 11:00 on 22 July 2021. These gaps may be associated with either system resets or temporary sensor dropouts. Because the missing data represented only 0.22% of the total dataset, and because machine learning algorithms such as Random Forest and Neural Networks typically require complete input vectors (<xref ref-type="bibr" rid="B46">Trojanova et al., 2009</xref>; <xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>), the most pragmatic solution was to remove these 113 rows. This brought the final dataset to 51,600 complete entries.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Feature-wise count of missing values before cleaning.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">No.</th>
<th align="center">Features</th>
<th align="center">Number of missing values</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">Exhaust air temp (actual)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">Return air temp (actual)</td>
<td align="center">112</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">Mixed air temp (actual)</td>
<td align="center">112</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">Supply air temp (actual)</td>
<td align="center">112</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">Exhaust valve signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">Heater pump signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">Outdoor air filter (actual)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">Heater valve signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">Supply air temp (min)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">AHU status (actual)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">Supply air temp (max)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">Chiller pump signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">Outdoor air valve signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">Chiller signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">Fresh air fan (actual)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">16</td>
<td align="center">Exhaust air fan signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">17</td>
<td align="center">Exhaust air filter (actual)</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">18</td>
<td align="center">Chiller valve signal</td>
<td align="center">111</td>
</tr>
<tr>
<td align="center">19</td>
<td align="center">Heat recovery Bypass signal</td>
<td align="center">111</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To further address the implications of this decision, an additional evaluation was conducted to ensure that the removal of the 113 incomplete rows did not adversely affect the temporal structure of the dataset, the APAR fault-detection process, or the performance of the machine learning models used in this study. The missing entries occurred in three isolated windows, each short in duration and occurring during otherwise steady operational periods. Such patterns are consistent with Missing Completely At Random (MCAR), where the probability of missingness is unrelated to the sensor values themselves (<xref ref-type="bibr" rid="B20">Maya and Chafic, 2022</xref>). Under MCAR conditions and with a missing rate as low as 0.22%, listwise deletion is widely accepted as the preferred method because it avoids introducing artificial trends or smoothing effects that can arise from statistical interpolation (<xref ref-type="bibr" rid="B31">Mallapragada et al., 2020</xref>).</p>
<p>Moreover, imputing HVAC sensor values can distort transient thermodynamic behaviour, particularly for variables such as supply air temperature, mixed air temperature, and return air temperature, all of which play central roles in APAR rule evaluation. APAR diagnostics rely on strict thermodynamic relationships, such as the magnitude and direction of temperature differences, and imputing synthetic values within missing intervals may unintentionally mask true anomalies or create non-physical consistency among sensor readings (<xref ref-type="bibr" rid="B7">Duan et al., 2021</xref>). For these reasons, deletion was considered the safer choice for preserving the physical validity of the time-series dataset prior to APAR analysis.</p>
<p>With respect to ML, the remaining dataset of 51,600 complete rows provides a sufficiently large training corpus for both RF and ANN models. Prior studies have shown that these models are robust to minor, non-systematic data reductions when the dataset remains large and diverse (<xref ref-type="bibr" rid="B29">Macieira et al., 2021</xref>). Furthermore, HVAC fault detection literature consistently recommends prioritizing physical fidelity over artificial completeness, particularly when the impact of deletion is negligible, as in this case (<xref ref-type="bibr" rid="B26">Lestinen et al., 2018</xref>). Thus, based on statistical justification, domain considerations, and alignment with established preprocessing practices, the removal of the 113 incomplete rows is both methodologically sound and appropriate for the objectives of this study.</p>
<p>Following the removal of incomplete data, a duplicate row check was conducted. Duplicate records can arise during logging restarts or file mergers, and if left unaddressed, they can bias the frequency distribution of operational modes or inflate feature importance in models. A full dataset scan using hash-based row comparison confirmed that there were no duplicate entries, ensuring consistency in the time-series progression. The cleaned dataset was then analyzed for the presence of outliers, particularly in the temperature features. Significant spikes were detected in variables such as supply air temperature and mixed air temperature, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. These outliers could result from momentary sensor calibration issues, rapid mode transitions, or even control failures (<xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>). However, given that the purpose of this study includes fault detection, these extreme values were preserved rather than removed. They were marked for potential influence on APAR rule triggering and ML training, as they could represent authentic system anomalies.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Outlier detection plots for selected temperature variables.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g005.tif">
<alt-text content-type="machine-generated">Graph showing AHU temperature time history from March 2021 to August 2021. It includes five line graphs: Outside Temperature in blue (0-30 degrees), Mixed Air Temperature in red (0-60 degrees), Supply Air Temperature in green (0-150 degrees), Return Air Temperature in purple (0-100 degrees), and Exhaust Air Temperature in orange (0-150 degrees). All graphs have varying degrees of fluctuations, with some peaks and troughs.</alt-text>
</graphic>
</fig>
<p>Once the data was cleaned, a multicollinearity analysis was performed to identify and mitigate redundancy among highly correlated features. Multicollinearity can negatively affect both interpretability and model stability, especially in models relying on feature importance rankings or linear separability. To evaluate this, pairwise Pearson correlation coefficients were computed for all combinations of continuous features.</p>
<p>The Pearson correlation coefficient <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined by <xref ref-type="disp-formula" rid="e1">Equation 1</xref>.<disp-formula id="e1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msqrt>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Where <italic>x<sub>i</sub></italic> and <italic>y<sub>i</sub></italic> are the <italic>i</italic>th observations of the two features, <italic>n</italic> is the number of observations, and <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> represent the mean values of these features. The value of Pearson&#x2019;s correlation coefficient (<inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) varies from &#x2212;1 to 1, where values closer to &#xb1;1 indicate stronger correlation. Features with |<italic>r</italic>| &#x3e; 0.9 were flagged for review. Although strong correlations were observed between temperature variables, such as between supply and mixed air temperatures, these features were retained due to their functional significance in HVAC logic and their specific use in APAR rules. Redundancy in this case was tolerated to preserve interpretability and allow physical behavior to remain visible in the dataset. Final selection of features for training was left to the feature importance analysis conducted later during machine learning model development.</p>
<p>The final preprocessing step involved standardization of all continuous variables using Z-score normalization. This is a necessary transformation to bring all feature values to a common scale, especially when using gradient-based models such as neural networks. Standardization also accelerates convergence and prevents numerical instability during optimization. The Z-score transformation is defined as shown in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.<disp-formula id="e2">
<mml:math id="m6">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">&#x3bc;</mml:mi>
</mml:mrow>
<mml:mi mathvariant="normal">&#x3c3;</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the normalized feature value, X is the original feature value, &#x3bc; is the feature mean, and &#x3c3; is the standard deviation. After transformation, each feature has zero mean and unit variance. This normalization was applied uniformly to all numeric variables, including temperatures, control signals, and valve openings.</p>
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Fault detection using APAR rules</title>
<p>The objective is to identify and categorize faults in the AHU system using both expert-defined logic and data-driven modelling. While APAR provides interpretable diagnostics without the need for historical labels, machine learning models enhance scalability and fault generalization under complex, nonlinear conditions.</p>
<p>The APAR engine was implemented in Python and applied to the cleaned and standardized dataset described in <xref ref-type="sec" rid="s3-2-1">Section 3.2.1</xref>. To enable effective rule evaluation, it was essential to first determine the system&#x2019;s operating modes based on control signal logic and component behavior. These modes (also referred to as operating states in this study) define the conditions under which each APAR rule should be active. In line with common terminology in AHU diagnostics, the terms free cooling and zero energy mode are also used interchangeably to describe the condition where outdoor air is leveraged for cooling without engaging mechanical cooling. Due to the absence of direct damper position data, modes that depend on damper signals or require high outdoor air fractions were evaluated carefully. An outdoor air fraction (OAF) metric was derived from return, mixed, and outdoor air temperatures and used as a proxy to assess economizer behavior and support mode classification. In line with established practices in AHU diagnostics (<xref ref-type="bibr" rid="B39">Schein and Schein, 2006</xref>; <xref ref-type="bibr" rid="B21">Katipamula and Brambley, 2005a</xref>; <xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>; <xref ref-type="bibr" rid="B22">Katipamula and Brambley, 2005b</xref>), the OAF was estimated using the following mixing equation, as presented in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.<disp-formula id="e3">
<mml:math id="m8">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>Where, <inline-formula id="inf6">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf7">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf8">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote mixed, return, and outdoor air temperatures, respectively.</p>
<p>Mechanical cooling conditions were instead attributed solely to Mode 4 (mechanical cooling with minimum outdoor air), and rules were selected accordingly. An unoccupied mode (Off Mode) was inferred from inactivity across all control signals. The custom mode name, detection logic employed for each mode, the mapped APAR operating mode, and the list of rules that were implemented are presented in <xref ref-type="table" rid="T5">Table 5</xref>. For example, Heating Mode was triggered when the supply and exhaust fans were active, the heating valve was open, and the cooling valve was closed.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Mode detection and rule mapping.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Custom mode name</th>
<th align="center">Detection logic used</th>
<th align="center">Mapped APAR mode(s)</th>
<th align="center">Detection type</th>
<th align="center">Supported rules</th>
<th align="center">Notes</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Heating mode</td>
<td align="center">Heater valve: Open, chiller valve: Closed, fans: On</td>
<td align="center">Mode 1</td>
<td align="center">Direct</td>
<td align="center">1, 2, 3, 4</td>
<td align="center">Fully supported</td>
</tr>
<tr>
<td align="center">Free cooling mode</td>
<td align="center">Fans: On, chiller valve: Closed, heater valve: Closed</td>
<td align="center">Mode 2</td>
<td align="center">Direct</td>
<td align="center">5, 6, 7</td>
<td align="center">Fully supported</td>
</tr>
<tr>
<td align="center">Mechanical cooling mode</td>
<td align="center">Chiller valve: Open, heater valve: Closed, fans: On</td>
<td align="center">Mode 4</td>
<td align="center">Direct</td>
<td align="center">16, 17, 18, 19, and 20</td>
<td align="center">Fully supported</td>
</tr>
<tr>
<td align="center">Unknown occupied mode</td>
<td align="center">AHU ON, but not matching any defined mode</td>
<td align="center">Mode 5</td>
<td align="center">Inferred</td>
<td align="center">22</td>
<td align="center">Only rule 22 retained (no damper dependency)</td>
</tr>
<tr>
<td align="center">All occupied modes</td>
<td align="center">Mode 1, 2, 3, 4, or 5</td>
<td align="center">Direct</td>
<td align="center">Direct</td>
<td align="center">25, 26, 27, 28</td>
<td align="center">Occupancy inferred from fan/valve activity</td>
</tr>
<tr>
<td align="center">OffMode (unoccupied)</td>
<td align="center">All fans, valves, and pumps OFF</td>
<td align="center">Mode 99</td>
<td align="center">Inferred</td>
<td align="center">None</td>
<td align="center">Excluded from APAR rule evaluation</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>While the full APAR framework includes 28 rules, a total of 18 APAR rules were implemented based on the available sensor inputs, including supply air temperature (SAT), return air temperature (RAT), mixed air temperature (MAT), and exhaust air temperature (EAT), along with fan statuses and actuator signals. Ten rules were excluded due to the unavailability of key inputs such as damper position and changeover temperature. The full list of omitted rules and the rationale for their exclusion are detailed in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Omitted APAR rules and justification.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Rule &#x23;</th>
<th align="left">Justification for omission</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">9, 15</td>
<td align="left">Requires change over air temperature (tco) for switching between modes 3 and 4</td>
</tr>
<tr>
<td align="left">8, 10, 12, 13, 14</td>
<td align="left">Related to mode 3 operation; mode 3 omitted based on OAF analysis (OAF &#x2248; 0.3)</td>
</tr>
<tr>
<td align="left">21, 23, 24</td>
<td align="left">Requires damper signal (ud)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To visualize the flow from raw sensor input to rule activation and fault output, a diagnostic architecture diagram is presented in <xref ref-type="fig" rid="F6">Figure 6</xref>. This illustrates how operational modes are detected and used to conditionally apply rule checks across multiple system components.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Logic flow diagram for APAR rule execution.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g006.tif">
<alt-text content-type="machine-generated">Flowchart depicting a fault detection process starting with reading timestamp data, including temperatures, control signals, and operating states. The process involves checking the operating mode, leading to different modes: Heating (R1-R4), Free Cooling (R5-R7), Mechanical Cooling (R16-R20), Unknown Occupied (R22), and All Occupied (R25-R28). Rule threshold checks are applied, comparing sensor values to thresholds. If a fault is detected per timestamp, it is labeled. The process ends with an endpoint.</alt-text>
</graphic>
</fig>
<sec id="s3-2-2-1">
<label>3.2.2.1</label>
<title>Thresholds and tuning parameters</title>
<p>Thresholds form the backbone of APAR fault logic, distinguishing normal variation from actual faults. Key parameters include the supply air temperature deviation threshold (&#x3b5;t), which defines the allowable deviation from expected SAT values, and signal-based thresholds such as &#x3b5;hc and &#x3b5;cc, which represent the minimum activity levels for heating and cooling valves, respectively. These parameters, summarized in <xref ref-type="table" rid="T7">Table 7</xref>, were selected based on experimental tuning and established literature (<xref ref-type="bibr" rid="B40">Schein et al., 2006</xref>; <xref ref-type="bibr" rid="B39">Schein and Schein, 2006</xref>). To reflect the system&#x2019;s typical behavior and align with the available dataset, sensor noise (&#x3b5;t) and fan-induced temperature shifts were adjusted accordingly. For instance, since the AHU under study uses fresh and exhaust fans rather than supply and return fans, both &#x394;Tsf and &#x394;Trf were set to zero.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Threshold parameters used in APAR evaluation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="center">Symbol</th>
<th align="center">Value</th>
<th align="center">Purpose</th>
<th align="center">Tuned</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Threshold for errors in temperature measurements</td>
<td align="center">&#x3b5;t</td>
<td align="center">1.7&#xa0;&#xb0;C</td>
<td align="center">Acceptable deviation before fault is triggered</td>
<td align="center">Tuned for rule 7 and 16</td>
</tr>
<tr>
<td align="left">Threshold parameter accounting for airflow-related errors</td>
<td align="center">&#x3b5;f</td>
<td align="center">0.3</td>
<td align="center">Used in outdoor air fraction calculations</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Threshold parameter for the heating coil valve control signal</td>
<td align="center">&#x3b5;<sub>hc</sub>
</td>
<td align="center">0.05</td>
<td align="center">Defines how close the heating valve signal must be to fully open (1)</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Threshold parameter for the cooling coil valve control signal</td>
<td align="center">&#x3b5;<sub>cc</sub>
</td>
<td align="center">0.05</td>
<td align="center">Defines how close the cooling valve signal must be to fully open (1)</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Temperature rise across the supply fan</td>
<td align="center">&#x394;T<sub>sf</sub>
</td>
<td align="center">0&#xa0;&#xb0;C</td>
<td align="center">Typical SAT rise due to supply fan operation</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Temperature rise across the return fan</td>
<td align="center">&#x394;T<sub>rf</sub>
</td>
<td align="center">0&#xa0;&#xb0;C</td>
<td align="center">Typical RAT rise due to return fan operation</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Threshold on the minimum temperature difference between the return and outdoor air</td>
<td align="center">&#x394;T<sub>min</sub>
</td>
<td align="center">5.6&#xa0;&#xb0;C</td>
<td align="center">Expected difference to detect outdoor air effects</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Threshold on the minimum outdoor air fraction</td>
<td align="center">(Q<sub>oa</sub>/Q<sub>sa</sub>) <sub>min</sub>
</td>
<td align="center">0.2</td>
<td align="center">Required to detect minimum fresh air intake</td>
<td align="center">&#x2013;</td>
</tr>
<tr>
<td align="left">Outdoor air fraction &#x3d; (Tma &#x2212; Tra)/(Toa &#x2212; Tra)</td>
<td align="center">Qoa/Qsa</td>
<td align="left">&#x200b;</td>
<td align="left">&#x200b;</td>
<td align="left">&#x200b;</td>
</tr>
<tr>
<td align="left">Maximum number of mode changes per hour</td>
<td align="center">MTmax</td>
<td align="center">4</td>
<td align="center">Used to flag unstable mode control behavior</td>
<td align="center">Maximum number of mode changes per hour</td>
</tr>
<tr>
<td align="left">Supply air temperature set point</td>
<td align="center">Tsa,s</td>
<td align="center">23.5&#xa0;&#xb0;C</td>
<td align="left">&#x200b;</td>
<td align="left">Derived from SAT(min) and SAT(max) and tuned further for rule 8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A fixed supply air temperature setpoint of 23.5&#xa0;&#xb0;C was adopted for all rule evaluations. This value represents the midpoint between the dataset&#x2019;s recorded <inline-formula id="inf9">
<mml:math id="m12">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi mathvariant="italic">MIN</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (15&#xa0;&#xb0;C) and maximum (32&#xa0;&#xb0;C) SAT bounds, a strategy commonly adopted in APAR studies when actual setpoints are unavailable (<xref ref-type="bibr" rid="B34">Nassif et al., 2022</xref>). While some implementations vary SAT_SP with season or control mode, a fixed midpoint setpoint ensures consistency in fault rule evaluation across different modes.</p>
<p>To improve the reliability of the APAR based fault detection system, a threshold tuning procedure was developed to address the over-detection of faults in specific rules. These excessive fault flags, identified through visual inspection, were often associated with persistent temperature differences between supply air (Tsa) and mixed air (Tma) during periods when the AHU was inactive. Given the absence of mechanical activity, including fan operation, these differences indicated passive thermal effects, or systemic sensor offsets, rather than true faults. This observation was supported by statistical analysis of temperature trends, confirming that a baseline level of temperature rise exists even in inactive modes of operation.</p>
<p>In line with general APAR parameter tuning practices (<xref ref-type="bibr" rid="B22">Katipamula and Brambley, 2005b</xref>), the passive temperature rise was quantified using operational data collected during &#x201c;StateOff&#x201d; periods. Two key temperature differences were computed: (1) the difference between Tsa and Tma, and (2) the difference between return air temperature (Tra) and Tma, weighted by the outdoor air fraction. These metrics reflect residual gains across the supply and return ducts in the absence of airflow. The average of these two values was selected as a threshold adjustment factor. This factor was later incorporated into specific rules to improve fault detection robustness and reduce the occurrence of false alarms. The results of this analysis, along with their implementation in APAR rules, are presented in the Results and Discussion section.</p>
<p>While APAR delivers transparent logic, it is inherently limited by its reliance on pre-defined thresholds and binary logic. To complement it, supervised ML models were developed to classify AHU operating conditions based on the APAR-generated fault labels. Two models were implemented: RF classifier and a feedforward ANN. These models enable pattern recognition in high-dimensional data and generalize beyond rigid rule definitions.</p>
</sec>
</sec>
<sec id="s3-2-3">
<label>3.2.3</label>
<title>Model development and performance</title>
<p>To improve methodological transparency and reproducibility, additional preprocessing and modeling details are summarized in this section. All logical and categorical variables were label-encoded, and all continuous variables were standardized using Z-score normalization to ensure consistent scaling across temperature and control-signal features, in line with established preprocessing practices for HVAC machine learning workflows. The dataset was partitioned using a stratified split into 70% training, 15% validation, and 15% testing subsets to preserve class distribution and enable reliable early stopping during model tuning. Hyperparameters were explicitly defined for both models: the Random Forest classifier employed 100 estimators, Gini impurity, bootstrap sampling, and class weighting, while the ANN consisted of three hidden layers (128-64-32 neurons), ReLU activations, dropout rates of 0.30, 0.20, and 0.20, the Adam optimizer, a batch size of 32, and 50 training epochs. These configurations align with best-practice guidance in recent HVAC analytics literature, emphasizing transparent reporting of preprocessing, normalization, and model hyperparameters to ensure robustness and reproducibility (<xref ref-type="bibr" rid="B48">Ulpiani et al., 2021</xref>; <xref ref-type="bibr" rid="B26">Lestinen et al., 2018</xref>).</p>
<p>Model development was performed in Google Colab using Python 3.1. Libraries used included Pandas and NumPy for data handling, Scikit-learn for ML pipeline development, Matplotlib and Plotly for visualization, and Imbalanced-learn for data augmentation. While additional algorithms such as XGBoost and LightGBM were explored, RF and ANN were chosen as primary models due to their balance between performance, interpretability, and robustness.</p>
<p>The dataset used for model training consisted of the preprocessed and standardized data described in <xref ref-type="sec" rid="s3-2-1">Section 3.2.1</xref>. Logical variables were label-encoded, and continuous variables were scaled using Z-score normalization. To address the imbalance between the majority class (&#x201c;No Fault&#x201d;) and minority fault types, the Synthetic Minority Oversampling Technique (SMOTE) was applied to the training subset only, after the data had been partitioned into 70% training, 15% validation, and 15% testing sets. Feature selection was guided using ANOVA F-test scores and RF feature importance rankings.</p>
<p>The RF model was trained using 100 trees, Gini impurity as the split criterion, bootstrap sampling, and a fixed random seed (42) to ensure reproducibility. Class weighting was enabled to ensure sensitivity to rare fault classes. The full architecture and logic of the model are depicted in <xref ref-type="fig" rid="F7">Figure 7</xref>. Evaluation metrics included accuracy, precision, recall, F1-score per class, and ROC-AUC analysis. The model demonstrated strong performance in detecting both frequent and rare fault types under imbalanced conditions.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Random forest-based fault detection structure.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g007.tif">
<alt-text content-type="machine-generated">Diagram of a fault detection system using decision trees. Input signals like temperature and valve signals are processed by three decision trees. Results undergo majority voting, leading to a fault detection output. The process direction is indicated by a red arrow.</alt-text>
</graphic>
</fig>
<p>The ANN was constructed as a multi-layer feedforward model. The input layer matched the number of selected features. Three hidden layers with 128, 64, and 32 neurons, respectively, utilized ReLU activation functions, while the output layer employed SoftMax to produce class probabilities. Dropout regularization was applied with rates of 30%, 20%, and 20% across the three hidden layers to prevent overfitting. The network was trained using the Adam optimizer and the sparse categorical cross-entropy loss function, as defined in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>.<disp-formula id="e4">
<mml:math id="m13">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>log</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Where L is the loss, <inline-formula id="inf10">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the true class label, and <inline-formula id="inf11">
<mml:math id="m15">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted probability for the correct class. Training was run for 50 epochs with a batch size of 32, and early stopping was used to halt training once validation loss plateaued. The model&#x2019;s structure and training logic are shown in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Neural network-based fault detection structure.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g008.tif">
<alt-text content-type="machine-generated">Neural network diagram showing connections from various inputs like outside temperature, supply air temperature, and others to hidden layers, leading to an output labeled &#x201c;Fault.&#x201d; Inputs are listed on the left, hidden layers in the middle, and output on the right. Connections are represented by lines between nodes.</alt-text>
</graphic>
</fig>
<p>Both ML models were evaluated using confusion matrices, precision-recall tables, and ROC curves for each fault class. While the RF model offered high interpretability and rapid inference, the ANN demonstrated higher fault detection accuracy for complex cases involving subtle multi-variable interactions.</p>
</sec>
<sec id="s3-2-4">
<label>3.2.4</label>
<title>Model evaluation and optimization</title>
<p>The trained ML models RF and ANN were evaluated through a rigorous and multi-dimensional performance assessment strategy to verify their reliability and diagnostic accuracy. Since these models were built to detect a wide range of fault classes based on APAR-labeled training data, including several rare but operationally critical conditions, the evaluation focused not only on overall accuracy but also on the models&#x2019; sensitivity and specificity across individual fault categories.</p>
<p>A fundamental diagnostic tool used in this assessment was the confusion matrix, which provides a compact summary of model predictions versus actual class labels. For each model, a confusion matrix was computed for both the training and testing sets, with entries reflecting true positives, false positives, true negatives, and false negatives. The accuracy metric was computed as the ratio of correctly predicted samples to total predictions, while recall, or true positive rate (TPR), was calculated as the ratio of true positives to the total actual positives in each class. These matrices were visualized as heatmaps, where high diagonal values indicate strong classification performance and off-diagonal values reveal confusion between specific fault types. Such visual diagnostics allowed identification of patterns such as systematic confusion between heating and cooling faults or difficulty distinguishing low-frequency classes from the dominant &#x201c;No Fault&#x201d; category.</p>
<p>In addition to confusion matrices, detailed classification reports were generated for each model, capturing precision, recall, and F1-score for all fault categories. Precision, defined as the proportion of true positives among all predicted positives for a class, reflects how many of the faults flagged by the model were correct. Recall quantifies the model&#x2019;s ability to detect all instances of a given fault type, and the F1-score, as the harmonic mean of precision and recall, balances the trade-off between false alarms and missed detections. These metrics were calculated on the unseen test dataset and compiled into comparative tables to highlight model strengths. The RF model typically achieved higher precision, particularly in the dominant operational mode where no faults were present, whereas the ANN model displayed superior recall for minority classes, especially those with low representation in the training data.</p>
<p>To further analyze the models&#x2019; discriminative power across varying thresholds, ROC curves were plotted for each fault class using a one-vs-all approach. These curves chart the relationship between the TPR and the False Positive Rate (FPR) across all decision boundaries, providing a holistic view of model sensitivity. The Area Under the Curve (AUC) was computed for each ROC curve, serving as a threshold-independent performance metric. AUC values closer to 1.0 indicate excellent model performance, whereas values near 0.5 suggest no better than random classification. These curves were overlaid for the two models, solid lines for the RF and dashed lines for the ANN, to allow direct visual comparison of performance consistency across all classes, particularly for rare and high-risk faults.</p>
<p>Despite strong baseline performance, initial training results revealed a noticeable bias in both models toward the majority class. This imbalance compromised their ability to detect less frequent but operationally significant faults. To address this, a series of model optimization techniques were introduced to enhance the sensitivity, robustness, and generalization capabilities of the classifiers.</p>
<p>The first strategy involved refining the feature set to include only the most informative variables, as determined by statistical and model-based importance rankings. Specifically, features such as Heater Valve Signal, Outside Temperature, Mixed and Supply Air Temperatures, and AHU operational status consistently ranked highest and were retained to improve focus and reduce noise. This dimensionality reduction helped both models generalize better, particularly under oversampling conditions.</p>
<p>Second, the conventional 80-20 train-test split was replaced by a stratified three-part division: 70% for training, 15% for validation, and 15% for testing. The validation set enabled continuous hyperparameter tuning without contaminating the test set, providing a buffer against overfitting. This revision allowed early stopping and dropout monitoring to be tied to actual generalization performance rather than internal variance.</p>
<p>To mitigate the imbalance in fault class frequency, the SMOTE was applied to the training data. SMOTE works by synthetically generating new samples of rare classes by interpolating between existing instances. This effectively enriched the training data with realistic but previously unseen examples of rare faults, enhancing the models&#x27; exposure to operational edge cases and improving their ability to detect failures that occur infrequently but carry high operational risk.</p>
<p>Complementary to SMOTE, class weighting was integrated into the training procedure to prioritize rare classes further, with the weights calculated as shown in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.<disp-formula id="e5">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>Where <italic>w</italic><sub><italic>i</italic></sub> is the weight for class <italic>i</italic>, <italic>N</italic> is the total number of samples, <italic>k</italic> is the number of classes, and <inline-formula id="inf12">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of instances belonging to class <italic>i</italic>. This strategy penalized the misclassification of rare faults more heavily than common operational states, encouraging the model to allocate greater learning capacity to underrepresented events.</p>
<p>For the ANN model specifically, regularization and training control were essential. Dropout layers with rates of 30%, 20%, and 20% were introduced after each hidden layer to prevent co-adaptation of neurons and reduce overfitting. In addition, the training process was monitored using validation loss, and early stopping was employed to halt training once performance on the validation set no longer improved over a defined number of epochs. These methods helped avoid overtraining and ensured model robustness even when trained on augmented and imbalanced data.</p>
</sec>
<sec id="s3-2-5">
<label>3.2.5</label>
<title>Regression model for forecasting AHU fault incidence</title>
<p>To complement the classification models in <xref ref-type="sec" rid="s3-2-3">Section 3.2.3</xref>, a separate feedforward ANN regression model was developed to forecast short-term AHU fault incidence. The aim was to estimate how the total number of APAR-detected faults evolves across the 6-month monitoring period, providing a time profile of expected maintenance needs that corresponds to the sequence shown in <xref ref-type="fig" rid="F20">Figure 20</xref>. The forecasting problem was formulated at daily resolution, using the cleaned 5-min dataset described in <xref ref-type="sec" rid="s3-1">Sections 3.1</xref> and <xref ref-type="sec" rid="s3-2-1">3.2.1</xref> as a starting point.</p>
<p>The original 5-min time series was first aggregated to calendar days. For each day, the number of time steps where the variable &#x201c;Fault Detected&#x201d; differed from &#x201c;no fault&#x201d; was counted. This daily count, which combines all APAR rule activations into a single measure, defined the scalar regression target <inline-formula id="inf13">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> that is referred to as daily fault incidence. In parallel, daily predictors <inline-formula id="inf14">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> were constructed as summaries of the same operational variables used in the classification task. For each day, the mean values of all continuous sensor and control variables were computed, including Outside Temperature, Exhaust Air Temp (Actual), Return Air Temp (Actual), Mixed Air Temp (Actual), Supply Air Temp (Actual), Exhaust Valve Signal, Heater Pump Signal, Outdoor Air Filter (Actual), Heater Valve Signal, Supply Air Temp (Min), AHU Status (Actual), Supply Air Temp (Max), Chiller Pump Signal, Outdoor Air Valve Signal, Chiller Signal, Fresh Air Fan (Actual), Exhaust Air Fan Signal, Exhaust Air Filter (Actual), Chiller Valve Signal, and Heat Recovery Bypass Signal. The categorical &#x201c;State&#x201d; variable was encoded as daily operating mode fractions by calculating the proportion of 5-min intervals spent in each mode (StateOff, StateHeating, StateZeroEnergy, StateCooling, and Unclassified), so that these fractions sum to one for each day. To capture temporal dependence, two simple autoregressive features were added: the fault incidence on the previous day <inline-formula id="inf15">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a 7-day rolling mean of fault incidence over the preceding week. Since these lagged features require at least 7&#xa0;days of history, the first 7&#xa0;days of the monitoring period were discarded, which resulted in samples with paired feature vectors <inline-formula id="inf16">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and targets <inline-formula id="inf17">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. All continuous predictors were standardized using Z score normalization based on the training subset, and the same scaling parameters were applied to the validation and test subsets.</p>
<p>The ANN regression model reused the general feedforward architecture introduced for the classifier, with modifications to the output layer and loss function to accommodate continuous targets. The network consisted of an input layer matching the dimensionality of the daily feature vector, followed by three fully connected hidden layers with 128, 64, and 32 neurons, respectively, each using rectified linear unit activation. Dropout regularization with rates of 0.30, 0.20, and 0.20 was applied after the first, second, and third hidden layer in order to reduce overfitting. The output layer was a single linear neuron that produced the predicted daily fault incidence <inline-formula id="inf18">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The model was implemented in Keras with the Adam optimizer and a mean squared error loss function. Training used mini batches of size 32 for a maximum of 50 epochs, with early stopping based on the validation loss so that training was terminated when generalization performance no longer improved.</p>
<p>To preserve the temporal structure of the series, the samples were split chronologically into 70 percent for training, 15 percent for validation, and 15 percent for testing. The training subset was used to fit the network weights, the validation subset was used for early stopping and selection of hyperparameters such as learning rate and dropout configuration, and the final model was evaluated on the held-out test subset. Forecast performance was examined using standard regression metrics, including root mean squared error and mean absolute error between observed and predicted daily fault counts on the test period, together with the Pearson correlation coefficient to assess how well the model captured day to day variability in fault incidence.</p>
<p>After this evaluation, the selected configuration was retrained on the full monitoring period, again using early stopping, and then used to generate one step ahead predictions across the 6&#xa0;months of data. For each day <inline-formula id="inf19">
<mml:math id="m24">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the model estimated the fault incidence on day <inline-formula id="inf20">
<mml:math id="m25">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> based on the aggregated operational variables and lagged fault incidence up to day <inline-formula id="inf21">
<mml:math id="m26">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The resulting sequence of daily predictions, plotted together with the observed daily fault counts, defines the 6&#xa0;month prediction horizon referred to in <xref ref-type="sec" rid="s4-8">Section 4.8</xref>.</p>
</sec>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Digital twin implementation</title>
<p>Unlike conventional BIM-BMS integration, the proposed DT provides a real-time, bidirectional bridge between measured sensor data and its 3D representation in Revit, while also offering a lightweight and accessible web platform for dynamic visualization. Rather than serving merely as a static geometric replica, the DT functions as a dynamic operational mirror through two core components: a Revit-based integration powered by pyRevit scripting, and a custom-built web interface for external monitoring.</p>
<p>While Revit provides the base environment for modeling and visualization, it lacks native support for real-time data and diagnostic logic. To bridge this gap, pyRevit, a Python-based extension, was used to enable live data integration, rule evaluation, and system feedback within the BIM model. This transformed Revit into an interactive diagnostic tool rather than a static design platform.</p>
<sec id="s3-3-1">
<label>3.3.1</label>
<title>Real-time data and rule integration via pyRevit</title>
<p>To enable real-time monitoring and operational awareness directly within the BIM model, a custom data integration was developed using the pyRevit platform. The objective was to link actual AHU sensor data, including temperature values and control signals, with Autodesk Revit, allowing real-time system behavior to be visualized and analyzed directly in the 3D environment. This approach embeds the dynamic operational state of the AHU inside the digital building model, supporting both maintenance planning and system diagnostics in a spatially contextualized interface.</p>
<p>To establish the data connection between real-time data and the Revit model, a pyRevit extension was developed. This custom script reads sensor values from a CSV file generated by a local monitoring system at 1-s intervals. This continuous data loop ensures that the digital twin reflects real-world system dynamics in near real time. The digital twin operates in a near real-time configuration, not a fully live streaming mode. Data synchronization is achieved through manual CSV uploads and periodic updates, yielding latency within seconds. This distinguishes it from continuous API-based real-time DT architectures as seen in other studies (<xref ref-type="bibr" rid="B16">Ilambirai and Padmini, 2019</xref>). While this semi-real-time workflow validates feasibility within constrained sensor environments, future work will transition to direct BMS-based streaming for full automation. Additionally, three APAR rules (R1, R7, and R16) described in <xref ref-type="sec" rid="s3-2-2">Section 3.2.2</xref> were implemented in simplified form within the pyRevit environment. If one or more rules are triggered based on the current sensor readings, the system provides immediate feedback through pop-up alerts using Revit&#x2019;s TaskDialog interface and highlights the event on a live temperature chart with colored markers. While no direct visual change occurs in the Revit model itself, the external visualization panel serves as a real-time diagnostic.</p>
<p>The overall architecture of this data workflow, including data acquisition, rule processing via pyRevit, and potential integration with Revit parameters, is illustrated in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Architecture of the Data Workflow for AHU Fault Detection via pyRevit and Revit Parameters.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g009.tif">
<alt-text content-type="machine-generated">Flowchart of the Sensor-to-BIM Data Pipeline for AHU Diagnostics via pyRevit: External Sensor Data (API or CSV Source) flows to Data Fetching (Python Script), then to pyRevit Plugin (Rule Processing and Visualization), and finally to Revit Parameters (Current Temperature and Fault Status).</alt-text>
</graphic>
</fig>
<p>This implementation establishes a foundational workflow for future enhancements that could embed operational feedback directly into Revit elements through parameter bindings and model-based visual cues.</p>
</sec>
<sec id="s3-3-2">
<label>3.3.2</label>
<title>Web platform for real-time visualization</title>
<p>To complement the Revit-based integration and extend system accessibility beyond the BIM environment, a lightweight web platform was developed for real-time monitoring of AHU operation. The goal of this platform is to provide a device-agnostic, browser-based interface that can be accessed by engineers, technicians, or facility managers without requiring access to Revit or the BMS. This ensures operational transparency and continuous oversight of critical indoor environmental parameters regardless of the user&#x2019;s technical environment.</p>
<p>Generally, the web platform&#x2019;s architecture is modular and scalable. A Python Flask server functions as the backend, continuously polling live AHU data from the same source used in the pyRevit integration, either a shared CSV file updated by the local data logger or a direct API from the I4 Helse building&#x2019;s monitoring system. The frontend is implemented in JavaScript and uses libraries such as Chart.js or Plotly to render dynamic and responsive graphs. Depending on the polling configuration, sensor values are refreshed every 2&#x2013;5&#xa0;s, enabling the visualization of near-real-time conditions across the system. In this research, however, the interface is initialized through manual CSV upload, allowing users to simulate real-time data streaming. Upon file selection, JavaScript parses the dataset and begins a timed playback loop, refreshing data at 2-s intervals to emulate live monitoring.</p>
<p>The web dashboard displays a curated set of critical performance parameters, including Supply Air Temperature, Return Air Temperature, Mixed Air Temperature, Exhaust Air Temperature, and Outside Temperature, which are plotted using the Chart.js library in dynamic, real-time line graphs. Each chart is color-coded and synchronized to the same time axis to allow for comprehensive trend analysis. In addition to showing current values, the dashboard also presents a rolling time window typically covering the last 30&#xa0;min to help identify trends and transient anomalies. When fault conditions are detected based on rule evaluations or model predictions, the interface highlights them using visual indicators such as color-coded warnings or fault badges.</p>
<p>Crucially, the web platform is not a passive viewer but an intelligent monitoring layer. It actively integrates a subset of the APAR rule engine described in <xref ref-type="sec" rid="s3-2-2">Section 3.2.2</xref>, specifically R1, R7, and R16, applying these rules to the incoming data stream in real time to detect fault conditions. Detected violations are recorded in a fault log panel with timestamped descriptions and are simultaneously reflected in a 3D visualization of the AHU system. This visualization, rendered using Three.js, dynamically changes the color of the AHU model to indicate fault type: red for heating faults (R1), green for zero-energy violations (R7), and blue for cooling faults (R16). In the absence of detected issues, the system defaults to neutral yellow. An auditory alert is also triggered alongside fault conditions to enhance user awareness. These dual detection systems, rule-based and data-driven, work together to ensure high diagnostic coverage and timely response to emerging system issues. This web-based solution effectively externalizes the intelligence of the predictive maintenance framework, making it available to stakeholders in a flexible, platform-independent format.</p>
</sec>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<label>4</label>
<title>Results and discussions</title>
<sec id="s4-1">
<label>4.1</label>
<title>Data preprocessing outcomes</title>
<sec id="s4-1-1">
<label>4.1.1</label>
<title>Multicollinearity test result</title>
<p>To visualize the correlation structure among input features, a Pearson correlation heatmap was generated. The heatmap (<xref ref-type="fig" rid="F10">Figure 10</xref>) illustrates the degree of linear relationship between feature pairs, with values closer to &#xb1;1 indicating stronger correlations.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Heat map illustrating multi-collinearity.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g010.tif">
<alt-text content-type="machine-generated">Heatmap titled &#x201c;Multi-colinearity Heatmap (Pearson Correlation)&#x201d; showing correlation coefficients between various air temperature, valve, and signal measures. The scale ranges from negative one to one, with darker shades indicating stronger correlations. Key correlations include high values between similar metrics, such as 1.0 for identical comparisons, and varied strengths for others, color-coded from light to dark blue.</alt-text>
</graphic>
</fig>
<p>Although the dataset was cleaned before correlation analysis, several features appear without correlation values. This does not indicate missing data but rather the absence of meaningful correlations for features that remained constant throughout the dataset. In this case, four features shown in <xref ref-type="fig" rid="F10">Figure 10</xref>, namely Outdoor Air Filter (Actual), Exhaust Air Filter (Actual), Supply Air Temp (Min), and Supply Air Temp (Max), did not vary over time. Because a constant feature has zero variance, its Pearson correlation coefficient is undefined, which is why these entries appear without values. For example, the two air filters are always operating, and the minimum and maximum supply air temperatures were fixed at 15&#xa0;&#xb0;C and 32&#xa0;&#xb0;C, respectively. In addition to the heatmap, <xref ref-type="table" rid="T8">Table 8</xref> summarizes the feature pairs with correlation values exceeding 0.9, which were considered highly correlated for redundancy screening.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Highly correlated feature pairs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">No.</th>
<th align="left">Feature 1</th>
<th align="left">Feature 2</th>
<th align="left">Correlation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="left">Return air temp (actual)</td>
<td align="left">Mixed air temp (actual)</td>
<td align="left">0.916884</td>
</tr>
<tr>
<td align="center">1</td>
<td align="left">Exhaust valve signal</td>
<td align="left">AHU status (actual)</td>
<td align="left">1.000000</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">Exhaust valve signal</td>
<td align="left">Outdoor air valve signal</td>
<td align="left">1.000000</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">Exhaust valve signal</td>
<td align="left">Fresh air fan (actual)</td>
<td align="left">0.993825</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left">Exhaust valve signal</td>
<td align="left">Exhaust air fan signal</td>
<td align="left">0.996266</td>
</tr>
<tr>
<td align="center">5</td>
<td align="left">Heater pump signal</td>
<td align="left">Heat recovery Bypass signal</td>
<td align="left">0.906359</td>
</tr>
<tr>
<td align="center">6</td>
<td align="left">AHU status (actual)</td>
<td align="left">Outdoor air valve signal</td>
<td align="left">1.000000</td>
</tr>
<tr>
<td align="center">7</td>
<td align="left">AHU status (actual)</td>
<td align="left">Fresh air fan (actual)</td>
<td align="left">0.993825</td>
</tr>
<tr>
<td align="center">8</td>
<td align="left">AHU status (actual)</td>
<td align="left">Exhaust air fan signal</td>
<td align="left">0.996266</td>
</tr>
<tr>
<td align="center">9</td>
<td align="left">Outdoor air valve signal</td>
<td align="left">Fresh air fan (actual)</td>
<td align="left">0.993825</td>
</tr>
<tr>
<td align="center">10</td>
<td align="left">Outdoor air valve signal</td>
<td align="left">Exhaust air fan signal</td>
<td align="left">0.996266</td>
</tr>
<tr>
<td align="center">11</td>
<td align="left">Chiller signal</td>
<td align="left">Chiller valve signal</td>
<td align="left">0.994473</td>
</tr>
<tr>
<td align="center">12</td>
<td align="left">Fresh air fan (actual)</td>
<td align="left">Exhaust air fan signal</td>
<td align="left">0.991044</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>While several strongly correlated feature pairs were observed, such as Exhaust Air Fan Signal and Exhaust Valve Signal, the decision was made to retain all features for the initial iteration. This was based on the relatively small dataset size and the potential importance of each variable to system behavior. Redundant features may still be considered for removal in future iterations or feature importance analysis.</p>
</sec>
<sec id="s4-1-2">
<label>4.1.2</label>
<title>Feature importance analysis</title>
<p>Temperature-related features emerged as the most critical factors influencing fault prediction. As shown in <xref ref-type="fig" rid="F11">Figure 11</xref>, the &#x201c;Heater Valve Signal&#x201d; had the highest importance in the Random Forest model, with a score of 0.358, followed by &#x201c;Outside Temperature&#x201d; (0.156), &#x201c;Supply Air Temp (Actual)&#x201d; (0.106), and &#x201c;Mixed Air Temp (Actual)&#x201d; (0.103). These findings suggest that external and internal temperature variations significantly affect AHU stability, potentially influencing heating, cooling, and ventilation performance. &#x201c;Return Air Temp (Actual)&#x201d; (0.091) and &#x201c;Exhaust Air Temp (Actual)&#x201d; (0.068) also contributed meaningfully, reinforcing the role of air temperature regulation in maintaining system reliability. High ANOVA scores for these features, which are presented in <xref ref-type="table" rid="T9">Table 9</xref>, further validate their statistical relevance, confirming that temperature fluctuations are closely associated with fault occurrences. The &#x201c;State&#x201d; variable, representing the operational mode of the AHU, displayed an importance score of 0.027, indicating that system state transitions play a significant role in predicting malfunctions.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Feature importance ranking for AHU fault prediction using the Random Forest model.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g011.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x201c;Feature Importance for Fault Prediction&#x201d; showing the impact of various features on a model, using a random forest algorithm. Top factors are &#x201c;Heater Valve Signal&#x201d; and &#x201c;Outside Temperature.&#x201d; Feature importance values range from 0.00 to 0.35.</alt-text>
</graphic>
</fig>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>Feature importance analysis for AHU fault prediction using ANOVA F-test and random forest.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Feature</th>
<th align="center">ANOVA score</th>
<th align="center">Random forest importance</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Heater valve signal</td>
<td align="center">2,233.842</td>
<td align="center">0.358</td>
</tr>
<tr>
<td align="center">Outside temperature</td>
<td align="center">80.433</td>
<td align="center">0.156</td>
</tr>
<tr>
<td align="center">Supply air temp (actual)</td>
<td align="center">1,140.263</td>
<td align="center">0.106</td>
</tr>
<tr>
<td align="center">Mixed air temp (actual)</td>
<td align="center">333.297</td>
<td align="center">0.103</td>
</tr>
<tr>
<td align="center">Return air temp (actual)</td>
<td align="center">808.930</td>
<td align="center">0.091</td>
</tr>
<tr>
<td align="center">Exhaust air temp (actual)</td>
<td align="center">1,137.871</td>
<td align="center">0.068</td>
</tr>
<tr>
<td align="center">State</td>
<td align="center">642.799</td>
<td align="center">0.027</td>
</tr>
<tr>
<td align="center">Chiller valve signal</td>
<td align="center">1,403.744</td>
<td align="center">0.022</td>
</tr>
<tr>
<td align="center">Heat recovery Bypass signal</td>
<td align="center">310.091</td>
<td align="center">0.017</td>
</tr>
<tr>
<td align="center">Chiller signal</td>
<td align="center">1,400.981</td>
<td align="center">0.014</td>
</tr>
<tr>
<td align="center">Fresh air fan (actual)</td>
<td align="center">364.451</td>
<td align="center">0.011</td>
</tr>
<tr>
<td align="center">Exhaust air fan signal</td>
<td align="center">359.894</td>
<td align="center">0.008</td>
</tr>
<tr>
<td align="center">Heater pump signal</td>
<td align="center">321.573</td>
<td align="center">0.007</td>
</tr>
<tr>
<td align="center">Chiller pump signal</td>
<td align="center">362.161</td>
<td align="center">0.006</td>
</tr>
<tr>
<td align="center">Exhaust valve signal</td>
<td align="center">359.992</td>
<td align="center">0.004</td>
</tr>
<tr>
<td align="center">AHU status</td>
<td align="center">359.992</td>
<td align="center">0.003</td>
</tr>
<tr>
<td align="center">Outdoor air valve signal</td>
<td align="center">359.992</td>
<td align="center">0.002</td>
</tr>
<tr>
<td align="center">Supply air temp (max)</td>
<td colspan="2" align="center">0</td>
</tr>
<tr>
<td align="center">Supply air temp (min)</td>
<td colspan="2" align="center">0</td>
</tr>
<tr>
<td align="center">Outdoor air filter (actual)</td>
<td colspan="2" align="center">0</td>
</tr>
<tr>
<td align="center">Exhaust air filter (actual)</td>
<td colspan="2" align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Several operational control features also demonstrated a strong influence on fault detection. &#x201c;Chiller Valve Signal&#x201d; (0.022), &#x201c;Heat Recovery Bypass Signal&#x201d; (0.017), and &#x201c;Chiller Signal&#x201d; (0.014) emerged as key indicators of AHU performance, emphasizing the importance of monitoring mechanical components and airflow regulation. The ANOVA analysis further highlighted the significance of control-related parameters, with the highest value achieved by &#x201c;Heater Valve Signal&#x201d; (2,233.842), followed by &#x201c;Chiller Valve Signal&#x201d; (1,403.744), and &#x201c;Chiller Signal&#x201d; (1,400.981), demonstrating particularly high variance in relation to fault occurrences. These findings indicate that heating and cooling system components contribute substantially to fault prediction and require closer monitoring for effective fault diagnosis.</p>
<p>Specific parameters exhibited negligible or no importance in both methods. Features such as &#x201c;Supply Air Temp (Max),&#x201d; &#x201c;Supply Air Temp (Min),&#x201d; &#x201c;Outdoor Air Filter (Actual),&#x201d; and &#x201c;Exhaust Air Filter (Actual)&#x201d; had no measurable impact on the fault detection model, suggesting that they do not significantly contribute to predicting system failures. Retaining irrelevant features could introduce noise into the model, reducing its accuracy and increasing computational complexity. To prevent unnecessary redundancy and maintain predictive efficiency, these non-influential features were eliminated from further analysis.</p>
<p>Following this refinement, the final set of features retained for predictive modeling included &#x201c;Heater Valve Signal,&#x201d; &#x201c;Outside Temperature,&#x201d; &#x201c;Supply Air Temp (Actual),&#x201d; &#x201c;Mixed Air Temp (Actual),&#x201d; &#x201c;Return Air Temp (Actual),&#x201d; &#x201c;Exhaust Air Temp (Actual),&#x201d; &#x201c;State,&#x201d; &#x201c;Chiller Valve Signal,&#x201d; &#x201c;Heat Recovery Bypass Signal,&#x201d; &#x201c;Chiller Signal,&#x201d; and &#x201c;Fresh Air Fan (Actual).&#x201d; These features collectively capture the most relevant aspects of AHU operation, ensuring that the fault prediction model is built on data-driven, high-impact variables. The identification and selection of these parameters enhance model accuracy, optimize predictive performance, and contribute to a more effective fault detection strategy.</p>
</sec>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Operating state (mode) classification</title>
<p>Each time step in the dataset was classified into one of the custom operating modes based on the control signal logic outlined in <xref ref-type="table" rid="T5">Table 5</xref>. This classification used combinations of valve and fan statuses to assign samples to Heating, Free Cooling, Mechanical Cooling (Mode 4 only), Unclassified Occupied, All Occupied, or OffState modes. Outdoor air fraction (OAF) analysis revealed an average OAF of approximately 0.3, indicating limited use of outdoor air and validating the exclusion of Mode 3 (100% outdoor air operation) from this study. As a result, all instances of mechanical cooling were attributed exclusively to Mode 4.</p>
<p>
<xref ref-type="fig" rid="F12">Figure 12</xref> illustrates the distribution of operational states for the AHU according to the available dataset. Approximately 75.7% of the data points are categorized under the (State Off) condition. This trend indicates that the AHU remains inactive for a significant portion of the monitored period. Following that, (State Heating) represents 14.7%, suggesting a considerable demand for heating or operation that needs urgent attention. (State Zero Energy), which generally reflects minimal or balanced energy conditions, contributes 5.3%, while (State Cooling) accounts for 3.8%, demonstrating less frequent cooling operations. Notably, the Unclassified State, although representing only 0.4%, is highlighted to detect potential anomalies or data segments that did not satisfy the established operational criteria. This distribution provides valuable information on the operational performance of the AHU and serves as a critical reference for further analyses, including fault detection and energy performance assessment.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Distribution of AHU states.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g012.tif">
<alt-text content-type="machine-generated">Pie chart titled &#x201c;Distribution of AHU States&#x201d; with segments: StateOff at 75.7% (grey), StateHeating at 14.7% (orange), StateFreeCooling at 5.3% (blue), StateCooling at 3.8% (green), Unclassified at 0.4% (purple).</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Fault statistics and rule violation</title>
<p>Based on the available data and operating mode classification, 18 APAR rules were effectively implemented across the supported modes. The rule engine was executed per timestep, with logical conditions assessed based on the active mode and sensor/control signal inputs. The implemented rules covered various fault categories, including thermal performance, setpoint tracking, and control logic inconsistencies.</p>
<p>As described in <xref ref-type="sec" rid="s3-2-2">Section 3.2.2</xref> and summarized in <xref ref-type="table" rid="T6">Table 6</xref>, ten APAR rules were excluded due to missing input variables or inactive operating conditions. Specifically, rules associated with Mode 3 were omitted from implementation based on the outdoor air fraction analysis, which confirmed the absence of full economizer operation. Additional exclusions were due to unavailable damper signals or sensors required for Mode 3/4 changeover logic. Despite these omissions, the retained 18 rules provided broad diagnostic coverage across the operational modes analyzed.</p>
<p>A critical limitation of the current field implementation lies in the restricted diagnostic coverage resulting from the application of only 18 of the 28 standard APAR rules. This reduction was primarily due to constraints within the building management system, notably the absence of key input variables such as damper position feedback and changeover temperature signals, which are essential for executing the complete rule set. The omission of these parameters introduces diagnostic blind spots that hinder the detection of specific, well-documented HVAC faults, including malfunctioning economizer dampers, improper mixed-air control, and mode sequencing anomalies. Previous studies have demonstrated that incomplete or low-resolution sensor networks can significantly impair fault-detection accuracy and limit the generalizability of machine-learning-assisted FDD systems (<xref ref-type="bibr" rid="B30">Maksoud et al., 2022</xref>; <xref ref-type="bibr" rid="B42">Schreiber et al., 2021</xref>). This constraint underscores the necessity of conducting a structured sensitivity analysis to quantify the influence of rule omission on overall diagnostic performance. Future work will therefore investigate the integration of virtual sensing and soft-sensing methodologies to estimate the missing variables and restore full rule coverage, thereby improving fault visibility and diagnostic robustness (<xref ref-type="bibr" rid="B27">Littleog and Rubin, 2019</xref>).</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Fault detection overview using APAR</title>
<p>The dataset reveals that the category with the highest fault density is &#x201c;StateCooling,&#x201d; which has a fault rate of 28.55%. In contrast, &#x201c;StateHeating,&#x201d; associated with heating functions, shows a rate of 8.37%. Meanwhile, &#x201c;StateZeroEnergy,&#x201d; indicating low energy usage, corresponds to a rate of 4.79%. Interestingly, &#x201c;StateOff,&#x201d; which signifies system inactivity, recorded no faults in its entries. Additionally, Unclassified entries, which could not be assigned to a specific state, accounted for 1.35%. This distribution underscores a higher likelihood of faults during active heating and cooling periods, as illustrated in <xref ref-type="fig" rid="F13">Figure 13</xref>. A comprehensive analysis of individual APAR rule infractions provides significant insights into the specific types and frequencies of operational faults within the system, presented in <xref ref-type="table" rid="T10">Table 10</xref>. Throughout the observation period, 50,296 data points were documented as &#x201c;No Fault,&#x201d; representing the majority of the recorded observations, indicating normal system operation. However, several recurring issues were observed. The main occurrence was &#x201c;fully open valve with drifting temperature during heating&#x201d; (Rule 4), which occurred 521 times. &#x201c;High cooling supply temperature&#x201d; (Rule 16), insufficient cooling with open valves (Rules 19 and 20), and inconsistent heating outputs (Rule 3) were additional serious issues. Periodic faults related to free cooling, such as supply and mixed temperature mismatches (Rule 7), room not cooling properly (Rule 6), and warm outdoor air affecting cooling (Rule 5), were also noted. Additionally, less frequent but notable issues included mode switching (Rule 28) and simultaneous heating and cooling (Rule 22). Overall, these findings emphasize the framework&#x2019;s ability to enhance diagnostics and optimize AHU operations.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Fault distribution by AHU state.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g013.tif">
<alt-text content-type="machine-generated">Pie chart illustrating fault percentage by AHU state. StateCooling occupies 65 percent in blue, StateHeating 20.19 percent in green, StateFreeCooling 11.56 percent in orange, and Unclassified 3.25 percent in red.</alt-text>
</graphic>
</fig>
<table-wrap id="T10" position="float">
<label>TABLE 10</label>
<caption>
<p>Fault counts per rule.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Fault computed</th>
<th align="center">Fault counts</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">No fault</td>
<td align="center">50,296</td>
</tr>
<tr>
<td align="center">Rule 4: Heating: Fully open valve, drifting temp</td>
<td align="center">521</td>
</tr>
<tr>
<td align="center">Rule 19: Mech cooling: Valve fully open, insufficient cooling</td>
<td align="center">265</td>
</tr>
<tr>
<td align="center">Rule 20: Mech cooling: Valve fully open, drifting temp</td>
<td align="center">147</td>
</tr>
<tr>
<td align="center">Rule 16: Mech cooling: Supply temp too high</td>
<td align="center">121</td>
</tr>
<tr>
<td align="center">Rule 3: Heating: Fully open valve, insufficient heating</td>
<td align="center">85</td>
</tr>
<tr>
<td align="center">Rule 7: Free cooling: Supply and mixed temp inconsistency</td>
<td align="center">79</td>
</tr>
<tr>
<td align="center">Rule 5: Free cooling: outdoor air too warm for cooling</td>
<td align="center">27</td>
</tr>
<tr>
<td align="center">Rule 6: Free cooling: Room not cooling properly</td>
<td align="center">25</td>
</tr>
<tr>
<td align="center">Rule 1: Heating: low supply temp in heating</td>
<td align="center">20</td>
</tr>
<tr>
<td align="center">Rule 28: Frequent mode switching</td>
<td align="center">12</td>
</tr>
<tr>
<td align="center">Rule 22: Heating and cooling conflict</td>
<td align="center">1</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-5">
<label>4.5</label>
<title>Threshold tuning factor estimation</title>
<p>To quantify the passive temperature rise across the AHU under non-operational conditions, temperature data were analyzed during periods classified as &#x201c;StateOff,&#x201d; when no active heating, cooling, or ventilation was expected. Following fault detection best practices, these intervals offered a reliable baseline for identifying inherent thermal drift or ambient thermal gain (<xref ref-type="bibr" rid="B22">Katipamula and Brambley, 2005b</xref>).</p>
<p>Statistical analysis revealed an average temperature rise of approximately 3.3&#xa0;&#xb0;C between the supply air (Tsa) and mixed air (Tma), and about 1.7&#xa0;&#xb0;C between the return air (Tra) and Tma, weighted by the outdoor air fraction. The average of these two values, 2.5&#xa0;&#xb0;C, was adopted as a representative threshold tuning factor. This empirically derived threshold was applied to selected APAR rules, resulting in a substantial reduction in false fault detections by filtering out minor temperature differences between the supply air and the setpoint, that is, deviations caused by ambient conditions or sensor offset rather than actual system faults.</p>
</sec>
<sec id="s4-6">
<label>4.6</label>
<title>Rule-specific tuning outcomes</title>
<p>Specific APAR rules were found to be particularly sensitive to small temperature deviations, resulting in an excessive number of false positives. In particular, Rules 7 and 16 initially used an error threshold <inline-formula id="inf22">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1.7</mml:mn>
<mml:mo>&#xb0;</mml:mo>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> which frequently triggered non-critical alarms during idle or transitional states.</p>
<p>To improve robustness, the 2.5&#xa0;&#xb0;C threshold adjustment factor derived from passive duct temperature analysis was added to the original threshold. This resulted in an effective <inline-formula id="inf23">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4.2</mml:mn>
<mml:mo>&#xb0;</mml:mo>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for both rules. Rule 7, which evaluates <inline-formula id="inf24">
<mml:math id="m29">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mtext>Tsa</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x394;</mml:mo>
<mml:mtext>Tsf</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>Tma</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and Rule 16, which flags <inline-formula id="inf25">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">T</mml:mi>
<mml:mtext>ma</mml:mtext>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x394;</mml:mo>
<mml:mtext>Tsf</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, were thereby tuned to suppress minor thermal variations while maintaining sensitivity to significant anomalies. This adjustment significantly reduced nuisance alarms, in line with recent research that emphasizes the value of data-driven threshold setting in HVAC fault detection (<xref ref-type="bibr" rid="B57">Zhu et al., 2022</xref>).</p>
</sec>
<sec id="s4-7">
<label>4.7</label>
<title>Visualization of key APAR rule violations</title>
<p>This section presents selected visualizations of rule violations detected during the dataset period to offer deeper insight into the practical triggering of specific APAR rules. Each case illustrates the temperature trends, control signal behavior, and operational state at the time of the fault. These visualizations validate the logical foundation of each rule and demonstrate the correlation between system behavior and rule activation.</p>
<sec id="s4-7-1">
<label>4.7.1</label>
<title>Rule 3 violation: heating coil fully open with insufficient heating</title>
<p>
<xref ref-type="fig" rid="F14">Figure 14</xref> illustrates the violation of Rule 3, which states that a fault is detected when the heating valve is fully open <inline-formula id="inf26">
<mml:math id="m31">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3b5;</mml:mi>
<mml:mtext>hc</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> but the supply air temperature remains significantly below the setpoint <inline-formula id="inf27">
<mml:math id="m32">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3b5;</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>. On 8 March 2021, between 09:35 and 11:35, the AHU operated in StateHeating, during which the heating valve command remained at 100%, indicating full activation of the heating coil. However, as shown in the middle plot, the supply air temperature (Tsa) stagnated at approximately 14.35&#xa0;&#xb0;C, failing to approach the setpoint of 23.5&#xa0;&#xb0;C. This prolonged deviation, despite maximum valve effort, suggests a heating performance deficiency. Possible root causes may include insufficient hot water supply, coil fouling, or a sensor calibration issue. The bottom plot confirms that no cooling coil activity occurred during this time (ucc &#x3d; 0), affirming that heating was the primary conditioning mode.</p>
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>Rule 3 violation: Heating coil fully open but supply air temperature failed to reach setpoint (8 March 2021).</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g014.tif">
<alt-text content-type="machine-generated">Three-panel line graph showing faults detected over time on March 8, 2021. The top panel displays states and faults, with a heating fault occurring between 10:00 and 11:30. The middle panel shows temperature in degrees Celsius, highlighting a drop in temperature at 10:00. The bottom panel indicates signal percentage, showing changes around 10:00. Labels mark specific states and values.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-7-2">
<label>4.7.2</label>
<title>Rule 7 violation: inconsistent supply and mixed air temperatures during free cooling</title>
<p>
<xref ref-type="fig" rid="F15">Figure 15</xref> presents a time-series visualization of the AHU operating state, temperature measurements, and control signals during a Rule 7 fault detection event. This rule applies during free cooling (Mode 2) and expects the supply air temperature (Tsa) and mixed air temperature (Tma) to be nearly equal. On 2 May 2021, a Rule 7 violation was recorded from 14:55 to 18:15. As seen in the second plot, the temperature difference between Tsa and Tma consistently exceeded the adjusted threshold of 4.2&#xa0;&#xb0;C, triggering the Rule 7 fault. The discrepancy could be caused by either inaccurate sensor readings or performance degradation in the air mixing process. The control signal plot further illustrates that both the heating and cooling coil valve signals remained at 0%, indicating that mechanical conditioning was disabled and cooling was expected to occur via outdoor air ventilation. This confirms that neither heating nor mechanical cooling influenced this behavior, affirming the presence of a free cooling anomaly.</p>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>Rule 7 violation: Significant Tsa-Tma deviation during free cooling with no active conditioning (2 May 2021).</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g015.tif">
<alt-text content-type="machine-generated">Three graphs show data over time. The first graph displays states and faults, highlighting a &#x201c;Free Cooling&#x201d; fault between 15:00 and 18:00. The second graph shows temperatures, with Tsa at 23.84&#x00B0;C and Tma at 19.54&#x00B0;C. The third graph depicts signal percentages for uhc and ucc, with uhc peaking sharply around 18:00. The x-axis represents time on May 2, 2021.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-7-3">
<label>4.7.3</label>
<title>Rule 16 violation: supply temperature too high in StateCooling</title>
<p>Rule 16 identifies anomalies in mechanical cooling mode when the supply air temperature (T<sub>sa</sub>) exceeds the expected cooling performance threshold, calculated as <inline-formula id="inf28">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x394;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">T</mml:mi>
<mml:mtext>sf</mml:mtext>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3b5;</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. On 7 June 2021, between 14:20 and 15:40, this rule was activated while the AHU was operating in mechanical cooling. As presented in <xref ref-type="fig" rid="F16">Figure 16</xref>, throughout this period, the cooling coil (u<sub>cc</sub>) was fully open (100%), yet the supply air temperature persisted at a level higher than the mixed air temperature. At 14:50, for example, T<sub>ma</sub> &#x3d; 26.41&#xa0;&#xb0;C and T<sub>sa</sub> &#x3d; 32.14&#xa0;&#xb0;C, violating the rule&#x2019;s threshold. This persistent deviation suggests ineffective cooling performance despite maximum valve actuation, potentially due to chiller inefficiency or other mechanical faults within the cooling subsystem.</p>
<fig id="F16" position="float">
<label>FIGURE 16</label>
<caption>
<p>Rule 16 violation: Inadequate cooling response despite fully open cooling valve (7 June 2021).</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g016.tif">
<alt-text content-type="machine-generated">Three charts display data over time from June 7, 2021. Top chart shows faults and states with a red line indicating a fault due to high supply temperature at 14:45. Middle chart shows temperatures, with Tsa at 32.14&#x00B0;C and Tma at 26.41&#x00B0;C. Bottom chart indicates signal percentages, with ucc at 100% and uhc at 0%.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-7-4">
<label>4.7.4</label>
<title>Rule 28 violation: frequent mode switching</title>
<p>Rule 28 is designed to detect excessive transitions between operating modes that may indicate unstable control behavior or sensor anomalies. It is applied across all occupied modes. On 23 April 2021, between 09:00 and 10:00, the AHU underwent multiple rapid transitions among StateCooling, StateHeating, and Unclassified. The number of mode changes during this 1-h period exceeded the predefined threshold (MT<sub>max</sub> &#x3d; 4), thereby triggering Rule 28. This behavior is illustrated in <xref ref-type="fig" rid="F17">Figure 17</xref>, where the red line denotes the fault signal and the blue line reflects the fluctuating operating states. Such frequent switching can result in inefficient system operation, occupant discomfort, and accelerated wear of mechanical components.</p>
<fig id="F17" position="float">
<label>FIGURE 17</label>
<caption>
<p>Rule 28 violation: Excessive operating mode transitions within 1&#xa0;hour (23 April 2021).</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g017.tif">
<alt-text content-type="machine-generated">Line graph titled &#x201c;Faults Detected&#x201d; shows states and faults over time on April 23, 2021. The blue line indicates states like Cooling, Heating, and Unclassified. The red line shows a fault labeled &#x201c;Frequent mode switching.&#x201d; The x-axis represents the time from 08:45 to 10:45.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4-8">
<label>4.8</label>
<title>Evaluation of model performance</title>
<p>The performance of the two supervised learning classifiers, Random Forest (RF) and Artificial Neural Network (ANN), was assessed utilizing standard classification metrics, including classification accuracy, confusion matrices, precision, recall, F1-scores, and Receiver Operating Characteristic (ROC) curve analysis. The evaluation addressed both overall classification quality and the models&#x2019; ability to detect rare yet critical AHU faults under imbalanced data conditions. As summarized in <xref ref-type="table" rid="T11">Table 11</xref>, both models exhibited very high overall accuracy (ANN: 99.41%; RF: 99.71%) and weighted F1&#x2010;scores (ANN: 99.38%; RF: 99.69%), reflecting their ability to identify the dominant &#x201c;No Fault&#x201d; condition correctly. Nevertheless, their performance diverged on minority fault categories: ANN achieved only 0.14 recall on Class 0 (&#x201c;Free Cooling: outdoor air too warm&#x201d;) and completely failed to detect Class 3 (&#x201c;Frequent Mode Switching&#x201d;), whereas RF attained perfect precision and recall on Class 0 but likewise missed Class 3. These differences are further reflected in the macro&#x2010;averaged F1&#x2010;scores of 0.74 (ANN) and 0.85 (RF), indicating superior baseline generalization by the RF model. ROC analysis confirmed near&#x2010;ideal separability for most classes (AUC &#x2248; 1.00), except for Class 3 (ANN: 0.88; RF: 0.75), underscoring the need for targeted remediation.</p>
<table-wrap id="T11" position="float">
<label>TABLE 11</label>
<caption>
<p>Performance metrics before and after optimization.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">Accuracy (%)</th>
<th align="center">Macro avg (F1)</th>
<th align="center">Weighted avg (F1)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">ANN (initial)</td>
<td align="center">99.41</td>
<td align="center">0.736</td>
<td align="center">0.994</td>
</tr>
<tr>
<td align="center">RF (initial)</td>
<td align="center">99.71</td>
<td align="center">0.851</td>
<td align="center">0.997</td>
</tr>
<tr>
<td align="center">ANN (optimized)</td>
<td align="center">98.87</td>
<td align="center">0.763</td>
<td align="center">0.990</td>
</tr>
<tr>
<td align="center">RF (optimized)</td>
<td align="center">99.63</td>
<td align="center">0.848</td>
<td align="center">0.996</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To address this imbalance, SMOTE and class&#x2010;weighted training were applied. After optimization, as shown in <xref ref-type="table" rid="T12">Table 12</xref>, ANN&#x2019;s recall for Class 0 and Class 3 improved dramatically, from 0.14 to 1.00 and from 0.00 to 1.00, respectively; however, its overall accuracy decreased modestly to 98.8%. Corresponding weighted precision, recall, and F1&#x2010;scores were 99.3%, 98.8%, and 99.0%. RF maintained robust accuracy (99.6%) and high weighted metrics (precision: 99.7%; recall: 99.6%; F1&#x2010;score: 99.6%) but continued to overlook Class 3. Macro&#x2010;averaged F1&#x2010;scores rose to 0.76 (ANN) and remained at 0.85 (RF), confirming enhanced detection of minority classes at minimal cost to overall performance. ROC curves after optimization, presented in <xref ref-type="fig" rid="F18">Figure 18</xref>, demonstrate both models achieving AUC &#x3d; 1.00 for Class 3, a substantial improvement, and only a negligible drop in RF&#x2019;s AUC for Class 9 (to 0.99).</p>
<table-wrap id="T12" position="float">
<label>TABLE 12</label>
<caption>
<p>Comparison of recall for rare faults before and after optimization.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Fault class</th>
<th align="center">Fault type</th>
<th align="center">ANN recall (before)</th>
<th align="center">ANN recall (after)</th>
<th align="center">RF recall (before)</th>
<th align="center">RF recall (after)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Class 0</td>
<td align="center">Free cooling: outdoor air too warm for cooling</td>
<td align="center">0.143</td>
<td align="center">1.000</td>
<td align="center">1.000</td>
<td align="center">1.000</td>
</tr>
<tr>
<td align="center">Class 3</td>
<td align="center">Frequent mode switch</td>
<td align="center">0.000</td>
<td align="center">1.000</td>
<td align="center">0.000</td>
<td align="center">0.000</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F18" position="float">
<label>FIGURE 18</label>
<caption>
<p>ROC curves for ANN and RF models, showing the class-wise discrimination ability, with improved AUC scores after data balancing and model refinement.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g018.tif">
<alt-text content-type="machine-generated">ROC curve comparing ANN and Random Forest for different classes. The x-axis represents the false positive rate, and the y-axis represents the true positive rate. Most classes for both models achieve an AUC of 1.00, indicating high performance. One class for ANN has an AUC of 0.99, and one for Random Forest an AUC of 0.99. A diagonal line indicates random chance.</alt-text>
</graphic>
</fig>
<p>A more detailed comparison of sensitivity to rare faults is presented in <xref ref-type="table" rid="T13">Table 13</xref>. ANN&#x2019;s precision on Class 0 increased from 0.50 to 0.64, and although its false&#x2010;positive rate rose, the model&#x2019;s ability to flag rare faults was markedly improved. The RF model preserved very high precision (1.00) across rare&#x2010;fault classes, reinforcing its reliability where false alarms can be costly. Confusion&#x2010;matrix visualizations shown in <xref ref-type="fig" rid="F19">Figure 19</xref> corroborate these findings by illustrating the reduction in misclassifications for ANN on previously undetected classes and the RF model&#x2019;s consistently low false&#x2010;alarm rates.</p>
<table-wrap id="T13" position="float">
<label>TABLE 13</label>
<caption>
<p>Comparison of precision for rare faults before and after optimization.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Fault class</th>
<th align="center">Fault type</th>
<th align="center">ANN precision (before)</th>
<th align="center">ANN precision (after)</th>
<th align="center">RF precision (before)</th>
<th align="center">RF precision (after)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Class 0</td>
<td align="center">Free cooling: outdoor air too warm for cooling</td>
<td align="center">0.500</td>
<td align="center">0.643</td>
<td align="center">1.000</td>
<td align="center">1.000</td>
</tr>
<tr>
<td align="center">Class 3</td>
<td align="center">Frequent mode switch</td>
<td align="center">0.000</td>
<td align="center">0.222</td>
<td align="center">0.000</td>
<td align="center">0.000</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F19" position="float">
<label>FIGURE 19</label>
<caption>
<p>Confusion matrices for ANN and Random Forest models, illustrating the classification performance on the test dataset after optimization.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g019.tif">
<alt-text content-type="machine-generated">Six confusion matrices compare the performance of Artificial Neural Network (ANN) and Random Forest (RF) models across training, validation, and test datasets. Matrices display actual vs. predicted labels, with higher diagonal values indicating better performance. Top row: ANN train and validation. Middle row: RF train and validation. Bottom row: ANN and RF test. Side color bars represent the scale of predictions.</alt-text>
</graphic>
</fig>
<p>Finally, an ANN-based regression model was utilized to forecast AHU fault incidence over a 6-month period. The temporal profile illustrated in <xref ref-type="fig" rid="F20">Figure 20</xref> reveals a sequence of distinct fault occurrences, with an early concentration in late summer and autumn, and prominent spikes during the winter months. Notable peaks in fault activity occur in December and February, reflecting transitions in seasonal load demands and system response to extreme conditions. The model highlights recurring faults, including valve&#x2010;related inefficiencies, frequent mode switching, and low supply temperatures. Additionally, cooling-related faults, such as outdoor air being too warm and insufficient cooling valve performance, appear earlier in the prediction horizon. Periods of minimal predicted fault activity offer opportunities for maintenance scheduling and operational recalibration. These results underscore the model&#x2019;s potential in facilitating proactive maintenance and improving AHU energy efficiency.</p>
<fig id="F20" position="float">
<label>FIGURE 20</label>
<caption>
<p>ANN-based regression predictions of AHU faults over the next 6&#xa0;months, showing expected fault occurrences and seasonal trends.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g020.tif">
<alt-text content-type="machine-generated">Line chart titled &#x201c;Fault Trends Over the Next 6 Months&#x201d; displaying detected faults over time. The x-axis shows dates from September 2021 to March 2022. The y-axis lists faults, including insufficient heating and cooling. Peaks in faults occur in October 2021, December 2021, and February 2022. Blue line with dots represents detected faults.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-9">
<label>4.9</label>
<title>Demonstration of the digital twin platform functionality and outcome</title>
<sec id="s4-9-1">
<label>4.9.1</label>
<title>Real-time data and rule integration via pyRevit</title>
<p>Upon executing the pyRevit-based fault detection system, the real-time monitoring script processed AHU sensor data and effectively identified several fault events in accordance with the APAR rules. The system&#x2019;s real-time capabilities ensure prompt identification and resolution of faults.</p>
<p>An external live plot visualized temperature data with dynamic color-coded markers corresponding to each fault rule, as illustrated in <xref ref-type="fig" rid="F21">Figure 21</xref>. Green markers indicated violations of Rule 7, aligning with observed temperature deviations, while red and blue markers represented violations of Rules 1 and 16, respectively. Throughout the execution, the user interface remained responsive, continuously updating the visualization panel and triggering alerts without any noticeable delay.</p>
<fig id="F21" position="float">
<label>FIGURE 21</label>
<caption>
<p>Revit UI showing live sensor values updated through pyRevit.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g021.tif">
<alt-text content-type="machine-generated">Screenshot of Autodesk Revit showing a 3D view of a mechanical system with pink and blue ductwork. A live temperature fault detection graph is overlaid, displaying temperature fluctuations over time with labeled data points. The project browser and properties panel are visible on the left, indicating the 3D view settings.</alt-text>
</graphic>
</fig>
<p>Although the system did not implement parameter updates within the Revit model, it successfully integrated a live visualization panel with Revit&#x2019;s 3D view, providing users with a comprehensive understanding of the system&#x2019;s behavior. These results underscore the system&#x2019;s practicality in swiftly detecting and communicating AHU operational anomalies using a lightweight rule-based approach.</p>
</sec>
<sec id="s4-9-2">
<label>4.9.2</label>
<title>Web platform for real-time visualization</title>
<p>The developed DT predictive maintenance platform was successfully deployed to simulate real-time monitoring and rule-based fault detection for an AHU using historical operational data. As illustrated in <xref ref-type="fig" rid="F22">Figure 22</xref>, the web dashboard rendered synchronized time-series plots of five key environmental and system parameters: Supply Air Temperature, Mixed Air Temperature, Return Air Temperature, Exhaust Air Temperature, and Outside Air Temperature. These data were streamed from the CSV dataset one record at a time every 2&#xa0;s, effectively replicating the behavior of a live sensor feed. The platform incorporated three APAR rules to detect operational anomalies: Rule 1 for detecting underperforming heating coils in StateHeating, Rule 7 for abnormal thermal deviation in ZeroEnergyState, and Rule 16 for excessive supply temperature during StateCooling.</p>
<fig id="F22" position="float">
<label>FIGURE 22</label>
<caption>
<p>Web dashboard showing time-series plots and live fault alert.</p>
</caption>
<graphic xlink:href="fbuil-11-1734945-g022.tif">
<alt-text content-type="machine-generated">Graphs display temperature trends for supply, mixed, return, exhaust, and outside air. The first section shows stable temperatures with no faults. The second section includes detected faults for cooling and heating, with the AHU 3D view highlighting a detected fault marked R16.</alt-text>
</graphic>
</fig>
<p>Each time an APAR condition was violated, the system generated a real-time alert that was logged in the &#x201c;Detected Faults&#x201d; panel, alongside a timestamp and contextual explanation. Over the duration of the simulation, the fault detection engine responded consistently and accurately, correctly identifying dozens of rule violations. This confirmed the effectiveness of the rule-based monitoring logic. Additionally, a 3D visualization of the AHU was rendered using Three.js, providing a rotating fan model as a visual indicator of system activity. The dashboard operated entirely in the browser, requiring no backend server beyond the lightweight Flask API. This result demonstrates the practicality of using a browser-based digital twin solution for real-time HVAC system diagnostics, particularly in settings where platform independence and rapid feedback are essential.</p>
</sec>
</sec>
<sec id="s4-10">
<label>4.10</label>
<title>Implications of the results</title>
<p>The findings of this study confirm that integrating deterministic APAR rules with supervised machine learning classifiers offers a powerful and scalable framework for Air Handling Unit (AHU) fault detection and predictive maintenance. The optimized models demonstrated exceptional classification accuracy&#x2014;99.6% for Random Forest and 98.8% for Artificial Neural Networks (ANN)&#x2014;effectively capturing complex fault patterns across multiple operational states.</p>
<p>Compared to prior studies that rely solely on rule-based diagnostics or machine learning in isolation, this work introduces a hybrid framework that synthesizes three complementary layers: (1) rule-based fault diagnosis using APAR for expert-informed logic, (2) supervised classification via Random Forest and ANN for pattern learning, and (3) forecasting using regression-based modeling for 6-month fault trend prediction. This integration represents a novel contribution to the field of building performance and HVAC analytics. While previous literature has explored rule-based FDD systems (<xref ref-type="bibr" rid="B14">House and Vaezi-Nejad, 2001</xref>; <xref ref-type="bibr" rid="B54">Yu et al., 2014</xref>) or ML-driven approaches (<xref ref-type="bibr" rid="B25">Lee et al., 2018</xref>), few have combined both within a unified diagnostic pipeline. Moreover, the addition of a real-time digital twin, operationalized through both pyRevit and a browser-based dashboard, enables interactive fault visualization and enhances the interpretability and trustworthiness of automated diagnoses, which remains an underexplored frontier in existing research.</p>
<p>From an operational standpoint, the data-driven threshold tuning (a 2.5&#xa0;&#xb0;C adjustment that raised &#x3b5;<sub>t</sub> from 1.7&#xa0;&#xb0;C to 4.2&#xa0;&#xb0;C), based on passive duct temperature analysis, substantially reduced false positives in temperature-based rules such as Rule 7 and Rule 16. This refinement helped suppress nuisance alarms caused by minor thermal drifts during system idle or transitional states, thereby lowering actuator cycling, reducing energy consumption, and extending component lifespans. These improvements directly enhance the long-term efficiency and reliability of HVAC operations.</p>
<p>The predictive capability of the framework, demonstrated by 6-month-ahead forecasts, revealed seasonal spikes in fault activity, particularly under high-stress operational modes like StateCooling (28.6%) and StateHeating (8.4%). This insight enables a strategic shift from reactive to proactive maintenance. Facility managers can anticipate fault-prone periods and schedule interventions during low-risk intervals, minimizing unplanned downtime, reducing labor and part replacement costs, and improving overall operational continuity.</p>
<p>Additionally, the study employed feature importance analysis to identify and retain only the most informative variables, including air temperature, outside temperature, and valve control signals. This focused approach improved model robustness and interpretability while reducing computational burden. It also ensures that the proposed system remains compatible with data-constrained environments, making it suitable for deployment in both modern and legacy Building Management Systems (BMS).</p>
<p>Finally, the successful implementation of real-time visualization through the pyRevit-integrated digital twin and the lightweight web dashboard demonstrated the system&#x2019;s potential for live deployment. The dynamic visualization of rule violations with contextual temperature and control signal plots enabled rapid fault recognition and intuitive system understanding. These tools foster operator trust and responsiveness, both essential qualities for scalable adoption in facility management.</p>
<p>In summary, this work advances the state of the art by demonstrating how the integration of APAR rules, machine learning, and digital twin technologies can deliver a robust and interpretable fault detection system. It empowers facility managers to move towards predictive maintenance strategies, supports more intelligent scheduling and resource use, and contributes to energy-efficient HVAC operation. These outcomes reflect the practical viability of the framework while addressing broader goals of occupant comfort, operational resilience, and sustainable building performance.</p>
</sec>
<sec id="s4-11">
<label>4.11</label>
<title>Challenges and limitations</title>
<p>Despite the successful deployment and validation of the proposed predictive maintenance system, some practical limitations were encountered during implementation. One of the primary constraints was the unavailability of complete sensor datasets, particularly damper position signals, minimum outdoor air thresholds, and changeover temperature inputs. These missing variables prevented the implementation of 10 out of the 28 standard APAR rules. However, this limitation was addressed through strategic rule adaptation and mode merging, allowing 18 APAR rules to be implemented while retaining strong diagnostic coverage.</p>
<p>Another important consideration concerns the fault labeling used for training the machine learning models. Rather than relying on automated APAR rule triggers, fault labels were manually generated based on rule logic and observed sensor behavior. While this approach offers greater control and precision in label assignments, the labels were not validated against facility-reported ground-truth events. Consequently, classification results remain constrained by expert interpretations of rule conditions, rather than operational confirmation.</p>
<p>The dataset used in this study exhibited significant class imbalance, with the &#x201c;No Fault&#x201d; category overwhelmingly dominating the sample distribution. This is a common challenge in fault detection, as real-world systems typically operate under normal conditions most of the time. Although SMOTE and class weighting were employed to mitigate this imbalance, these methods introduced a trade-off between overall accuracy and sensitivity to minority fault classes, most evident in the reduced accuracy of the ANN model after optimization. Furthermore, the digital twin prototype, while effective in simulating system behavior and visualizing diagnostics, was built on batch-updated datasets rather than continuous real-time streaming from the BMS. This limited its functionality to near-real-time insights. However, the digital twin architecture remains fully compatible with live-streamed BMS integration, suggesting strong potential for real-time deployment. While these challenges reflect the practical constraints of working with real-world building data, the study successfully delivered a robust and functional solution within these bounds. The implemented framework effectively combined expert-driven rule interpretation, high-performing machine learning models, and real-time visualization capabilities. Together, these elements demonstrate a scalable and adaptable foundation for intelligent fault detection in HVAC systems. To further enhance the system&#x2019;s capabilities, reliability, and generalizability, future work should prioritize the expansion of sensor coverage to enable full implementation of all 28 APAR diagnostic rules. This includes incorporating damper position indicators, outdoor air changeover thresholds, and other critical control signals currently missing from the dataset. Additionally, integrating fault labels validated by facility management personnel would provide essential ground-truth data for benchmarking the model&#x2019;s performance against real-world operational failures. Finally, upgrading the digital twin to stream live data directly from the BMS would elevate the system from a near-real-time prototype to a fully operational predictive maintenance platform with actionable insights.</p>
<p>Furthermore, the digital twin prototype, while effective in simulating system behavior and visualizing diagnostics, was built on batch-updated datasets rather than continuous real-time streaming from the BMS. This limited its functionality to near-real-time insights. The current study focused on technical validation of the DT&#x2019;s fault detection accuracy rather than formal usability testing or latency benchmarking. Additionally, usability testing with facility personnel and quantitative performance evaluations, such as response time measurements and user acceptance surveys, were not conducted within the scope of this research. While the architecture remains fully compatible with live-streamed BMS integration and the interface was functionally demonstrated, structured user testing and benchmarking against existing solutions would strengthen practical validation. These challenges reflect the practical constraints of working with real-world building data; nevertheless, the study successfully delivered a robust and functional solution within these bounds.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>This study developed and tested a hybrid predictive maintenance framework for Air Handling Units (AHUs), combining rule-based diagnostics (APAR), machine learning models (Random Forest and Artificial Neural Network), and a near-real-time digital twin platform. Using 6&#xa0;months of high-resolution data from a non-residential building in Grimstad, Norway, the framework effectively detected, classified, and predicted AHU faults with high accuracy and interpretability. Limited sensor availability allowed the implementation of 18 out of 28 standard APAR rules, modified to ensure broad diagnostic coverage. A Python-based APAR engine handled initial fault detection, and machine learning models trained on APAR-labeled data, augmented with SMOTE and class weighting, achieved over 98 percent classification accuracy, with Random Forest reaching 99.6 percent. The ANN model showed improved sensitivity to minority faults, while Random Forest offered greater precision under imbalance. Feature engineering reduced false alarms, highlighting the effectiveness of combining rule-based logic with data-driven methods.</p>
<p>The digital twin implementation, realized through pyRevit and a lightweight web dashboard, enabled near-real-time visualization, pop-up alerts, and 3D-based fault localization, making diagnostics more intuitive for facility managers. This integration represents a step forward in operational transparency by bridging BIM and predictive analytics. Despite limitations in sensor coverage, the framework proved robust, interpretable, and scalable across building types. It demonstrates that hybrid APAR and machine learning systems can enhance fault detection and support smarter, more efficient HVAC operations. However, it should be noted that the classification and forecasting results are based on APAR-derived fault labels rather than facility-verified ground-truth events. Therefore, the reported performance metrics reflect the models&#x2019; ability to reproduce the APAR diagnostic logic, not the confirmation of actual physical failures. With full instrumentation and deeper BMS integration, the system can evolve into an automated, real-time diagnostic engine. This research lays a practical foundation for resilient, energy-efficient facility management by uniting expert knowledge, machine learning, and digital twin technologies.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The data that support the findings of this study are available from the corresponding author upon reasonable request.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>HZ: Formal Analysis, Writing &#x2013; review and editing, Methodology, Writing &#x2013; original draft, Data curation, Software, Conceptualization, Visualization. FZ: Validation, Conceptualization, Methodology, Writing &#x2013; original draft, Writing &#x2013; review and editing, Software, Formal Analysis, Visualization. HH: Supervision, Writing &#x2013; review and editing, Conceptualization, Writing &#x2013; original draft. DK: Resources, Writing &#x2013; original draft, Supervision, Conceptualization, Writing &#x2013; review and editing, Investigation.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>This research was conducted at the Department of Built Environment, Faculty of Technology, Art and Design, Oslo Metropolitan University. We thank the Faculty of Technology, Art and Design, Oslo Metropolitan University, for covering the article processing charge (APC).</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. During the preparation of this work, the authors used ChatGPT to improve the readability and language of selected paragraphs, as well as to assist with coding. After using this tool, the authors reviewed and edited the content as needed and take full responsibility for the content of the published article.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abd Wahab</surname>
<given-names>N. H.</given-names>
</name>
<name>
<surname>Hasikin</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wee Lai</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Systematic review of predictive maintenance and digital twin technologies challenges, opportunities, and best practices</article-title>. <source>PeerJ Comput. Sci.</source> <volume>10</volume>, <fpage>e1943</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.1943</pub-id>
<pub-id pub-id-type="pmid">38686003</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aghili</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Rezaei</surname>
<given-names>A. H. M.</given-names>
</name>
<name>
<surname>Tafazzoli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Khanzadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rahbar</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Artificial intelligence approaches to energy management in HVAC systems: a systematic review</article-title>. <source>Buildings</source> <volume>15</volume>, <fpage>1008</fpage>. <pub-id pub-id-type="doi">10.3390/buildings15071008</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Awadallah</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Grolinger</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sadhu</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Remote collaborative framework for real-time structural condition assessment using augmented reality</article-title>. <source>Adv. Eng. Inf.</source> <volume>62</volume>, <fpage>102652</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2024.102652</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouabdallaoui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lafhaj</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yim</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ducoulombier</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bennadji</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Predictive maintenance in building facilities: a machine learning-based approach</article-title>. <source>Sensors (Basel)</source> <volume>21</volume>, <fpage>1044</fpage>. <pub-id pub-id-type="doi">10.3390/s21041044</pub-id>
<pub-id pub-id-type="pmid">33546418</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cakir</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guvenc</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Mistikoglu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The experimental application of popular machine learning algorithms on predictive maintenance and the design of IIoT based condition monitoring system</article-title>. <source>Comput. Industrial Eng.</source> <volume>151</volume>, <fpage>106948</fpage>. <pub-id pub-id-type="doi">10.1016/j.cie.2020.106948</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dey</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A probabilistic approach to diagnose faults of air handling units in buildings</article-title>. <source>Energy Build.</source> <volume>130</volume>, <fpage>177</fpage>&#x2013;<lpage>187</lpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2016.08.017</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Real-time surveillance-video-based personalized thermal comfort recognition</article-title>. <source>Energy Build.</source> <volume>244</volume>, <fpage>110989</fpage>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Felgueiras</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Santos</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fonseca</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>Caetano</surname>
<given-names>N. S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Buildings sustainability: the HVAC contribution</article-title>. <source>J. Clean Energy Technologies</source> <volume>4</volume> (<issue>5</issue>), <fpage>375</fpage>&#x2013;<lpage>379</lpage>. <pub-id pub-id-type="doi">10.18178/jocet.2016.4.5.316</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gourabpasi</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Nik-Bakht</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>BIM-based automated fault detection and diagnostics of HVAC systems in commercial buildings</article-title>. <source>J. Build. Eng.</source> <volume>87</volume>, <fpage>109022</fpage>. <pub-id pub-id-type="doi">10.1016/j.jobe.2024.109022</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Grieves</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Vickers</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Digital twin: mitigating unpredictable, undesirable emergent behavior in complex systems</article-title>,&#x201d; in <source>Transdisciplinary perspectives on complex systems: new findings and approaches</source>, <fpage>85</fpage>&#x2013;<lpage>113</lpage>.</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Hassan</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bouloukakis</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yus</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kattepur</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Efficient scheduling of smart building energy systems using AI planning</article-title>,&#x201d; in <source>2024 10th international conference on ICT for sustainability (ICT4S)</source> (<publisher-name>IEEE</publisher-name>).</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hodavand</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ramaji</surname>
<given-names>I. J.</given-names>
</name>
<name>
<surname>Sadeghi</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Digital twin for fault detection and diagnosis of building operations: a systematic review</article-title>. <source>Buildings</source> <volume>13</volume>, <fpage>1426</fpage>. <pub-id pub-id-type="doi">10.3390/buildings13061426</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosamo</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Svennevig</surname>
<given-names>P. R.</given-names>
</name>
<name>
<surname>Svidt</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>H. K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A digital twin predictive maintenance framework of air handling units based on automatic fault detection and diagnostics</article-title>. <source>Energy Build.</source> <volume>261</volume>, <fpage>111988</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2022.111988</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosamo</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Kraniotis</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Svennevig</surname>
<given-names>P. R.</given-names>
</name>
<name>
<surname>Svidt</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Digital Twin framework for automated fault source detection and prediction for comfort performance evaluation of existing non-residential Norwegian buildings</article-title>. <source>Energy Build.</source> <volume>281</volume>, <fpage>112732</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2022.112732</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosamo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mazzetto</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Data-driven ventilation and energy optimization in smart office buildings: insights from a high-resolution occupancy and indoor climate dataset</article-title>. <source>Sustainability</source> <volume>17</volume> (<issue>1</issue>), <fpage>58</fpage>. <pub-id pub-id-type="doi">10.3390/su17010058</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosamo</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Rolfsen</surname>
<given-names>C. N.</given-names>
</name>
<name>
<surname>Zeka</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sandbeck</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Said</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Saetre</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Navigating the adoption of 5D building information modeling: insights from Norway</article-title>. <source>Infrastructures</source> <volume>9</volume> (<issue>4</issue>), <fpage>75</fpage>. <pub-id pub-id-type="doi">10.3390/infrastructures9040075</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>House</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Vaezi-Nejad</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>An expert rule set for fault detection in air-handling units</article-title>. <source>ASHRAE Trans.</source>, <fpage>858</fpage>&#x2013;<lpage>871</lpage>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Olgun</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Assaad</surname>
<given-names>R. H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>An intelligent BIM-enabled digital twin framework for real-time structural health monitoring using wireless IoT sensing, digital signal processing, and structural analysis</article-title>. <source>Expert Syst. Appl.</source> <volume>252</volume>, <fpage>124204</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2024.124204</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ilambirai</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Padmini</surname>
<given-names>P. S.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Efficient self-learning artificial neural network controller for critical heating</article-title>,&#x201d; in <source>Ventilation and air conditioning systems</source>.</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jardine</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Banjevic</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A review on machinery diagnostics and prognostics implementing condition-based maintenance</article-title>. <source>Mech. Systems Signal Processing</source> <volume>20</volume> (<issue>7</issue>), <fpage>1483</fpage>&#x2013;<lpage>1510</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymssp.2005.09.012</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johansson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Roup&#xe9;</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Real-world applications of BIM and immersive VR in construction</article-title>. <source>Automation Constr.</source> <volume>158</volume>, <fpage>105233</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2023.105233</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johansson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Roup&#xe9;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bosch-Sijtsema</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Real-time visualization of building information models (BIM)</article-title>. <source>Automation Constr.</source> <volume>54</volume>, <fpage>69</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2015.03.018</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katipamula</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brambley</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2005a</year>). <article-title>Methods for fault detection, diagnostics, and prognostics for building systems&#x2014;a review, part II</article-title>. <source>HVAC&#x26;R Res.</source> <volume>11</volume> (<issue>2</issue>), <fpage>169</fpage>&#x2013;<lpage>187</lpage>. <pub-id pub-id-type="doi">10.1080/10789669.2005.10391133</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katipamula</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brambley</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2005b</year>). <article-title>Methods for fault detection, diagnostics, and prognostics for building systems&#x2014;a review, part I</article-title>. <source>HVAC&#x26;R Res.</source> <volume>11</volume> (<issue>1</issue>), <fpage>3</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1080/10789669.2005.10391123</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kazado</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kavgic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eskicioglu</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Integrating building information modeling (BIM) and sensor technology for facility management</article-title>. <source>J. Inf. Technol. Constr. (ITcon)</source> <volume>24</volume> (<issue>23</issue>), <fpage>440</fpage>&#x2013;<lpage>458</lpage>.</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Do</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mago</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Energy modeling and model predictive control for HVAC in buildings: a review of current research trends</article-title>. <source>Energies</source> <volume>15</volume>, <fpage>7231</fpage>. <pub-id pub-id-type="doi">10.3390/en15197231</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Realff</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Machine learning: overview of the recent progresses and implications for the process systems engineering field</article-title>. <source>Comput. Chem. Eng.</source> <volume>114</volume>, <fpage>111</fpage>&#x2013;<lpage>121</lpage>. <pub-id pub-id-type="doi">10.1016/j.compchemeng.2017.10.008</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lestinen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kilpel&#xe4;inen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kosonen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jokisalo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Koskela</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Experimental study on airflow characteristics with asymmetrical heat load distribution and low-momentum diffuse ceiling ventilation</article-title>. <source>Building Environ.</source> <volume>134</volume>, <fpage>168</fpage>&#x2013;<lpage>180</lpage>. <pub-id pub-id-type="doi">10.1016/j.buildenv.2018.02.029</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Little</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rubin</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Statistical analysis with missing data</source>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Development and application of a digital twin model for net zero energy building operation and maintenance utilizing BIM-IoT integration</article-title>. <source>Energy Build.</source> <volume>328</volume>, <fpage>115170</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2024.115170</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Macieira</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gomes</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Vale</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Energy management model for HVAC control supported by reinforcement learning</article-title>. <source>Energies</source> <volume>14</volume>, <fpage>8210</fpage>. <pub-id pub-id-type="doi">10.3390/en14248210</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maksoud</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Al-Beer</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Mushtaha</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Yahia</surname>
<given-names>M. W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Self-learning buildings: integrating artificial intelligence to create a building that can adapt to future challenges</article-title>. <source>IOP Conf. Ser.: Earth Environ. Sci.</source> <volume>1019</volume>, <fpage>012047</fpage>. <pub-id pub-id-type="doi">10.1088/1755-1315/1019/1/012047</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallapragada</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Sepulveda</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Jenkins</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Long-run system value of battery energy storage in future grids with increasing wind and solar generation</article-title>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maya</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chafic</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Energy management, critical analysis and recommendations: case study Lebanon</article-title>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mistry</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Use of artificial intelligence in optimizing HVAC energy consumption</article-title>.</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Mobley</surname>
<given-names>R. K.</given-names>
</name>
</person-group> (<year>2002</year>). <source>An introduction to predictive maintenance</source>. <publisher-name>Elsevier</publisher-name>.</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nassif</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tahmasebi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ridwana</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Ebrahimi</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>New optimal supply air temperature and minimum zone air flow resetting strategies for VAV systems</article-title>. <source>Buildings</source> <volume>12</volume> (<issue>3</issue>), <fpage>348</fpage>. <pub-id pub-id-type="doi">10.3390/buildings12030348</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Review of energy saving technologies research in HVAC systems</article-title>,&#x201d; in <source>E3S web of conferences</source> (<publisher-name>EDP Sciences</publisher-name>). <publisher-loc>Chengdu, China</publisher-loc>: <publisher-name>Southwest Jiaotong University</publisher-name>.</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Parzinger</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hanfstaengl</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sigg</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Residual analysis of predictive modelling data for automated fault detection in building&#x2019;s heating</article-title>,&#x201d; in <source>Ventilation and air conditioning systems</source>.</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>P&#xe9;rez-Lombard</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ortiz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pout</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A review on buildings energy consumption information</article-title>. <source>Energy Build.</source> <volume>40</volume> (<issue>3</issue>), <fpage>394</fpage>&#x2013;<lpage>398</lpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2007.03.007</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prabhu Bam</surname>
<given-names>R. V.</given-names>
</name>
<name>
<surname>Prabhu Gaonkar</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Pazhayidam George</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A machine learning framework for detection and severity estimation of faults for chillers and air handling units in HVAC systems</article-title>. <source>Energy Build.</source> <volume>313</volume>, <fpage>114235</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2024.114235</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Pruvost</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Forns-Samso</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gnepper</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Enge-Rosenblatt</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Integrating energy system monitoring and maintenance services into a BIM-based digital twin</article-title>,&#x201d; in <source>IECON 2023-49th annual conference of the IEEE industrial electronics society</source> (<publisher-name>IEEE</publisher-name>).</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Schein</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2006</year>). <source>Results from field testing of embedded air handling unit and variable air volume box fault detection tools</source>. <publisher-loc>Gaithersburg, MD</publisher-loc>: <publisher-name>U.S. Department of Commerce, National Institute of Standards and Technology</publisher-name>.</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schein</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bushby</surname>
<given-names>S. T.</given-names>
</name>
<name>
<surname>Castro</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>House</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A rule-based fault detection method for air handling units</article-title>. <source>Energy Build.</source> <volume>38</volume> (<issue>12</issue>), <fpage>1485</fpage>&#x2013;<lpage>1492</lpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2006.04.014</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schiavi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Havard</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Beddiar</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Baudry</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>BIM data flow architecture with AR/VR technologies: use cases in architecture, engineering and construction</article-title>. <source>Automation Constr.</source> <volume>134</volume>, <fpage>104054</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2021.104054</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schreiber</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Schwartz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Muller</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Towards an intelligent HVAC system automation using reinforcement learning</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>2042</volume>, <fpage>012028</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2042/1/012028</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Simpeh</surname>
<given-names>E. K.</given-names>
</name>
<name>
<surname>Pillay</surname>
<given-names>J. P. G.</given-names>
</name>
<name>
<surname>Ndihokubwayo</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Nalumu</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improving energy efficiency of HVAC systems in buildings: a review of best practices</article-title>. <source>Int. J. Build. Pathology Adapt.</source> <volume>40</volume> (<issue>2</issue>), <fpage>165</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1108/ijbpa-02-2021-0019</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Digital twins for sustainable design and management of smart city buildings and municipal infrastructure</article-title>. <source>Sustain. Energy Technol. Assessments</source> <volume>64</volume>, <fpage>103682</fpage>. <pub-id pub-id-type="doi">10.1016/j.seta.2024.103682</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gomez-Rosero</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Capretz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Health prognostics classification with autoencoders for predictive maintenance of HVAC systems</article-title>. <source>Energies</source> <volume>16</volume>, <fpage>7094</fpage>. <pub-id pub-id-type="doi">10.3390/en16207094</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trojanova</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Vass</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Macek</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Roji&#x10d;ek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Stluka</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Fault diagnosis of air handling units</article-title>. <source>IFAC Proc. Vol.</source> <volume>42</volume> (<issue>8</issue>), <fpage>366</fpage>&#x2013;<lpage>371</lpage>. <pub-id pub-id-type="doi">10.3182/20090630-4-es-2003.00061</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tuhaise</surname>
<given-names>V. V.</given-names>
</name>
<name>
<surname>Tah</surname>
<given-names>J. H. M.</given-names>
</name>
<name>
<surname>Abanda</surname>
<given-names>F. H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Technologies for digital twin applications in construction</article-title>. <source>Automation Constr.</source> <volume>152</volume>, <fpage>104931</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2023.104931</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ulpiani</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ranzi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Santamouris</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Expanding the applicability of daytime radiative cooling: technological developments and limitations</article-title>. <source>Energy Build.</source> <volume>243</volume>, <fpage>110990</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2021.110990</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Dinter</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tekinerdogan</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Catal</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predictive maintenance using digital twins: a systematic literature review</article-title>. <source>Inf. Softw. Technol.</source> <volume>151</volume>, <fpage>107008</fpage>. <pub-id pub-id-type="doi">10.1016/j.infsof.2022.107008</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Veerendra</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Dey</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mantle</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Manoj</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Padavala</surname>
<given-names>S. S. A. B.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Building information modeling&#x2013;simulation and analysis of a university edifice and its environs&#x2013;A sustainable design approach</article-title>. <source>Green Technol. Sustain.</source> <volume>3</volume> (<issue>2</issue>), <fpage>100150</fpage>. <pub-id pub-id-type="doi">10.1016/j.grets.2024.100150</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Merino</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Moretti</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Pauwels</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Parlikad</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Digital twin enabled fault detection and diagnosis process for building HVAC systems</article-title>. <source>Automation Constr.</source> <volume>146</volume>, <fpage>104695</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2022.104695</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pitt</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Common data environment for digital twins from building to city levels</article-title>. <source>Automation Constr.</source> <volume>174</volume>, <fpage>106131</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2025.106131</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Doan</surname>
<given-names>D. T.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Digital twins in construction: architecture, applications, trends and challenges</article-title>. <source>Buildings</source> <volume>14</volume>, <fpage>2616</fpage>. <pub-id pub-id-type="doi">10.3390/buildings14092616</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Woradechjumroen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A review of fault detection and diagnosis methodologies on air-handling units</article-title>. <source>Energy Build.</source> <volume>82</volume>, <fpage>550</fpage>&#x2013;<lpage>562</lpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2014.06.042</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>AHU sensor fault diagnosis in various operating conditions based on a hybrid data-driven model combined energy consumption</article-title>. <source>J. Build. Eng.</source> <volume>87</volume>, <fpage>109028</fpage>. <pub-id pub-id-type="doi">10.1016/j.jobe.2024.109028</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhong</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Overview of predictive maintenance based on digital twin technology</article-title>. <source>Heliyon</source> <volume>9</volume> (<issue>4</issue>), <fpage>e14534</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2023.e14534</pub-id>
<pub-id pub-id-type="pmid">37025897</pub-id>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pang</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An effective fault detection method for HVAC systems using the LSTM-SVDD algorithm</article-title>. <source>Buildings</source> <volume>12</volume> (<issue>2</issue>), <fpage>246</fpage>. <pub-id pub-id-type="doi">10.3390/buildings12020246</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Carcasci</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Machine learning-based energy monitoring method applied to the HVAC systems electricity demand of an Italian healthcare facility</article-title>. <source>Smart Energy</source> <volume>14</volume>, <fpage>100137</fpage>. <pub-id pub-id-type="doi">10.1016/j.segy.2024.100137</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zonta</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>da Costa</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>da Rosa Righi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>de Lima</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>da Trindade</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Predictive maintenance in the industry 4.0: a systematic literature review</article-title>. <source>Comput. Industrial Eng.</source> <volume>150</volume>, <fpage>106889</fpage>. <pub-id pub-id-type="doi">10.1016/j.cie.2020.106889</pub-id>
</mixed-citation>
</ref>
</ref-list>
<sec id="s12">
<title>Glossary</title>
<def-list>
<def-item>
<term id="G1-fbuil.2025.1734945">
<bold>AHUs</bold>
</term>
<def>
<p>Air Handling Units</p>
</def>
</def-item>
<def-item>
<term id="G2-fbuil.2025.1734945">
<bold>ANN</bold>
</term>
<def>
<p>Artificial Neural Networks</p>
</def>
</def-item>
<def-item>
<term id="G3-fbuil.2025.1734945">
<bold>ANOVA</bold>
</term>
<def>
<p>Analysis of Variance</p>
</def>
</def-item>
<def-item>
<term id="G4-fbuil.2025.1734945">
<bold>APAR</bold>
</term>
<def>
<p>Air-Handling Unit Performance Assessment Rules</p>
</def>
</def-item>
<def-item>
<term id="G5-fbuil.2025.1734945">
<bold>API</bold>
</term>
<def>
<p>Application Programming Interface</p>
</def>
</def-item>
<def-item>
<term id="G6-fbuil.2025.1734945">
<bold>AUC</bold>
</term>
<def>
<p>Area Under the Curve</p>
</def>
</def-item>
<def-item>
<term id="G7-fbuil.2025.1734945">
<bold>BIM</bold>
</term>
<def>
<p>Building Information Modeling</p>
</def>
</def-item>
<def-item>
<term id="G8-fbuil.2025.1734945">
<bold>BMS</bold>
</term>
<def>
<p>Building Management System</p>
</def>
</def-item>
<def-item>
<term id="G9-fbuil.2025.1734945">
<bold>CUSUM</bold>
</term>
<def>
<p>Cumulative Sum</p>
</def>
</def-item>
<def-item>
<term id="G10-fbuil.2025.1734945">
<bold>DT</bold>
</term>
<def>
<p>Digital Twin</p>
</def>
</def-item>
<def-item>
<term id="G11-fbuil.2025.1734945">
<bold>EAT</bold>
</term>
<def>
<p>Exhaust Air Temperature</p>
</def>
</def-item>
<def-item>
<term id="G12-fbuil.2025.1734945">
<bold>FDD</bold>
</term>
<def>
<p>Fault Detection and Diagnostics</p>
</def>
</def-item>
<def-item>
<term id="G13-fbuil.2025.1734945">
<bold>FPR</bold>
</term>
<def>
<p>False Positive Rate</p>
</def>
</def-item>
<def-item>
<term id="G14-fbuil.2025.1734945">
<bold>HVAC</bold>
</term>
<def>
<p>Heating, Ventilation, and Air Conditioning</p>
</def>
</def-item>
<def-item>
<term id="G15-fbuil.2025.1734945">
<bold>IoT</bold>
</term>
<def>
<p>Internet of Things</p>
</def>
</def-item>
<def-item>
<term id="G16-fbuil.2025.1734945">
<bold>MAT</bold>
</term>
<def>
<p>Mixed Air Temperature</p>
</def>
</def-item>
<def-item>
<term id="G17-fbuil.2025.1734945">
<bold>ML</bold>
</term>
<def>
<p>Machine Learning</p>
</def>
</def-item>
<def-item>
<term id="G18-fbuil.2025.1734945">
<bold>OAF</bold>
</term>
<def>
<p>Outdoor Air Fraction</p>
</def>
</def-item>
<def-item>
<term id="G19-fbuil.2025.1734945">
<bold>PdM</bold>
</term>
<def>
<p>Predictive Maintenance</p>
</def>
</def-item>
<def-item>
<term id="G20-fbuil.2025.1734945">
<bold>RAT</bold>
</term>
<def>
<p>Return Air Temperature</p>
</def>
</def-item>
<def-item>
<term id="G21-fbuil.2025.1734945">
<bold>RF</bold>
</term>
<def>
<p>Random Forest</p>
</def>
</def-item>
<def-item>
<term id="G22-fbuil.2025.1734945">
<bold>ROC</bold>
</term>
<def>
<p>Receiver Operating Characteristic</p>
</def>
</def-item>
<def-item>
<term id="G23-fbuil.2025.1734945">
<bold>SAT</bold>
</term>
<def>
<p>Supply Air Temperature</p>
</def>
</def-item>
<def-item>
<term id="G24-fbuil.2025.1734945">
<bold>SMOTE</bold>
</term>
<def>
<p>Synthetic Minority Oversampling Technique</p>
</def>
</def-item>
<def-item>
<term id="G25-fbuil.2025.1734945">
<bold>Tco</bold>
</term>
<def>
<p>Changeover Air Temperature</p>
</def>
</def-item>
<def-item>
<term id="G26-fbuil.2025.1734945">
<bold>TPR</bold>
</term>
<def>
<p>True Positive Rate</p>
</def>
</def-item>
<def-item>
<term id="G27-fbuil.2025.1734945">
<bold>VR</bold>
</term>
<def>
<p>Virtual Reality</p>
</def>
</def-item>
<def-item>
<term id="G28-fbuil.2025.1734945">
<bold>MCAR</bold>
</term>
<def>
<p>Missing Completely At Random</p>
</def>
</def-item>
<def-item>
<term id="G29-fbuil.2025.1734945">
<bold>PID</bold>
</term>
<def>
<p>Proportional Integral Derivative</p>
</def>
</def-item>
<def-item>
<term id="G30-fbuil.2025.1734945">
<bold>AFDD</bold>
</term>
<def>
<p>Automated Fault Detection and Diagnostics</p>
</def>
</def-item>
</def-list>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3218353/overview">Elisa Caracci</ext-link>, University of Cassino, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2347216/overview">Aref Maksoud</ext-link>, University of Sharjah, United Arab Emirates</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3262200/overview">Christos Tsallis</ext-link>, University of West Attica, Greece</p>
</fn>
</fn-group>
</back>
</article>