<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Built Environ.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Built Environment</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Built Environ.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2297-3362</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1753382</article-id>
<article-id pub-id-type="doi">10.3389/fbuil.2026.1753382</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Interpretable machine learning for predicting the bearing capacity of double shear-bolted connections: a data-driven evaluation</article-title>
<alt-title alt-title-type="left-running-head">Kookalani et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbuil.2026.1753382">10.3389/fbuil.2026.1753382</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kookalani</surname>
<given-names>Soheila</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3283752"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Hongchen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dash</surname>
<given-names>Tirtharaj</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1821614"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mathew</surname>
<given-names>Alwyn</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brilakis</surname>
<given-names>Ioannis</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Department of Engineering, University of Cambridge</institution>, <city>Cambridge</city>, <country country="GB">United Kingdom</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Department of Biochemistry, University of Cambridge</institution>, <city>Cambridge</city>, <country country="GB">United Kingdom</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Soheila Kookalani, <email xlink:href="mailto:sk2268@cam.ac.uk">sk2268@cam.ac.uk</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-17">
<day>17</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>12</volume>
<elocation-id>1753382</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>09</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Kookalani, Liu, Dash, Mathew and Brilakis.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Kookalani, Liu, Dash, Mathew and Brilakis</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-17">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Accurate prediction of the bearing capacity of double shear-bolted connections in structural steel is essential for ensuring safety and efficiency in structural design. This study explores the application of ten machine learning algorithms to enhance prediction accuracy while addressing the interpretability challenges often associated with such models.</p>
</sec>
<sec>
<title>Methods</title>
<p>Models were tuned with 10-fold cross-validation and assessed using RMSE, R<sup>2</sup>, and the a20 accuracy index. A comprehensive sensitivity analysis evaluates the influence of input parameters, while advanced interpretability techniques, such as partial dependence plots, accumulated local effects, and Shapley additive explanations, are employed alongside parametric studies to elucidate the decision-making processes of the models.</p>
</sec>
<sec>
<title>Results</title>
<p>These methods facilitate the identification of critical variables that influence bearing capacity predictions at both local and global scales.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The study demonstrates that machine learning can be a trustworthy and data-driven complement to conventional mechanics-based approaches, when coupled with rigorous interpretability, advancing both safety and efficiency in steel connection design. The findings highlight the potential of interpretable machine learning approaches to not only improve predictive precision but also provide actionable insights into complex model behaviours, ultimately advancing structural engineering practices and promoting data-driven design methodologies.</p>
</sec>
</abstract>
<kwd-group>
<kwd>bearing capacity prediction</kwd>
<kwd>double shear-bolted connections</kwd>
<kwd>interpretable AI</kwd>
<kwd>machine learning</kwd>
<kwd>sensitivity analysis</kwd>
<kwd>structural steel joints</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="16"/>
<table-count count="5"/>
<equation-count count="23"/>
<ref-count count="78"/>
<page-count count="26"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Methods in Structural Engineering</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Bolted and welded connections are widely regarded as the backbone of steel structures, ensuring load transfer and global stability across a broad spectrum of engineering applications. In comparison to welded joints, bolted connections offer several advantages, including expedited assembly and reduced cost by obviating specialized labour and on-site welding procedures, while still achieving reliable structural performance when bolt holes and net-section effects are properly accounted for in design (<xref ref-type="bibr" rid="B74">Zakir et al., 2022</xref>). Within the family of bolted connections, bearing-type joints have garnered particular attention due to the primary load transfer mechanism arising from the bearing action between the bolt shank and the plate hole surface. Over the years, significant research has been dedicated to clarifying the behaviour of bearing-type bolted connections, often focusing on parameters such as bolt diameter, edge and end distances, plate thickness, and material characteristics.</p>
<p>Recent investigations have expanded our understanding of bolted connections under a variety of geometric configurations and load conditions. For instance, <xref ref-type="bibr" rid="B45">Lyu et al. (2019)</xref> provided a comprehensive review of how edge and end distances influence the ultimate capacity of bolted connections. Earlier work also identified the presence of an &#x201c;active shear plane&#x201d; between the gross and net shear planes, lending crucial insights into design practices governed by structural steel specifications (<xref ref-type="bibr" rid="B16">Clements and Teh, 2013</xref>; <xref ref-type="bibr" rid="B64">Teh and Clements, 2012</xref>). Building upon these findings, Teh and Uz (<xref ref-type="bibr" rid="B65">Teh and Uz, 2015a</xref>; <xref ref-type="bibr" rid="B66">Teh and Uz, 2015b</xref>) introduced a formula to predict the ultimate shear-out capacity, which was subsequently refined to integrate a more accurate bearing coefficient that better captures combined bearing and shear-out failures (<xref ref-type="bibr" rid="B67">Teh and Uz, 2016</xref>).</p>
<p>In recent years, the integration of Artificial Intelligence (AI) into civil engineering has led to transformative advances across structural, geotechnical, and materials domains (<xref ref-type="bibr" rid="B42">Liu et al., 2025</xref>; <xref ref-type="bibr" rid="B68">Torres et al., 2025</xref>; <xref ref-type="bibr" rid="B37">Kookalani et al., 2025</xref>). For instance, <xref ref-type="bibr" rid="B76">Zhang et al. (2022)</xref> utilized deep neural networks enhanced with Harris Hawks Optimization (HHO) to generalize friction angle predictions of clays for slope stability analysis. <xref ref-type="bibr" rid="B24">Ghanizadeh et al. (2023)</xref> developed a hybrid Multivariate Adaptive Regression Spline- Evolutionary-Based Search (MARS-EBS) model for estimating the bearing capacity of geogrid-reinforced stone columns, emphasizing the value of hybrid soft computing techniques. Similarly, <xref ref-type="bibr" rid="B12">Barkho et al. (2023)</xref> employed ensemble Convolutional Neural Networks (CNNs) to achieve high-accuracy structural damage classification. Further, <xref ref-type="bibr" rid="B77">Zhao et al. (2022)</xref> presented a Whale Optimization Algorithm (WOA)-optimized neural network to accurately predict RC beam deflection, outperforming conventional Artificial Neural Network (ANN) models. In the materials domain, <xref ref-type="bibr" rid="B9">Ashrafian et al. (2023)</xref> demonstrated the application of MARS and Extreme Learning Machine (ELM) to predict the strength of sustainable concrete mixes. Similarly, <xref ref-type="bibr" rid="B50">Mahmood et al. (2022)</xref> combined various mathematical models to assess the compressive strength of cement-grouted sands, illustrating hybrid Machine Learning (ML) model effectiveness in complex materials characterization. These studies collectively illustrate the growing sophistication and diversity of ML applications in civil and structural engineering.</p>
<p>Despite these advances, predicting the bearing capacity of multi-bolt configurations, particularly in double shear arrangements, continues to pose significant challenges. The majority of existing design equations and predictive models have their roots in single-bolted connection tests, which do not fully encapsulate the intricate load distribution that arises in multi-bolt layouts (<xref ref-type="bibr" rid="B46">Lyu et al., 2020a</xref>). <xref ref-type="bibr" rid="B46">Lyu et al. (2020a)</xref> demonstrated that the bearing strength of individual fasteners in multi-bolt connections exhibits a non-linear relationship with the edge distance-to-hole diameter ratio, underscoring the pitfalls of extrapolating single-bolt findings to multi-bolt scenarios. This discrepancy highlights the urgent need for more robust, data-driven approaches that can account for the complexity of multi-bolt load sharing and failure mechanisms. In particular, this research is motivated by the lack of accurate and interpretable predictive tools for multi-bolt double shear, bearing-type bolted connections, where current design equations, largely extrapolated from single-bolt tests and simplified interaction rules, may fail to capture the combined influence of end and edge distances, pitch, and bolt-row configuration on bearing capacity.</p>
<p>While traditional methods such as Eurocode and American Institute of Steel Construction (AISC) analytical formulations, or numerical models like Finite Element Analysis (FEA), have been validated over decades and remain essential in design practice, they rely on simplifying assumptions such as linear elasticity, idealized boundary conditions, and uniform material behaviour. These constraints can hinder their effectiveness in capturing the complex, nonlinear behaviours present in double shear-bolted connections, especially in multi-bolt scenarios where interaction effects dominate. In contrast, ML models offer a flexible, data-driven complement to traditional mechanics-based and numerical approaches, as they can learn intricate patterns from large experimental datasets without being restricted to a predefined functional form or the simplifying assumptions embedded in existing design equations, provided that the training data are sufficiently rich and representative (<xref ref-type="bibr" rid="B13">Cabrera et al., 2023</xref>; <xref ref-type="bibr" rid="B57">Sadrossadat et al., 2022</xref>; <xref ref-type="bibr" rid="B59">Sarir et al., 2021a</xref>). This study demonstrates that gradient-boosting models improve predictive accuracy compared with traditional regression methods and other ML techniques, achieving error levels that are compatible with use as complementary, decision-support tools in connection design.</p>
<p>Furthermore, unlike conventional ML applications often criticized for their opacity, this study incorporates explainability tools, which transparently reveal how key features such as normalized end and edge distances and the number of bolt rows influence predictions. This not only enhances trust in ML outputs but also bridges the gap between black-box models and engineering insight. When paired with Building Information Modelling (BIM) in future implementations, such interpretable ML frameworks could enable real-time design evaluations and optimization workflows that are often impractical using FEA or static code-based checks alone (<xref ref-type="bibr" rid="B49">Mahdavipour et al., 2026</xref>). Therefore, ML should not be viewed as a replacement for traditional methods, but rather as a robust and scalable complement, particularly suited for design scenarios that demand generalization across multiple configurations, rapid feedback, and insight into high-dimensional parameter interactions.</p>
<p>Furthermore, the implementation of BIM in the context of bolted connections introduces new opportunities for enhanced structural analysis and decision-making. BIM enables the integration of ML-based predictive models within digital twins, allowing for real-time assessment of connection performance and facilitating data-driven optimization of bolted connections in steel structures. BIM-driven workflows can improve the efficiency and reliability of bolted connection design by incorporating as-built data, geometric constraints, and material properties, reducing material waste and construction errors (<xref ref-type="bibr" rid="B2">Alavi et al., 2024a</xref>; <xref ref-type="bibr" rid="B3">Alavi et al., 2024b</xref>; <xref ref-type="bibr" rid="B4">Alavi et al., 2024c</xref>; <xref ref-type="bibr" rid="B5">Alavi et al., 2024d</xref>; <xref ref-type="bibr" rid="B6">Alavi et al., 2024e</xref>; <xref ref-type="bibr" rid="B7">Alavi et al., 2024f</xref>). The synergy between BIM and ML-driven predictions fosters a more comprehensive understanding of structural behaviour, ultimately contributing to more sustainable and efficient construction practices.</p>
<p>In response to these challenges, the present study harnesses the capabilities of ML to predict the bearing capacity of double shear-bolted connections in structural steel. The primary objective of this study is to develop and interpret ML-based models that can accurately predict the bearing capacity of double shear-bolted connections, thereby contributing to safer and more data-informed structural engineering practices. Leveraging a sizable dataset of 443 experimental results, 10&#xa0;ML algorithms, including linear and ridge regression, support vector machines, tree-based methods, and boosting ensembles, are systematically trained, and tuned using grid search and <italic>K</italic>-fold Cross-Validation (CV) to ensure reliable performance. The choice of these algorithms reflects the growing recognition that ML techniques can capture complex, nonlinear relationships in engineering problems more effectively than many traditional predictive models.</p>
<p>However, from an engineering standpoint, predictive performance alone is insufficient (<xref ref-type="bibr" rid="B38">Li et al., 2023</xref>; <xref ref-type="bibr" rid="B40">Lim and Chi, 2019</xref>; <xref ref-type="bibr" rid="B48">Ma et al., 2024</xref>); explainability and interpretability are equally critical for effective design and decision-making (<xref ref-type="bibr" rid="B23">Geyer et al., 2024</xref>; <xref ref-type="bibr" rid="B43">Love et al., 2023</xref>). To this end, the study employs advanced interpretability frameworks such as Shapley Additive Explanations (SHAP), Accumulated Local Effects (ALE), and Partial Dependence Plots (PDPs). Sensitivity analyses, including first-order and total-effect measures, further elucidate the relative significance of input parameters, while a parametric study sheds light on how variations in key variables influence the bearing capacity. This work not only delivers highly accurate predictions but also offers insights into how different factors drive the ML model&#x2019;s behaviour by integrating these techniques.</p>
<p>The remainder of this paper is structured as follows: <xref ref-type="sec" rid="s2">Section 2</xref> introduces the 10&#xa0;ML algorithms, including Linear Regression (LR), Ridge Regression (RR), Support Vector Regression (SVR), K-Nearest Neighbors (KNN), Decision Trees (DT), Random Forest (RF), Adaptive Boosting (AdaBoost), Extreme Gradient Boosting (XGBoost), Category Boosting (CatBoost), and Light Gradient Boosting Machine (LightGBM), and details the performance metrics, hyperparameter tuning strategies, and Sensitivity Analysis (SA) approaches. <xref ref-type="sec" rid="s3">Section 3</xref> focuses on interpretable ML methodologies, illustrating their respective contributions to model transparency. <xref ref-type="sec" rid="s4">Section 4</xref> presents a numerical example that demonstrates the practical application of these ML models in predicting bearing capacity. <xref ref-type="sec" rid="s5">Section 5</xref> then discusses the interpretability results, highlighting the implications for both design and research. Finally, <xref ref-type="sec" rid="s6">Section 6</xref> summarizes the key findings, outlines current limitations, and proposes potential directions for future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Digital twin integration</title>
<p>The rapid digital transformation of the construction sector has established digital twins as a central paradigm for data-driven design, monitoring, and lifecycle management of structural systems. Digital twins require analytical engines capable of providing continuous, reliable, and explainable predictions using both as-designed and as-built data. The interpretable ML framework developed in this study aligns with these requirements by offering a transparent, computationally efficient, and easily deployable predictive model for assessing the bearing capacity of double shear-bolted connections.</p>
<p>In a digital twin environment, geometric and material parameters, such as end and edge distances, pitch dimensions, bolt configurations, and strength ratios, are inherently embedded within BIM data structures. These parameters can be automatically extracted from Industry Foundation Classes (IFC)-compliant models or connected databases and fed directly into the ML-based prediction engine. The system can deliver real-time assessments of connection performance as the model geometry or material conditions evolve by integrating the ML model and its interpretability layers into a digital twin data pipeline. This capability enables faster and more adaptive structural evaluations compared to conventional FEA, which can be computationally intensive and time-prohibitive for iterative or real-time use.</p>
<p>The interpretability tools incorporated in this study reinforce the suitability of the model for digital twin adoption. SHAP values provide detailed, instance-level explanations of how geometric and material features influence predicted bearing capacity, ensuring transparency and auditing capability, essential requirements for engineering decision-making in digital twin ecosystems. Moreover, sensitivity analysis supports prioritisation of key parameters, guiding both model refinement and potential sensor placement strategies in operational digital twins.</p>
<p>Beyond design-phase applications, the proposed ML framework can be embedded into digital twins for structural health monitoring and predictive maintenance. When paired with inspection data, periodic updates, or real-time sensor readings, the digital twin can autonomously evaluate deviations from expected behaviour, identify performance degradation, and support proactive decision-making. Additionally, in emerging metaverse or immersive simulation environments, this model can serve as a computational engine for interactive scenario testing, virtual training, and construction rehearsal.</p>
<p>Overall, the integration of the interpretable ML model into digital twin workflows enhances the analytical capabilities of digital construction ecosystems by enabling fast, transparent, and data-driven assessments of bolted steel connections. This contribution supports ongoing efforts to establish intelligent, adaptive, and reliable digital twins for sustainable and efficient construction practice. It should be noted that, in this study, the ML model is developed and validated against experimental data only; the BIM and digital twin workflows discussed in this section are presented as a conceptual integration framework and as directions for future implementation, rather than as a fully realised software integration demonstrated in this paper.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Machine learning model development</title>
<p>This study examines the performance of 10&#xa0;ML algorithms, LR, RR, SVR, KNN, DT, RF, AdaBoost, XGBoost, CatBoost, and LightGBM, in order to identify the most suitable approach for accurately predicting the bearing capacity of double shear-bolted connections. Additionally, it introduces relevant performance metrics, an efficient hyperparameter tuning methodology, and a SA framework, all of which are designed to facilitate a comprehensive evaluation and optimization of the selected models. Optimization plays a crucial role in enhancing the predictive accuracy and generalisability of these models, ensuring practical applicability in engineering design and decision-making (<xref ref-type="bibr" rid="B33">Kookalani et al., 2022a</xref>).</p>
<sec id="s3-1">
<label>3.1</label>
<title>Machine learning models</title>
<sec id="s3-1-1">
<label>3.1.1</label>
<title>Linear regression (LR)</title>
<p>LR is a supervised ML technique used to model the relationship between dependent and independent variables through a linear equation (<xref ref-type="bibr" rid="B39">Liang and Song, 2009</xref>). It is among the simplest and most commonly employed methods for analysing how output variables are influenced by input features. LR operates by minimizing the least squares error between the observed and predicted values, enabling the determination of an optimal set of coefficients for the parameters. The result of LR is represented as a linear combination of input variables, defined by <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>&#x3b2;</italic>
<sub>
<italic>0</italic>
</sub> represents the intercept and <italic>&#x3b2;</italic>
<sub>
<italic>i</italic>
</sub> denotes the regression coefficient, with <italic>M</italic> being the total number of parameters. The regression coefficients are determined by minimizing the sum of squared residuals, as expressed in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mi>argmin</mml:mi>
<mml:mi>&#x3b2;</mml:mi>
</mml:munder>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <italic>N</italic> represents the total number of instances and <italic>y</italic>
<sub>
<italic>j</italic>
</sub> signifies the corresponding target output.</p>
</sec>
<sec id="s3-1-2">
<label>3.1.2</label>
<title>Ridge regression (RR)</title>
<p>RR is an advanced form of LR designed to mitigate the high variance often associated with LR [10]. In many ML methods, there is a balance to be struck between bias and variance. RR addresses this by incorporating a regularization term, which reduces the magnitude of regression coefficients. This helps to prevent overfitting and enhances the ability of the model to generalize (<xref ref-type="bibr" rid="B27">Hoerl and Kennard, 1970</xref>). RR achieves this by minimizing a penalized cost function, as expressed in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>:<disp-formula id="e3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mi>argmin</mml:mi>
<mml:mi>&#x3b2;</mml:mi>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where &#x3bb; controls the degree of coefficient shrinkage, with the RR coefficients tending toward zero as &#x3bb; increases.</p>
</sec>
<sec id="s3-1-3">
<label>3.1.3</label>
<title>Support vector regression (SVR)</title>
<p>SVR applies the foundational concepts of Support Vector Machines (SVMs) to address regression problems. In SVM, hyperplanes are designed to divide the data space with maximum margin, thereby enhancing the robustness of the predictive model. The SVM predictions are formulated as follows expressed in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>:<disp-formula id="e4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3d5;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <italic>&#x3d5;</italic>(<italic>x</italic>) represents a mapping function that transforms the input into a higher-dimensional space, while <italic>b</italic> denotes the bias of the model. The model achieves flatness in <italic>f</italic>(<italic>x</italic>) by minimizing a convex optimization problem, ensuring the smallest possible values for <italic>&#x3c9;</italic> as expressed in <xref ref-type="disp-formula" rid="e5">Equation 5</xref> (<xref ref-type="bibr" rid="B63">Smola and Sch&#xf6;lkopf, 2004</xref>):<disp-formula id="e5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3be;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3be;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <italic>C</italic> represents the box constraint, and <italic>&#x3be;</italic><sub><italic>j</italic></sub> and <italic>&#x3be;</italic><sub><italic>j</italic></sub><sup>&#x2a;</sup> are the slack variables. Consequently, the SVM prediction is expressed in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>:<disp-formula id="e6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> and <italic>&#x3b1;</italic><sub><italic>i</italic></sub><sup>&#x2a;</sup> are Lagrange multipliers, and <italic>K</italic> (<italic>x</italic>
<sub>
<italic>i</italic>
</sub>, <italic>x</italic>) is the kernel function. These equations indicate that the kernel function, along with the parameters <italic>&#x3b5;</italic> and <italic>C</italic>, can be adjusted to optimize SVM predictions.</p>
</sec>
<sec id="s3-1-4">
<label>3.1.4</label>
<title>K-nearest neighbours (KNN)</title>
<p>KNN predicts the output variable by taking the mean of several nearby values, where <italic>k</italic> represents the number of neighbours considered (<xref ref-type="bibr" rid="B17">Cover and Hart, 1967</xref>). The fundamental idea behind KNN is that it assigns greater importance to the <italic>k</italic> closest samples in the training dataset that are near the new data point <italic>x</italic>. The conditional probability of <italic>x</italic> is determined using the <xref ref-type="disp-formula" rid="e7">Equation 7</xref>:<disp-formula id="e7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mfenced open="|" close="" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <italic>I</italic> (<italic>y</italic>
<sub>
<italic>i</italic>
</sub> &#x3d; <italic>m</italic>) is an indicator function that returns 1 if the given observation belongs to the <italic>m</italic>-th class, and 0 otherwise; <italic>N</italic>
<sub>
<italic>k</italic>
</sub> denotes the set of the <italic>k</italic> nearest neighbours of <italic>x</italic>.</p>
</sec>
<sec id="s3-1-5">
<label>3.1.5</label>
<title>Decision tree (DT)</title>
<p>DT is a supervised ML technique that can be utilised for regression models. This method constructs a predictive model in a tree-like structure based on training data (<xref ref-type="bibr" rid="B18">Dietterich, 2000</xref>). The tree comprises input variables from the dataset represented as internal nodes, decision rules forming the branches, and outputs appearing as leaf nodes. Regression trees fall under a category of ML methods that create estimation models by segmenting the feature space into multiple high-dimensional regions. By partitioning the feature space into <italic>D</italic> distinct regions, denoted as, <italic>R</italic>
<sub>1</sub>,&#x22ef;,<italic>R</italic>
<sub>
<italic>D</italic>
</sub>, the regression problem can be formulated as expressed in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>:<disp-formula id="e8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>D</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <italic>c</italic>
<sub>
<italic>d</italic>
</sub> represents the average of the observations.</p>
</sec>
<sec id="s3-1-6">
<label>3.1.6</label>
<title>Random forest (RF)</title>
<p>RF is a ML technique that leverages an ensemble of decision trees. RF integrates the bagging approach with a strategy that selects random subsets of input parameters. In the bagging approach, each decision tree is constructed using a bootstrap sample, which is generated by randomly sampling the training dataset with replacement. This method mitigates overfitting by reducing the dependency on individual trees. Moreover, instead of considering all input parameters, RF uses a randomly chosen subset of them for each tree. As an advanced form of the bagging method, RF combines these features to improve performance. The final output of the model is determined by averaging the predictions of all individual decision trees, as shown in <xref ref-type="disp-formula" rid="e9">Equation 9</xref>:<disp-formula id="e9">
<mml:math id="m9">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <italic>B</italic> represents the total number of decision trees, <italic>Y</italic>
<sub>
<italic>b</italic>
</sub> denotes the output of each decision tree, and <italic>X&#x2032;</italic> refers to the unknown instances.</p>
</sec>
<sec id="s3-1-7">
<label>3.1.7</label>
<title>Adaptive boosting (AdaBoost)</title>
<p>The AdaBoost algorithm enhances the predictive accuracy of a model by combining multiple weak learners to create a strong learner (<xref ref-type="bibr" rid="B21">Freund and Schapire, 1997</xref>; <xref ref-type="bibr" rid="B75">Zhang and Ma, 2012</xref>). The structure of the learner is expressed in <xref ref-type="disp-formula" rid="e10">Equation 10</xref>:<disp-formula id="e10">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>In this approach, samples that were misclassified in a previous step are assigned higher weights in subsequent iterations (<xref ref-type="bibr" rid="B62">Schapire and Singer, 1999</xref>; <xref ref-type="bibr" rid="B61">Schapire, 2013</xref>). The performance of the model is progressively improved by reducing errors at each step. Initially, all samples are assigned equal weights, and the mean square error of the prediction is calculated. Higher weights are then given to samples with larger errors, and the process is repeated until the results converge, as shown in <xref ref-type="disp-formula" rid="e11">Equation 11</xref>:<disp-formula id="e11">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <italic>E</italic> (.) represents the error function; <italic>F</italic>
<sub>
<italic>t</italic>-1</sub>(<italic>x</italic>) denotes the learner generated in the preceding step, and <italic>f</italic>
<sub>
<italic>t</italic>
</sub>(<italic>x</italic>) &#x3d; <italic>a</italic>
<sub>t</sub>
<italic>h</italic>(<italic>x</italic>) represents the weak learner contributing to the strong learner. Ultimately, AdaBoost combines these weak learners to construct a robust, unified learner.</p>
</sec>
<sec id="s3-1-8">
<label>3.1.8</label>
<title>Extreme gradient boosting (XGBoost)</title>
<p>XGBoost is a boosting algorithm that improves predictive accuracy by iteratively assigning higher weights to weak learners. The approach combines these weak learners to create a more robust and accurate model. Originally introduced by Chen et al. (<xref ref-type="bibr" rid="B15">Chen et al., 2018</xref>), gradient boosting employs the gradient vector of the misfit function to build a regression model, akin to gradient-descent methods. XGBoost represents a parallelized tree-based implementation of gradient boosting, where parameters are iteratively adjusted to minimize the residuals from the previous step (<xref ref-type="bibr" rid="B14">Chen and Guestrin, 2016</xref>). XGBoost incorporates regularization into both its objective function and loss function to prevent overfitting. The objective function for XGBoost is expressed in <xref ref-type="disp-formula" rid="e12">Equation 12</xref>:<disp-formula id="e12">
<mml:math id="m12">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x2322;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <italic>L</italic> represents the loss function that measures model bias, while <italic>&#x3c9;</italic> denotes the regularization term designed to reduce model complexity.</p>
</sec>
<sec id="s3-1-9">
<label>3.1.9</label>
<title>Category boosting (CatBoost)</title>
<p>CatBoost (<xref ref-type="bibr" rid="B19">Dorogush Veronika et al., 2018</xref>) introduces a novel gradient boosting technique that specifically addresses categorical input parameters. This approach employs symmetric decision trees to improve the efficiency of the inference process when using pre-trained weak learning models. CatBoost excels in delivering high performance, especially when handling highly noisy data with diverse characteristics and intricate relationships. To accomplish this, CatBoost randomly orders all instances and assigns values to categorical features. Priority weight coefficients and a priority factor are utilised to reduce the influence of low-frequency category instances and noise on data distribution. This process can be represented as expressed in <xref ref-type="disp-formula" rid="e13">Equation 13</xref>:<disp-formula id="e13">
<mml:math id="m13">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <italic>p</italic> represents a prior value, and <italic>&#x3b2;</italic> denotes its corresponding weight.</p>
</sec>
<sec id="s3-1-10">
<label>3.1.10</label>
<title>Light gradient boosting machine (LightGBM)</title>
<p>LightGBM (<xref ref-type="bibr" rid="B28">Ke et al., 2017</xref>) is a decision tree-based algorithm that employs a leaf-wise growth strategy rather than the traditional depth-wise approach, leading to improved accuracy in more complex tree structures. It introduces two novel techniques, Exclusive Feature Bundling and Gradient-based One-Side Sampling, which enhance its efficiency and distinguish it from other gradient-boosted decision tree methods. The fundamental principle of LightGBM is to integrate <italic>M</italic> weak regression trees into a single, more powerful model, mathematically expressed in <xref ref-type="disp-formula" rid="e14">Equation 14</xref>:<disp-formula id="e14">
<mml:math id="m14">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>where <italic>f</italic>
<sub>
<italic>m</italic>
</sub>(<italic>x</italic>) and <italic>F</italic>(<italic>x</italic>) denote the output of the <italic>m</italic>th weak regression tree and the final model output, respectively. The leaf-wise strategy with depth constraints and the histogram-based technique are key advancements that improve the efficiency and accuracy of the LightGBM algorithm.</p>
</sec>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Performance indexes</title>
<p>This study utilizes three performance metrics: the root mean square error (<italic>RMSE</italic>), the coefficient of determination (<italic>R</italic>
<sup>2</sup>), and the <italic>a</italic>
<sub>20</sub> <italic>index</italic>. The equations for <italic>RMSE</italic>, <italic>R</italic>
<sup>2</sup>, and <italic>a</italic>
<sub>20</sub> are expressed in <xref ref-type="disp-formula" rid="e15">Equations 15</xref>&#x2013;<xref ref-type="disp-formula" rid="e17">17</xref>, respectively:<disp-formula id="e15">
<mml:math id="m15">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
<disp-formula id="e16">
<mml:math id="m16">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
<disp-formula id="e17">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>20</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mn>20</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>where <italic>Y</italic>
<sub>
<italic>i</italic>
</sub> and <italic>Y</italic>
<sub>
<italic>pi</italic>
</sub> represent the actual and predicted values for the <italic>i</italic>th observation, respectively; <italic>N</italic>
<sub>
<italic>t</italic>
</sub> denotes the number of testing models, and <italic>Y&#x305;</italic>
<sub>
<italic>pi</italic>
</sub> is the mean of the predicted values. The optimal ML model is characterized by the lowest <italic>RMSE</italic> and the highest <italic>R</italic>
<sup>2</sup> and <italic>a</italic>
<sub>20</sub> index values. The parameter <italic>M</italic> denotes the total number of samples in the dataset, while <italic>m</italic><sub>20</sub> refers to the number of samples for which the ratio of the observed value to the predicted value lies within the range of 0.80&#x2013;1.20. This range indicates a &#xb1;20% tolerance, commonly accepted in engineering practices as a benchmark for acceptable prediction accuracy. The <italic>a</italic>
<sub>20</sub>, defined as the ratio <italic>m</italic><sub>20</sub>/<italic>M</italic>, therefore provides an intuitive and practical measure of a model&#x2019;s predictive performance (<xref ref-type="bibr" rid="B10">Asteris et al., 2024a</xref>; <xref ref-type="bibr" rid="B11">Asteris et al., 2024b</xref>). In the case of a perfectly accurate model, the <italic>a</italic>
<sub>20</sub> would equal 1, indicating that all predictions fall within the acceptable range. The value of the proposed <italic>a</italic>
<sub>20</sub> lies in its interpretability and relevance to engineering applications, as it directly reflects the proportion of predictions that closely match the corresponding experimental results within a tolerable margin of error.</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Hyperparameters tuning</title>
<p>Once the dataset has been prepared and the ML technique selected, the next crucial step is defining the parameters of the model, which significantly influence its performance. In this study, hyperparameter optimization is carried out using a combination of grid search and <italic>K</italic>-fold CV to minimise the risk of overfitting. Initially, potential parameter ranges are defined as grids. The model is then trained iteratively, testing all possible parameter combinations, with performance evaluated using <italic>K</italic>-fold CV. This approach ensures a reliable assessment of predictive accuracy while reducing bias from the random division of training and testing data. The dataset is split into <italic>K</italic> equally sized subsets, and the model undergoes <italic>K</italic> iterations, where <italic>K</italic>&#x2212;1 subsets are used for training and the remaining subset for validation. The final model performance is determined by averaging the results across all <italic>K</italic> iterations. In this study, a 10-fold CV approach is implemented, as it is a widely recognised standard that partitions the data into ten groups, effectively reducing the risk of overfitting.</p>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Sensitivity analysis (SA)</title>
<p>
<xref ref-type="bibr" rid="B58">Saltelli et al. (2008)</xref> proposed a variance-based SA method to evaluate how changes in model input values affect the corresponding output. This technique examines interactions between input variables and the output factor by keeping all input parameters constant except for one, which is systematically varied (<xref ref-type="bibr" rid="B41">Liu et al., 2020</xref>).</p>
<sec id="s3-4-1">
<label>3.4.1</label>
<title>First-order sensitivity indices</title>
<p>The measurement vector in a multivariate <italic>k</italic>-input model can be expressed as <italic>y</italic> &#x3d; <italic>f</italic> (<italic>x</italic>
<sub>1</sub>, <italic>x</italic>
<sub>2</sub>, &#x2026; , <italic>x</italic>
<sub>
<italic>k</italic>
</sub>). The first-order index is determined using the <xref ref-type="disp-formula" rid="e18">Equation 18</xref>:<disp-formula id="e18">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> quantifies the influence of the variable <italic>x</italic>
<sub>
<italic>i</italic>
</sub> on the output. When <italic>x</italic>
<sub>
<italic>i</italic>&#x200b;</sub> is held constant, <inline-formula id="inf2">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the expected value of <italic>y</italic> computed over all inputs other than <italic>x</italic><sub><italic>i</italic></sub>, while <italic>V</italic>(<italic>y</italic>) denotes the unconditional variance of <italic>y</italic>.</p>
</sec>
<sec id="s3-4-2">
<label>3.4.2</label>
<title>Total-effect sensitivity indices</title>
<p>To capture the full variance of the output, higher-order indices for coupling terms must be considered, as the first-order index only accounts for the portion of output variation attributable to the variance of the input variable <italic>x</italic>
<sub>
<italic>i</italic>&#x200b;</sub>. Consequently, the total-effect index <italic>S</italic>
<sub>
<italic>T</italic>&#x200b;</sub> is employed to quantify the overall influence of <italic>x</italic>
<sub>
<italic>i</italic>
</sub> on the output variance. The total-effect index is defined as expressed in <xref ref-type="disp-formula" rid="e19">Equation 19</xref>:<disp-formula id="e19">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>where <inline-formula id="inf3">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf4">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the mean value of <italic>y</italic> when all variables except <italic>x</italic>
<sub>
<italic>i</italic>
</sub> are fixed and its variance, respectively. It is important to note that the difference between <italic>S</italic>
<sub>
<italic>i</italic>
</sub> and <italic>S</italic>
<sub>
<italic>T</italic>
</sub>&#x200b; reflects the interactions of <italic>x</italic>
<sub>
<italic>i</italic>
</sub> with other input variables.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Interpretable ML approaches</title>
<p>ML has transformed several disciplines, including structural engineering, by providing predictive models capable of addressing complex relationships and analysing large datasets (<xref ref-type="bibr" rid="B32">Kookalani et al., 2021</xref>; <xref ref-type="bibr" rid="B34">Kookalani et al., 2022b</xref>; <xref ref-type="bibr" rid="B35">Kookalani et al., 2022c</xref>; <xref ref-type="bibr" rid="B31">Kookalani and Cheng, 2021</xref>; <xref ref-type="bibr" rid="B70">Xiang et al., 2020a</xref>; <xref ref-type="bibr" rid="B71">Xiang et al., 2020b</xref>; <xref ref-type="bibr" rid="B72">Xiang et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Kookalani et al., 2024</xref>). While these ML models can deliver accurate predictions, they often function as &#x201c;black boxes,&#x201d; lacking transparency in explaining the underlying mechanical or physical principles. This limitation can undermine the credibility of the ML models. To address this challenge, the current study employs three interpretability techniques, including PDP, ALE, and SHAP. Feature significance is assessed by evaluating the increase in prediction error resulting from alterations to factor values. A feature is deemed significant if the error rises substantially after modification, whereas minimal error change indicates low significance.</p>
<sec id="s4-1">
<label>4.1</label>
<title>Partial dependence plot (PDP)</title>
<p>
<xref ref-type="bibr" rid="B22">Friedman (2001)</xref> introduced the PDP to examine the marginal effect of a specific parameter on the output by depicting the average outcome values across different parameter settings. The PDP is useful for identifying the relationship between a feature and the target variable. The partial dependence, represented as <italic>f</italic>
<sub>
<italic>S</italic>
</sub>, for a subset of features <italic>x</italic>
<sub>
<italic>S</italic>
</sub>, is defined as expressed in <xref ref-type="disp-formula" rid="e20">Equation 20</xref>:<disp-formula id="e20">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x222b;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>where <italic>x</italic>
<sub>
<italic>S</italic>
</sub> represents the factors considered for the PDP, while <italic>x</italic>
<sub>
<italic>C</italic>
</sub> denotes the complementary factors. A PDP can be generated for a dataset {<italic>X</italic>
<sub>
<italic>i</italic>
</sub>
<italic>, i</italic> &#x3d; 1<italic>, &#x2026; ,n</italic>}, as expressed in <xref ref-type="disp-formula" rid="e21">Equation 21</xref>:<disp-formula id="e21">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>A critical assumption of PDP is the independence of input parameters. When features are highly correlated, the analysis may involve artificial data samples that are unrealistic, resulting in significant bias in the estimated effect of the feature.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Accumulated local effects (ALE)</title>
<p>ALE is an unbiased alternative to PDP that represents the average effect of a feature within an ML algorithm (<xref ref-type="bibr" rid="B8">Apley and Zhu, 2020</xref>). Unlike PDP, which averages <italic>f</italic>
<sub>
<italic>S</italic>
</sub> (<italic>x</italic>
<sub>
<italic>S</italic>
</sub>) over all features and can produce biased results when features are highly correlated, ALE mitigates this issue by focusing on changes in predictions and constraining data to specific grids. ALE averages the changes in predictions by isolating the influence of correlated features, simplifying complex models by considering only one or two factors at a time. ALE can be expressed as <xref ref-type="disp-formula" rid="e22">Equation 22</xref>:<disp-formula id="e22">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>S</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="" separators="|">
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:msub>
</mml:mstyle>
<mml:msup>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>S</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="|" close="" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>where <italic>c</italic> is a fixed constant, <inline-formula id="inf5">
<mml:math id="m27">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>S</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> denotes the local effects of <italic>x</italic>
<sub>
<italic>S</italic>
</sub> on <inline-formula id="inf6">
<mml:math id="m28">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> at (<italic>x</italic>
<sub>1</sub>, <italic>x</italic>
<sub>
<italic>S</italic>
</sub>); <italic>z</italic>
<sub>0,1</sub> represents a value smaller than the minimum observation, and <italic>P</italic> (<italic>x</italic>
<sub>
<italic>C</italic>
</sub>&#x7c;<italic>x</italic>
<sub>
<italic>S</italic>
</sub>) indicates the density. ALE plots are centred around zero, making them particularly effective for correlated data and enhancing visualization. However, the choice of grid or interval can influence the plots and obscure data variability.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Shapley additive explanations (SHAP)</title>
<p>
<xref ref-type="bibr" rid="B44">Lundberg and Lee (2017)</xref> introduced the SHAP method, a technique grounded in conditional expectations and game theory, for assessing model predictions. SHAP is used to analyse the influence of individual input features on each output. Broadly, SHAP helps rank features based on their contribution to interaction effects. SHAP employs an additive feature attribution approach to create an interpretable model. The resulting model, represented as a linear function, is the sum of the actual contributions associated with each parameter. The interpretable framework is formulated as expressed in <xref ref-type="disp-formula" rid="e23">Equation 23</xref>:<disp-formula id="e23">
<mml:math id="m29">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>where <italic>x</italic> &#x3d; (<italic>x</italic>
<sub>1</sub>, <italic>x</italic>
<sub>2</sub>, &#x2026; , <italic>x</italic>
<sub>
<italic>M</italic>
</sub>) represents the <italic>M</italic> input variables; and <italic>x&#x2019;</italic>
<sub><italic>i</italic></sub> denotes the simplified inputs. Using a mapping function <italic>x</italic> &#x3d; <italic>h</italic><sub><italic>x</italic></sub>(<italic>x</italic>&#x2019;), the parameter <italic>x</italic>&#x2019; is transformed into <italic>x</italic>. Additionally, <italic>&#x3d5;</italic>
<sub>0</sub> and <italic>&#x3d5;</italic>
<sub>
<italic>i</italic>
</sub> represent a fixed value and the contribution of each parameter, respectively.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Comparative evaluation of interpretability techniques</title>
<p>PDP, ALE, and SHAP each offer distinct strengths in interpreting complex ML models, as presented in <xref ref-type="table" rid="T1">Table 1</xref>. PDP is easy to understand and effective for identifying overall trends but may misrepresent effects when features are correlated. ALE addresses this by providing unbiased local effects even with correlated inputs, though its accuracy depends on appropriate interval selection. SHAP offers the most comprehensive insights, attributing both global and local feature importance while capturing interaction effects, albeit at a higher computational cost. Their combined use provides a robust interpretability framework that balances simplicity, accuracy, and depth of insight.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Comparative summary of interpretability techniques.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Technique</th>
<th align="center">Advantages</th>
<th align="center">Limitations</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">PDP</td>
<td align="center">Simple and intuitive; shows global trends</td>
<td align="center">Assumes feature independence; can mislead with correlated variables</td>
</tr>
<tr>
<td align="center">ALE</td>
<td align="center">Handles correlated features; less biased than PDP</td>
<td align="center">Sensitive to interval size; offers less intuitive visuals than PDP</td>
</tr>
<tr>
<td align="center">SHAP</td>
<td align="center">Provides local and global insights; accounts for feature interactions; consistent</td>
<td align="center">Computationally intensive; complex to implement for large datasets</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Numerical examples</title>
<p>Shear connections in steel structures are generally designed to transfer only shear forces, with no bending moment. However, while this is generally true, there are instances where a bending moment is also present in the shear connection, which can become a critical factor in determining its dimensions. <xref ref-type="fig" rid="F1">Figure 1</xref> shows two samples of this type of connection.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Shear connection in steel structures: <bold>(a)</bold> Simple shear connection; <bold>(b)</bold> large plate shear connection of the I-beam to the column (<xref ref-type="bibr" rid="B78">Kalab, 2023</xref>).</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g001.tif">
<alt-text content-type="machine-generated">Illustrations of two beam-to-column connections. (a) Shows HEB 140 connected to IPE 160 with two M12 bolts, indicating the line of shear transfer and moment force M. (b) Shows HEB 260 connected to IPE 400 with ten M20 bolts, indicating shear forces Vz along e1 and e2, with the line of shear transfer and moment force M.</alt-text>
</graphic>
</fig>
<p>This study involved the careful assembly of a comprehensive dataset comprising 443 samples extracted from existing literature (<xref ref-type="bibr" rid="B46">Lyu et al., 2020a</xref>; <xref ref-type="bibr" rid="B54">Mo&#x17e;e and Beg, 2014</xref>; <xref ref-type="bibr" rid="B53">Mo&#x17e;e and Beg, 2010</xref>; <xref ref-type="bibr" rid="B69">Wang et al., 2017</xref>; <xref ref-type="bibr" rid="B1">Ahmed and Teh, 2019</xref>; <xref ref-type="bibr" rid="B56">Rex and Easterling, 2003</xref>; <xref ref-type="bibr" rid="B30">Kim and Yura, 1999</xref>; <xref ref-type="bibr" rid="B73">Yang et al., 2013</xref>; <xref ref-type="bibr" rid="B26">Hai et al., 2019</xref>; <xref ref-type="bibr" rid="B51">Moe and Beg, 2011</xref>; <xref ref-type="bibr" rid="B29">Kim and Lee, 2020</xref>; <xref ref-type="bibr" rid="B47">Lyu et al., 2020b</xref>; <xref ref-type="bibr" rid="B20">Freitas et al., 2005</xref>; <xref ref-type="bibr" rid="B25">Guo et al., 2020</xref>; <xref ref-type="bibr" rid="B55">Puthli and Fleischer, 2001</xref>; <xref ref-type="bibr" rid="B52">Mo&#x17e;e, 2018</xref>). The input data used in the analysis comprise six distinct features that, according to established design standards and experimental studies, govern bearing and shear-out behavior in bolted connections. Each feature provides critical insight into the mechanical response and characteristics of the system. These features include end distance (<italic>e</italic>
<sub>1</sub>), edge distance (<italic>e</italic>
<sub>2</sub>), pitch parallel to loading (<italic>p</italic>
<sub>1</sub>), pitch perpendicular to loading (<italic>p</italic>
<sub>2</sub>), the number of bolt rows (<italic>N</italic>
<sub>
<italic>r</italic>
</sub>), and stress ratio (<italic>fu</italic>/<italic>fy</italic>). Consistent with conventional design formulations, geometric quantities are expressed as normalized ratios, <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, where <italic>d</italic>
<sub>0</sub> is the bolt-hole diameter, while <italic>fu</italic>/<italic>fy</italic> is already dimensionless. This choice ensures that the model learns from physically meaningful, scale-independent variables that govern bearing capacity across different bolt diameters. A visual representation of these features is provided in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Input parameters (<xref ref-type="bibr" rid="B74">Zakir et al., 2022</xref>).</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g002.tif">
<alt-text content-type="machine-generated">Diagram of a metal plate with four holes arranged in two rows. Labels indicate &#x22;Load&#x22; as an upward arrow, &#x22;Row&#x22; near the bottom holes, and dimensions marked as \( p_1, p_2, e_1, e_2, t \). Hole diameters are labeled \( d \) and \( d_0 \).</alt-text>
</graphic>
</fig>
<p>The key output parameter, represented as <italic>f</italic>(<italic>x</italic>) &#x3d; <italic>F</italic>
<sub>max</sub>
<italic>/f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>, corresponds to the normalized bearing capacity, <italic>F</italic>
<sub>max</sub> is the maximum measured load in the test, <italic>f</italic>
<sub>
<italic>u</italic>
</sub> is the ultimate tensile strength of the connected plate, <italic>n</italic> is the total number of bolts in the connection, <italic>d</italic> is the bolt-hole diameter, and <italic>t</italic> is the thickness of the critical plate. This normalization enables standardized comparisons across various scenarios and conditions, ensuring more consistent and reliable analyses. This study utilizes three performance metrics: the <italic>RMSE</italic>, expressed in terms of the normalized bearing capacity (<italic>F</italic>
<sub>max</sub>
<italic>/f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>), <italic>R</italic>
<sup>2</sup>, and <italic>a</italic>
<sub>20</sub> index. As <italic>F</italic>
<sub>max</sub>
<italic>/f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> is dimensionless, the <italic>RMSE</italic> values reported in this study are also dimensionless. <xref ref-type="table" rid="T2">Table 2</xref> outlines the detailed ranges for each input parameter, ensuring that the data utilized for the analysis adheres to the specified limits and criteria.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Statistical attributes of dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Attribute</th>
<th align="center">Symbol</th>
<th align="center">Minimum</th>
<th align="center">Maximum</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Normalized end distance</td>
<td align="center">
<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>
</td>
<td align="center">0.8</td>
<td align="center">5</td>
<td align="center">2.9</td>
</tr>
<tr>
<td align="left">Normalized edge distance</td>
<td align="center">
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>
</td>
<td align="center">0.8</td>
<td align="center">7.5</td>
<td align="center">4.15</td>
</tr>
<tr>
<td align="left">Normalized pitch parallel to loading</td>
<td align="center">
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>
</td>
<td align="center">0</td>
<td align="center">4</td>
<td align="center">2</td>
</tr>
<tr>
<td align="left">Normalized pitch perpendicular to loading</td>
<td align="center">
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>
</td>
<td align="center">0</td>
<td align="center">8.4</td>
<td align="center">4.2</td>
</tr>
<tr>
<td align="left">Number of bolt rows</td>
<td align="center">
<italic>N</italic>
<sub>
<italic>r</italic>
</sub>
</td>
<td align="center">1</td>
<td align="center">4</td>
<td align="center">2.5</td>
</tr>
<tr>
<td align="left">Ultimate to yield stress ratio of the critical steel plate</td>
<td align="center">
<italic>fu</italic>/<italic>fy</italic>
</td>
<td align="center">1.04</td>
<td align="center">1.7</td>
<td align="center">1.37</td>
</tr>
<tr>
<td align="left">Normalized bearing capacity</td>
<td align="center">
<italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>
</td>
<td align="center">0.28</td>
<td align="center">5.44</td>
<td align="center">2.86</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The distribution and coverage of the dataset were examined using the scatterplot matrix shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, which reports histograms of each variable on the diagonal and pairwise scatter plots off the diagonal. The plots indicate that all input features and the normalized bearing capacity span a broad portion of their admissible ranges, with no single narrow interval dominating the sample. While some clustering is visible at practically preferred detailing values (e.g., discrete levels of pitch and bolt rows), the observations are well spread across the joint feature space. In addition, all models are trained and assessed using 10-fold cross-validation with random shuffling, ensuring that each fold contains a representative mix of the feature ranges and reducing the risk of biased performance due to data imbalance.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Scatterplot matrix showing empirical distributions.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g003.tif">
<alt-text content-type="machine-generated">Scatter plot matrix displaying relationships between multiple variables, including e1/d0, e2/d0, p1/d0, p2/d0, Nr, fu/y, and Fmax/fundt. Each cell represents scatter plots or histograms of these variable pairs.</alt-text>
</graphic>
</fig>
<p>The range and distribution spread of each variable were examined to assess their variability across the dataset and to critically evaluate the coverage of the input space. This analysis confirmed that the data span a broad and continuous range of values for each input parameter. To further validate the representativeness of the dataset, a sensitivity analysis was conducted to identify and evaluate the most influential input parameters (<xref ref-type="bibr" rid="B60">Sarir et al., 2021b</xref>). Our variance-based global sensitivity analysis revealed balanced contributions from all input variables, supporting the conclusion of comprehensive data coverage. Moreover, interpretability techniques such as PDPs and ALEs demonstrated smooth and interpretable relationships across the entire domain of each variable, indicating that the model had sufficient data to learn from throughout the feature space. These insights confirm the robustness of the dataset and its suitability for training predictive ML models.</p>
<p>A correlation matrix was created to gain a deeper understanding of the relationships and interactions among the input parameters, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. Each correlation coefficient within the matrix represents the strength of the interaction between two parameters, providing insight into their interdependencies. The analysis revealed several noteworthy associations. A strong correlation coefficient of 0.84 was identified between the number of bolt rows and the normalized pitch parallel to loading, indicating that an increase in the number of bolt rows tends to correspond with an increase in the pitch parallel to loading. A moderate correlation of 0.46 was found between the number of bolt rows and the normalized edge distance, suggesting a moderate relationship where changes in the number of bolt rows can influence the edge distance to some extent. A correlation of 0.44 was observed between the normalized pitch parallel to loading and the normalized edge distance, indicating a moderate relationship where changes in one may affect the other. No significant correlations were found for the other parameters, suggesting that they exhibit independent behaviour and make distinct contributions to the system being studied.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Correlation matrix for input variables.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g004.tif">
<alt-text content-type="machine-generated">Correlation matrix heatmap displaying relationships among six variables: e1/do, e2/do, p1/do, p2/do, Nr, and fu/fy. Color scale ranges from dark blue for negative correlation to yellow for strong positive correlation, with values labeled in each cell.</alt-text>
</graphic>
</fig>
<sec id="s5-1">
<label>5.1</label>
<title>Preprocessing and hyperparameters fine-tuning</title>
<p>Effective ML model development requires rigorous data preprocessing and systematic hyperparameter tuning to ensure robust and generalizable performance. In this study, all input features were normalized with respect to the bolt-hole diameter <italic>d</italic>
<sub>0</sub> or relevant physical properties to ensure comparability across different test cases and avoid scale-related bias in models sensitive to input magnitude. Specifically, geometric inputs such as end distance, edge distance, and pitch dimensions were divided by <italic>d</italic>
<sub>0</sub> to create dimensionless variables. This representation aligns with established bearing and shear-out design formulations and enables the model to generalise across connections with different bolt sizes. From a numerical standpoint, using normalized features mitigates scale-related bias in algorithms that are sensitive to feature magnitude, improving optimization stability and reducing the risk that variables with larger numerical ranges dominate the learning process. If raw, non-normalized geometric dimensions were used instead, the model would be forced to infer the relevant geometric ratios implicitly, potentially reducing generalisation quality across bolt diameters and degrading performance for scale-sensitive algorithms, without offering a clear advantage in predictive accuracy. This standardization improves convergence and stability across ML algorithms. The dataset compiled from 443 experimental results was reviewed for completeness. Since the source data was manually extracted from published studies, any entries with incomplete feature sets were excluded from analysis to avoid bias introduced by imputation. As a result, the dataset used in this study contained no missing values.</p>
<p>Tuning hyperparameters is a crucial process in refining ML models to achieve optimal performance. It entails modifying the settings of the model to obtain the best results for a given dataset. A grid search method is often used for this purpose, where a range of hyperparameter values is systematically explored to find the combination that provides the highest performance. 10-fold CV is incorporated to ensure the selected hyperparameters are reliable and to prevent overfitting. In this technique, the dataset is divided into 10 roughly equal-sized subsets. The model is trained and evaluated 10 times, with each iteration selecting a different subset as the validation set while the remaining subsets are used for training. This approach systematically explored combinations of hyperparameters and assessed their impact on model performance across multiple data splits. The robustness of this methodology lies not only in its breadth of parameter search but also in the multi-metric evaluation strategy used to determine the best-performing models. This approach guarantees that the performance of the model is assessed across various data splits, offering a more dependable measure of its ability to generalize. For each model and parameter combination, two primary metrics were computed: <italic>RMSE</italic> and <italic>R</italic>
<sup>2</sup>. Both metrics were computed for each fold, and their average values across the 10 folds were used to evaluate each hyperparameter set. The following criteria were applied:<list list-type="bullet">
<list-item>
<p>Primary selection metric: The optimal configuration minimized the average <italic>RMSE</italic> across the 10 folds, ensuring minimal prediction error.</p>
</list-item>
<list-item>
<p>Secondary validation metric: The selected configuration was also required to yield a high average <italic>R</italic>
<sup>2</sup>, ensuring that the model not only minimized error but also captured underlying data structure.</p>
</list-item>
<list-item>
<p>Stability criterion: Configurations that exhibited low variance in <italic>RMSE</italic> across folds were favored, indicating model consistency and robustness.</p>
</list-item>
</list>
</p>
<p>Different models were tuned with specific parameter ranges, and selections reflected trade-offs between bias and variance. <xref ref-type="table" rid="T3">Table 3</xref> presents the optimal hyperparameter values for each ML algorithm, which were selected based on their ability to maximize predictive accuracy while balancing bias and variance.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Optimal hyperparameters for <italic>f</italic>(<italic>x</italic>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">Optimal configuration</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">LR</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">RR</td>
<td align="left">Alpha &#x3d; 1</td>
</tr>
<tr>
<td align="left">SVR</td>
<td align="left">Kernel &#x3d; RBF, C &#x3d; 5, degree &#x3d; 1, epsilon &#x3d; 0.1</td>
</tr>
<tr>
<td align="left">KNN</td>
<td align="left">Leaf_size &#x3d; 20, n_neighbors &#x3d; 2, p &#x3d; 1</td>
</tr>
<tr>
<td align="left">DT</td>
<td align="left">Max_depth &#x3d; 6, min_samples_leaf &#x3d; 1, min_samples_split &#x3d; 3, random_state &#x3d; 2</td>
</tr>
<tr>
<td align="left">RF</td>
<td align="left">Max_depth &#x3d; 9, max_features &#x3d; 3, n_estimators &#x3d; 100</td>
</tr>
<tr>
<td align="left">AdaBoost</td>
<td align="left">Learning_rate &#x3d; 1, n_estimators &#x3d; 1,000, random_state &#x3d; 0</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="left">Colsample_bytree &#x3d; 0.5, learning_rate &#x3d; 0.1, max_depth &#x3d; 5, n_estimators &#x3d; 500</td>
</tr>
<tr>
<td align="left">CatBoost</td>
<td align="left">Depth &#x3d; 7, iterations &#x3d; 500, learning_rate &#x3d; 0.1</td>
</tr>
<tr>
<td align="left">LightGBM</td>
<td align="left">Colsample_bytree &#x3d; 0.9, learning_rate &#x3d; 0.1, max_depth &#x3d; 8, n_estimators &#x3d; 1,000</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s5-2">
<label>5.2</label>
<title>Sensitivity analysis</title>
<p>The sensitivity indices for the first-order (<italic>S</italic>
<sub>
<italic>i</italic>
</sub>) and total effect (<italic>S</italic>
<sub>
<italic>T</italic>
</sub>) of the input variables are determined using the regression models, as shown in <xref ref-type="table" rid="T4">Table 4</xref> and <xref ref-type="fig" rid="F5">Figure 5</xref>. These indices illustrate the contributions of the input variables to the variance of the model output. It can be seen that the variables <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> consistently show high contributions to the variance of the model, especially for <italic>S</italic>
<sub>
<italic>i</italic>
</sub>, across most models. The variable <italic>fu</italic>/<italic>fy</italic> generally has the lowest sensitivity indices, indicating a negligible influence on the variance of the model. The sensitivity of <italic>N</italic>
<sub>
<italic>r</italic>
</sub> varies across models, being significant in some cases (e.g., for SVR and RF) but less so in others. The LR and RR models exhibit similar sensitivity patterns, with <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> contributing the most to the output variance (<italic>S</italic>
<sub>
<italic>i</italic>
</sub> &#x2248; 0.35&#x2013;0.39). For the SVR model, sensitivity indices are more evenly distributed among variables, with <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub> showing significant contributions. In KNN, <italic>S</italic>
<sub>
<italic>T</italic>
</sub> indicates high combined interactions (<italic>S</italic>
<sub>
<italic>T</italic>
</sub> values exceed 1.9). DT shows relatively even sensitivity, with moderate values for <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>. RF highlights strong contributions from <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, and interactions among other variables (<italic>S</italic>
<sub>
<italic>T</italic>
</sub> &#x2248; 1.1). For the AdaBoost model, variables <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> dominate both <italic>S</italic>
<sub>
<italic>i</italic>
</sub> and <italic>S</italic>
<sub>
<italic>T</italic>
</sub>. XGBoost, CatBoost, and LightGBM generally show high interaction effects, with <italic>S</italic>
<sub>
<italic>T</italic>
</sub> often exceeding 1. The <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> variable consistently has the highest impact. The total variance contribution for first-order indices (&#x2211;<italic>S</italic>
<sub>
<italic>i</italic>
</sub>) is less than or close to 1 in most cases, indicating that individual variables largely explain the model variance. For total effects (&#x2211;<italic>S</italic>
<sub>
<italic>T</italic>
</sub>), values exceed 1 for certain models (e.g., KNN, LightGBM), suggesting significant interaction effects among the input variables.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>First-order (<italic>S</italic>
<sub>
<italic>i</italic>
</sub>) and total effect (<italic>S</italic>
<sub>
<italic>T</italic>
</sub>) sensitivity indices for <italic>f</italic>(<italic>x</italic>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">Sensitivity index</th>
<th align="center">
<italic>e</italic>
<sub>1</sub>
/<italic>d</italic>
<sub>0</sub>
</th>
<th align="center">
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>
</th>
<th align="center">
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>
</th>
<th align="center">
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>
</th>
<th align="center">
<italic>N</italic>
<sub>
<italic>r</italic>
</sub>
</th>
<th align="center">
<italic>fu</italic>/<italic>fy</italic>
</th>
<th align="center">&#x2211;</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">LR</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.355365</td>
<td align="center">0.393051</td>
<td align="center">0.002714</td>
<td align="center">0.017332</td>
<td align="center">0.228118</td>
<td align="center">0.007624</td>
<td align="center">1.004204</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.354044</td>
<td align="center">0.394288</td>
<td align="center">0.00286</td>
<td align="center">0.017706</td>
<td align="center">0.227382</td>
<td align="center">0.007668</td>
<td align="center">1.003948</td>
</tr>
<tr>
<td rowspan="2" align="left">RR</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.356894</td>
<td align="center">0.395383</td>
<td align="center">0.002182</td>
<td align="center">0.018554</td>
<td align="center">0.224645</td>
<td align="center">0.006471</td>
<td align="center">1.004129</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.355674</td>
<td align="center">0.39668</td>
<td align="center">0.002312</td>
<td align="center">0.018931</td>
<td align="center">0.223881</td>
<td align="center">0.006512</td>
<td align="center">1.00399</td>
</tr>
<tr>
<td rowspan="2" align="left">SVR</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.059165</td>
<td align="center">0.154455</td>
<td align="center">0.014107</td>
<td align="center">0.149582</td>
<td align="center">0.178913</td>
<td align="center">0.00158</td>
<td align="center">0.557802</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.307411</td>
<td align="center">0.288294</td>
<td align="center">0.100557</td>
<td align="center">0.495602</td>
<td align="center">0.282168</td>
<td align="center">0.004543</td>
<td align="center">1.478575</td>
</tr>
<tr>
<td rowspan="2" align="left">KNN</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.013174</td>
<td align="center">0.229624</td>
<td align="center">0.038107</td>
<td align="center">0.131202</td>
<td align="center">0.025796</td>
<td align="center">0.002347</td>
<td align="center">0.44025</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.350931</td>
<td align="center">0.526861</td>
<td align="center">0.219344</td>
<td align="center">0.543754</td>
<td align="center">0.271965</td>
<td align="center">0.023632</td>
<td align="center">1.936487</td>
</tr>
<tr>
<td rowspan="2" align="left">DT</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.104547</td>
<td align="center">0.306555</td>
<td align="center">0.001471</td>
<td align="center">0.000618</td>
<td align="center">0.214867</td>
<td align="center">0.069672</td>
<td align="center">0.69773</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.27974</td>
<td align="center">0.510827</td>
<td align="center">0.012023</td>
<td align="center">0.024494</td>
<td align="center">0.378748</td>
<td align="center">0.200154</td>
<td align="center">1.405986</td>
</tr>
<tr>
<td rowspan="2" align="left">RF</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.321323</td>
<td align="center">0.31375</td>
<td align="center">0.060056</td>
<td align="center">0.118243</td>
<td align="center">0.095438</td>
<td align="center">0.002225</td>
<td align="center">0.911035</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.379783</td>
<td align="center">0.348437</td>
<td align="center">0.068097</td>
<td align="center">0.151227</td>
<td align="center">0.119505</td>
<td align="center">0.025398</td>
<td align="center">1.092447</td>
</tr>
<tr>
<td rowspan="2" align="left">AdaBoost</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.235724</td>
<td align="center">0.327846</td>
<td align="center">0.026983</td>
<td align="center">0.046607</td>
<td align="center">0.127748</td>
<td align="center">0.00376</td>
<td align="center">0.768668</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.429271</td>
<td align="center">0.407525</td>
<td align="center">0.046982</td>
<td align="center">0.07014</td>
<td align="center">0.275602</td>
<td align="center">0.026335</td>
<td align="center">1.255855</td>
</tr>
<tr>
<td rowspan="2" align="left">XGBoost</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.336854</td>
<td align="center">0.358612</td>
<td align="center">0.047908</td>
<td align="center">0.012907</td>
<td align="center">0.070186</td>
<td align="center">0.02074</td>
<td align="center">0.847207</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.407268</td>
<td align="center">0.421556</td>
<td align="center">0.087578</td>
<td align="center">0.021108</td>
<td align="center">0.111595</td>
<td align="center">0.047171</td>
<td align="center">1.096276</td>
</tr>
<tr>
<td rowspan="2" align="left">CatBoost</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.166621</td>
<td align="center">0.273095</td>
<td align="center">0.037458</td>
<td align="center">0.168462</td>
<td align="center">0.062072</td>
<td align="center">0.006087</td>
<td align="center">0.713795</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.355354</td>
<td align="center">0.339181</td>
<td align="center">0.10215</td>
<td align="center">0.314361</td>
<td align="center">0.138233</td>
<td align="center">0.059549</td>
<td align="center">1.308828</td>
</tr>
<tr>
<td rowspan="2" align="left">LightGBM</td>
<td align="left">
<italic>S</italic>
<sub>
<italic>i</italic>
</sub>
</td>
<td align="center">0.20592</td>
<td align="center">0.506169</td>
<td align="center">0.015883</td>
<td align="center">0.002926</td>
<td align="center">0.094909</td>
<td align="center">0.010239</td>
<td align="center">0.836046</td>
</tr>
<tr>
<td align="left">
<italic>S</italic>
<sub>
<italic>T</italic>
</sub>
</td>
<td align="center">0.333635</td>
<td align="center">0.600722</td>
<td align="center">0.031425</td>
<td align="center">0.020936</td>
<td align="center">0.143146</td>
<td align="center">0.030892</td>
<td align="center">1.160756</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Sensitivity analysis of <italic>f</italic>(<italic>x</italic>): <bold>(a)</bold> First-order; <bold>(b)</bold> Total-effect.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g005.tif">
<alt-text content-type="machine-generated">Two bar charts labeled (a) and (b) compare performance metrics, with a vertical axis from 0 to 1 and categories e1/do, e2/do, p1/do, p2/do, Nr, and fu/fy on the horizontal axis. Multiple methods such as LR, RR, SVM, KNN, DT, RF, Adaboost, XGBOOST, CatBoost, and lightgbm are represented in different colors.</alt-text>
</graphic>
</fig>
<p>In general, this SA is essential for identifying critical inputs and their interactions, guiding feature selection or model improvement strategies. The consistently high indices of <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> confirm them as primary design drivers, which can therefore be prioritised in the analyses. By contrast, the systematically low first-order and total-effect indices of <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> indicate that it is a secondary descriptor; it can be fixed or omitted in simplified surrogate models with limited expected impact on predictive performance, although it is retained here for physical completeness. From a model-development standpoint, the pronounced gaps between <italic>S</italic>
<sub>
<italic>i</italic>
</sub> and <italic>S</italic>
<sub>
<italic>T</italic>
</sub> for KNN, RF, XGBoost, CatBoost, and LightGBM confirm the presence of strong interaction effects, justifying the adoption of tree-based ensemble models that can capture higher-order interactions and guiding both the selection of CatBoost as the final surrogate and the design of the two-variable contour analyses.</p>
</sec>
<sec id="s5-3">
<label>5.3</label>
<title>Regression model</title>
<p>
<xref ref-type="fig" rid="F6">Figure 6</xref> presents a set of regression plots comparing predicted and measured values across different ML models. Each subfigure contains four plots:<list list-type="alpha-lower">
<list-item>
<p>Histogram of Residuals: Displays the distribution of residuals (measured &#x2212; predicted), showing the error spread;</p>
</list-item>
<list-item>
<p>Residuals vs. Predicted Values: A scatterplot highlighting how residuals vary across the predicted range. Blue and red points distinguish the training and test sets, respectively;</p>
</list-item>
<list-item>
<p>Predicted vs. Measured Scatterplot: A scatterplot comparing predicted values against actual measured values, with a diagonal line (<italic>y</italic> &#x3d; <italic>x</italic>) as a reference for perfect predictions;</p>
</list-item>
<list-item>
<p>Histogram of Predictions: Displays the distribution of predicted values in relation to the actual measured range.</p>
</list-item>
</list>
</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Regression plot of <italic>f</italic>(<italic>x</italic>): <bold>(a)</bold> LR; <bold>(b)</bold> RR; <bold>(c)</bold> SVR; <bold>(d)</bold> KNN; <bold>(e)</bold> DT; <bold>(f)</bold> RF; <bold>(g)</bold> AdaBoost; <bold>(h)</bold> XGBoost; <bold>(i)</bold> CatBoost; <bold>(j)</bold> LightGBM.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g006.tif">
<alt-text content-type="machine-generated">A series of plots labeled from (a) to (j) show the residuals and predicted versus measured values of a dataset. Each plot has three sections: a histogram of residuals, a scatter plot with residuals and a linear fit line, and a histogram of predicted versus measured values. Data points are colored blue for the train set and red for the test set. The plots vary slightly in distribution and spread, indicating different data behaviors in each case.</alt-text>
</graphic>
</fig>
<p>The diagonal line in the predicted vs. measured scatterplot serves as a reference to evaluate prediction accuracy. Points closer to the line indicate better performance. The spread in the residuals vs. predicted values plot reveals whether the model has systematic bias (e.g., over- or under-prediction for certain ranges). Differences in the residual and prediction histograms reflect variations in model accuracy and distribution alignment. The figure highlights how different models behave in terms of prediction accuracy, bias, and variance. Color coding (e.g., red and blue points) differentiates between subsets of data or categories, indicating performance consistency across different data types.</p>
<p>From visual inspection, XGBoost, CatBoost, and LightGBM appear to perform better. The residual histograms for these models are narrower and more centred around 0 compared to others. In the residual vs. predicted plots, no clear patterns or trends are visible, indicating reduced systematic error. The scatterplot of predicted vs. measured values shows that points are closer to the diagonal line, reflecting better prediction accuracy. LR and KNN seem to perform the worst, as they have wider residual distributions and systematic patterns in the residuals vs. predicted plots, suggesting poorer performance.</p>
<p>In general, XGBoost, CatBoost, and LightGBM leverage gradient boosting techniques, which combine the strengths of multiple weak learners (e.g., decision trees) to iteratively minimize error. When properly tuned, these models are robust to overfitting and effectively handle complex relationships and interactions within the data. In contrast, simpler models like LR or KNN may fail to capture non-linear relationships or interactions, leading to poorer performance.</p>
<p>In conclusion, the gradient boosting models (XGBoost, CatBoost, and LightGBM) perform better due to their superior ability to capture complex patterns, resulting in tighter residual distributions, minimal bias, and high alignment between predicted and measured values.</p>
<p>
<xref ref-type="fig" rid="F7">Figure 7</xref> shows Taylor Diagrams used to evaluate and compare the performance of different ML models during (a) the training phase and (b) the testing phase. Taylor diagrams provide a concise summary of how well a model matches observations by plotting three statistics simultaneously:<list list-type="bullet">
<list-item>
<p>Standard Deviation (horizontal axis): This indicates the variability of the predicted data relative to the observed data. Ideally, the standard deviation of the model (represented by the markers) should be close to that of the reference (red star).</p>
</list-item>
<list-item>
<p>Correlation Coefficient (curved axis): This measures the linear relationship between predicted and observed data. Values closer to 1 indicate a strong correlation (good agreement between predicted and observed).</p>
</list-item>
<list-item>
<p>Centred Root Mean Square Error (<italic>CRMSE</italic>) (distance from the red star): This represents the overall error magnitude between predicted and observed data, excluding bias. Smaller distances imply better performance.</p>
</list-item>
</list>
</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Taylor Diagram of ML models: <bold>(a)</bold> training; <bold>(b)</bold> testing.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g007.tif">
<alt-text content-type="machine-generated">Two correlation circles are shown, labeled (a) and (b), each plotting machine learning models with axes for standard deviation and correlation. Models are marked with colored circles and a key identifies each model: LR, RR, kNN, SVR, DT, RF, XG, ADA, LGBM, and CAT. The reference point is marked with a star.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="fig" rid="F7">Figure 7a</xref> shows the training phase. Most models cluster near the reference, indicating high correlation and standard deviation close to the reference during training. Certain models, such as XGBoost and LightGBM, performed better by showing higher correlation and smaller <italic>CRMSE</italic> compared to others. <xref ref-type="fig" rid="F7">Figure 7b</xref> presents the testing phase. The models show more variation in their correlation and standard deviation compared to the training phase. The scatter suggests that some models generalize better to unseen data (higher correlation and smaller <italic>CRMSE</italic>), while others may overfit the training data. It can be seen that the CatBoost model outperforms the others, as it maintains relatively high correlation coefficients compared to the other models, indicating good generalization on unseen data. Its standard deviation remains closer to the reference compared to the other models, and its <italic>CRMSE</italic> value (distance to the red star) is smaller than those of the other models, suggesting lower prediction errors on the testing data.</p>
<p>
<xref ref-type="table" rid="T5">Table 5</xref> evaluates the performance of various ML-based regression models for predicting <italic>f</italic>(<italic>x</italic>). The metrics used to compare the models are Average <italic>R</italic>
<sup>2</sup>, Average <italic>RMSE</italic>, and <italic>a</italic>
<sub>20</sub> index. Both LR and RR have low <italic>R</italic>
<sup>2</sup> values (&#x223c;0.286&#x2013;0.320), indicating poor explanatory power. The <italic>RMSE</italic> is relatively high (0.554&#x2013;0.556), suggesting less accurate predictions. Both SVR and KNN show significantly higher <italic>R</italic>
<sup>2</sup> values (&#x223c;0.685&#x2013;0.686), indicating better predictive accuracy. KNN also achieves a low <italic>RMSE</italic> (0.360), suggesting precise predictions relative to the simpler models. DT presents moderate performance with <italic>R</italic>
<sup>2</sup> &#x3d; 0.320, <italic>RMSE</italic> &#x3d; 0.526, and <italic>a</italic>
<sub>20</sub> &#x3d; 0.805. RF shows better performance (<italic>R</italic>
<sup>2</sup> &#x3d; 0.703, <italic>RMSE</italic> &#x3d; 0.363, and <italic>a</italic>
<sub>20</sub> &#x3d; 0.827). CatBoost has the highest <italic>R</italic>
<sup>2</sup> (0.711), showing the best explanatory power among all models. XGBoost and CatBoost have relatively low <italic>RMSE</italic> values (0.359), indicating good predictive precision. AdaBoost and LightGBM perform moderately, with lower <italic>R</italic>
<sup>2</sup> values and higher <italic>RMSE</italic> compared to CatBoost and XGBoost. As a result, CatBoost has the highest <italic>R</italic>
<sup>2</sup> (0.711), indicating it explains the target variable variance the best.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Performance of ML based regression models for <italic>f</italic>(<italic>x</italic>) prediction.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Regression method</th>
<th align="center">Average <italic>R</italic>
<sup>2</sup>
</th>
<th align="center">Average <italic>RMSE</italic>
</th>
<th align="center">
<italic>a</italic>
<sub>20</sub>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">LR</td>
<td align="center">0.286</td>
<td align="center">0.554</td>
<td align="center">0.511</td>
</tr>
<tr>
<td align="center">RR</td>
<td align="center">0.320</td>
<td align="center">0.556</td>
<td align="center">0.511</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.686</td>
<td align="center">0.362</td>
<td align="center">0.496</td>
</tr>
<tr>
<td align="center">KNN</td>
<td align="center">0.685</td>
<td align="center">0.360</td>
<td align="center">0.759</td>
</tr>
<tr>
<td align="center">DT</td>
<td align="center">0.320</td>
<td align="center">0.526</td>
<td align="center">0.805</td>
</tr>
<tr>
<td align="center">RF</td>
<td align="center">0.703</td>
<td align="center">0.363</td>
<td align="center">0.827</td>
</tr>
<tr>
<td align="center">AdaBoost</td>
<td align="center">0.522</td>
<td align="center">0.454</td>
<td align="center">0.617</td>
</tr>
<tr>
<td align="center">XGBoost</td>
<td align="center">0.688</td>
<td align="center">0.359</td>
<td align="center">0.857</td>
</tr>
<tr>
<td align="center">CatBoost</td>
<td align="center">
<bold>0.711</bold>
</td>
<td align="center">
<bold>0.359</bold>
</td>
<td align="center">
<bold>0.872</bold>
</td>
</tr>
<tr>
<td align="center">LightGBM</td>
<td align="center">0.573</td>
<td align="center">0.393</td>
<td align="center">0.835</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Among all models, the CatBoost model achieves the highest average <italic>R</italic>
<sup>2</sup> and one of the lowest <italic>RMSE</italic> values. Given that the normalised bearing capacity ranges from 0.28 to 5.44 with an average of 2.86, this <italic>RMSE</italic> corresponds to an error of approximately 12%&#x2013;13% of the mean response and about 7% of the full range. Furthermore, the <italic>a</italic>
<sub>20</sub> index of 0.872 indicates that around 87% of predictions lie within &#xb1;20% of the experimental values. Although this does not eliminate variability, it represents a substantial improvement over simple linear models and is comparable to the scatter typically observed between design-code predictions and test results for bolted connections. On this basis, CatBoost is selected as the reference surrogate model for the interpretability and parametric analyses in the following sections, with the understanding that it is intended as a complementary, not stand-alone, design tool.</p>
</sec>
</sec>
<sec id="s6">
<label>6</label>
<title>Interpretable methods</title>
<p>
<xref ref-type="fig" rid="F8">Figure 8</xref> displays the feature importance derived from a CatBoost model. The bar chart shows the relative importance of features, ranked in descending order of their contribution to model predictions. The features and their corresponding relative importance values are as follows:<list list-type="bullet">
<list-item>
<p>
<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: The most important feature, with the highest relative importance of approximately 35%.</p>
</list-item>
<list-item>
<p>
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: The second most important feature, slightly lower than <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, with a relative importance of around 30%.</p>
</list-item>
<list-item>
<p>
<italic>N</italic>
<sub>
<italic>r</italic>
</sub>: The third feature, with a relative importance close to 20%.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: A moderately significant feature, with a relative importance of about 10%.</p>
</list-item>
<list-item>
<p>
<italic>fu</italic>/<italic>fy</italic>: A less significant feature, with a relative importance around 9%.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: The least significant feature, with a relative importance of about 8%.</p>
</list-item>
</list>
</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>CatBoost feature importance.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g008.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Feature Importances&#x22; displaying the relative importance of various features. The features are listed on the y-axis: e1/do, e2/do, Nr, p1/do, fu/fy, and p2/do. The x-axis shows the range from zero to thirty-five. Feature e1/do has the highest importance, followed by e2/do, Nr, p1/do, fu/fy, and p2/do.</alt-text>
</graphic>
</fig>
<p>The chart effectively highlights the varying contributions of these features, helping to identify which factors most strongly influence the performance of the model. This insight can guide further analysis or refinement of the predictive model.</p>
<sec id="s6-1">
<label>6.1</label>
<title>Partial dependence plot</title>
<p>PDPs illustrate the relationship between a specific feature and the predicted outcome of a ML model, while keeping all other features fixed, as shown in <xref ref-type="fig" rid="F9">Figure 9</xref>. Each subplot corresponds to a different input variable affecting the output of the model. The <italic>x</italic>-axis represents the values of the specific input variables, and the <italic>y</italic>-axis represents the Partial Dependence, or how the predictions of the model vary with changes in that specific variable. The black ticks indicate the distribution of data points (or unique values) for that variable in the training dataset. Denser areas suggest more frequent values in the data.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>PDP of input variables for <italic>f</italic>(<italic>x</italic>).</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g009.tif">
<alt-text content-type="machine-generated">Six line graphs display partial dependence plots for different variables: e1/d0, e2/d0, p1/d0, p2/d0, Nr, and fu/fy. Each graph shows variations in partial dependence, with unique trends for each variable. The y-axis represents partial dependence, while the x-axis represents the respective variable&#x27;s normalized value.</alt-text>
</graphic>
</fig>
<p>The observations for each variable are as follows:<list list-type="bullet">
<list-item>
<p>
<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: Partial dependence increases almost monotonically, indicating a positive relationship between <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and the model output.</p>
</list-item>
<list-item>
<p>
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: A general upward trend is observed, although there are some fluctuations, suggesting a positive but slightly nonlinear relationship.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: A U-shaped relationship appears, indicating that intermediate values might negatively impact the prediction, while both higher and lower values increase the model output.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: A similar U-shape is observed, but with less fluctuation compared to <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, as indicated by the smaller vertical range of the partial dependence values.</p>
</list-item>
<list-item>
<p>
<italic>N</italic>
<sub>
<italic>r</italic>
</sub>: The dependence decreases linearly, suggesting a strong negative relationship with the output.</p>
</list-item>
<list-item>
<p>
<italic>fu</italic>/<italic>fy</italic>: The relationship is irregular, with fluctuations indicating a complex interaction with the output variable.</p>
</list-item>
</list>
</p>
<p>Variables like <italic>N</italic>
<sub>
<italic>r</italic>
</sub> and <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> seem to have clearer trends, while <italic>fu</italic>/<italic>fy</italic> might require further investigation due to its variability.</p>
</sec>
<sec id="s6-2">
<label>6.2</label>
<title>Accumulated local effects</title>
<p>ALE represents the average effect of the input variable on the prediction of the model, accounting for interactions with other variables, as shown in <xref ref-type="fig" rid="F10">Figure 10</xref>. Positive ALE values suggest that the variable contributes positively to the prediction, while negative ALE values indicate a negative contribution. The <italic>x</italic>-axis represents the range of the respective input variable, normalized between 0 and 1.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>ALE of input variables for <italic>f</italic>(<italic>x</italic>).</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g010.tif">
<alt-text content-type="machine-generated">Six line graphs display the accumulated local effects (ALE) for different variables, each on a grid. The variables are e1/do, e2/do, p1/do, p2/do, Nr, and fu/fy. All plots have ALE on the y-axis and the variable ratio on the x-axis, with blue lines indicating trends. Each graph shows varying interactions and effects on ALE, with some increasing, decreasing, or fluctuating across the x-axis.</alt-text>
</graphic>
</fig>
<p>Each plot corresponds to the effect of a specific input variable on the predictions of the model:<list list-type="bullet">
<list-item>
<p>
<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: Shows a positive correlation; as <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases, the ALE increases significantly, indicating a strong positive effect.</p>
</list-item>
<list-item>
<p>
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: Exhibits non-linear fluctuations, suggesting complex behaviour.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>: Displays a moderate upward trend with a dip around 0.5.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>: Shows a U-shaped curve, indicating a decrease in ALE up to a certain point, followed by an increase.</p>
</list-item>
<list-item>
<p>
<italic>N</italic>
<sub>
<italic>r</italic>
</sub>: Displays a negative slope, indicating that <italic>N</italic>
<sub>
<italic>r</italic>
</sub> contributes negatively to the prediction.</p>
</list-item>
<list-item>
<p>
<italic>fu</italic>/<italic>fy</italic>: The ALE increases rapidly at lower values and then stabilizes, suggesting a diminishing positive effect.</p>
</list-item>
</list>
</p>
<p>Tick marks on the <italic>x</italic>-axis represent the distribution of data points for the respective variable, indicating where the model is well-supported by data.</p>
</sec>
<sec id="s6-3">
<label>6.3</label>
<title>Shapley additive explanations</title>
<p>
<xref ref-type="fig" rid="F11">Figure 11a</xref> presents the SHAP summary plot, where each point corresponds to a Shapley value associated with a specific parameter. The plot arranges samples into rows, each containing an equal number. The Shapley values are plotted along the <italic>x</italic>-axis, while the input variables are listed on the <italic>y</italic>-axis in order of importance, with the most significant ones at the top. Samples with identical SHAP values for a factor are spread vertically within the row so that individual points remain visible. The variable values are color-coded, with high values in red and low values in blue. Red indicates values that increase the SHAP score and the associated estimate.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>SHAP plots: <bold>(a)</bold> Shapley value; <bold>(b)</bold> Global importance factor.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g011.tif">
<alt-text content-type="machine-generated">(a) A SHAP summary plot showing the impact of features on model output, with features labeled on the y-axis and SHAP values on the x-axis. Blue to pink dots represent low to high feature values. (b) A bar graph displaying the mean absolute SHAP values for features, indicating their average impact on model output, with longer bars for higher influence. Features are listed on the y-axis.</alt-text>
</graphic>
</fig>
<p>The data suggests that increasing the normalized pitch perpendicular to loading and the number of bolt rows leads to a decrease in the SHAP value, and thus a reduction in the normalized bearing capacity. Conversely, increasing the normalized end distance, normalized edge distance, or the ultimate-to-yield stress ratio of the critical steel plate results in an increase in the normalized bearing capacity. <xref ref-type="fig" rid="F11">Figure 11a</xref> ensures an even distribution of points across the rows. In <xref ref-type="fig" rid="F11">Figure 11b</xref>, the global significance factor is represented by the average absolute SHAP value for each factor. SHAP analysis identifies normalized end distance and edge distance as the most significant factors, aligning with the variable importance results from the CatBoost model.</p>
<p>
<xref ref-type="fig" rid="F12">Figure 12</xref> presents the SHAP dependence graph through scatter plots that show the SHAP value of one parameter against others. The colours in the graph signify how interactions with different variables affect the values on the horizontal axis, with many of these interactions being non-linear. In <xref ref-type="fig" rid="F12">Figure 12a</xref>, the SHAP values increase as <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases, indicating that higher values of <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> have a more positive contribution to the predictions of the model. The colour gradient represents <italic>N</italic>
<sub>
<italic>r</italic>
</sub>, where higher <italic>N</italic>
<sub>
<italic>r</italic>
</sub> values tend to amplify the impact of <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> on the predictions. In <xref ref-type="fig" rid="F12">Figure 12b</xref>, the SHAP values generally increase as <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> increases, although the relationship appears to level off or saturate at higher values. The colour gradient corresponds to <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, showing that lower values of <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> are associated with smaller SHAP contributions, while higher <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> amplifies the SHAP values for <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>. In <xref ref-type="fig" rid="F12">Figure 12c</xref>, for most of the range of <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, the SHAP values are clustered near zero, but there is an increasing trend at higher <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> values, indicating a positive contribution to predictions. The colour gradient for <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> shows that higher <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> values correspond to more positive SHAP values, suggesting an interaction between <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>SHAP partial dependence plots: <bold>(a)</bold> e1/d0-Nr; <bold>(b)</bold> e2/d0-e1/d0; <bold>(c)</bold> p1/d0-e1/d0; <bold>(d)</bold> p2/d0-e1/d0; <bold>(e)</bold> Nr-e1/d0; <bold>(f)</bold> fu/fy-p2/d0.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g012.tif">
<alt-text content-type="machine-generated">Scatter plots labeled (a) to (f) displaying SHAP values on the y-axes versus various parameters on the x-axes, such as e1/do, e2/do, p1/do, p2/do, Nr, and fu/fy. The points are color-coded on a gradient scale from blue to pink, representing different parameter ranges labeled in the legend, which include Nr and a1/do values.</alt-text>
</graphic>
</fig>
<p>In <xref ref-type="fig" rid="F12">Figure 12d</xref>, the SHAP values are distributed across a wide range, with noticeable clusters, suggesting non-linear relationships. There are regions where <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> has little to no effect (SHAP values near zero) and others where it contributes positively or negatively. The feature <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> interacts with <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, with higher <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> amplifying the SHAP values. In <xref ref-type="fig" rid="F12">Figure 12e</xref>, the SHAP values exhibit a slight overall decreasing trend with <italic>N</italic>
<sub>
<italic>r</italic>
</sub>, indicating that larger <italic>N</italic>
<sub>
<italic>r</italic>
</sub> values generally contribute negatively to the predicted normalized bearing capacity. The colour gradient for <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> further shows that this negative influence is more pronounced for lower <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and can be partially mitigated when <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> is large. In <xref ref-type="fig" rid="F12">Figure 12f</xref>, the SHAP values are scattered without a strong linear trend, indicating a complex or weak relationship between <italic>fu</italic>/<italic>fy</italic> and the predictions of the model. Certain clusters show positive or negative contributions. The colour bar for <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> suggests that the interaction between <italic>fu</italic>/<italic>fy</italic> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> is significant, with higher <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> values leading to more pronounced positive or negative SHAP contributions.</p>
<p>In general, the behaviour of SHAP values indicates that <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> have the strongest and most consistent positive impact on predictions. Features like <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>fu</italic>/<italic>fy</italic> exhibit non-linear or scattered relationships, implying complex interactions within the model. The colour gradients (interaction effects) reveal secondary relationships between the displayed feature and auxiliary features, emphasizing the multi-feature dynamics of the model.</p>
<p>
<xref ref-type="fig" rid="F13">Figure 13</xref> shows a SHAP force plot, which visualizes how individual feature values influence the prediction of the model. The plot represents how different input variables contribute to pushing the prediction away from its base value (the expected value of the model output across the dataset). The red section represents features that push the prediction higher. The blue section represents features that push the prediction lower. The grey vertical line in the middle indicates the base value of the prediction. The base value of 1.75 is the expected value of the model output before considering the influence of individual feature values. Features shown in red, including <italic>fu</italic>/<italic>fy</italic>, <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, <italic>N</italic>
<sub>
<italic>r</italic>
</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, increase the output from the base value. Features shown in blue, including e1/d0 decrease the output from the base value. In general, the feature interpretations are as follows:<list list-type="bullet">
<list-item>
<p>
<italic>fu</italic>/<italic>fy</italic> &#x3d; 0.8485: A feature that pushes the output upwards.</p>
</list-item>
<list-item>
<p>
<italic>N</italic>
<sub>
<italic>r</italic>
</sub> &#x3d; 0.3333: Also contributes positively.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> &#x3d; 0: Has a small or neutral effect.</p>
</list-item>
<list-item>
<p>
<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> &#x3d; 0.55: Provides a positive contribution.</p>
</list-item>
<list-item>
<p>
<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> &#x3d; 1: Significantly increases the output.</p>
</list-item>
<list-item>
<p>
<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> &#x3d; 0.04762: The only feature that pulls the model output lower.</p>
</list-item>
</list>
</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>SHAP force plot.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g013.tif">
<alt-text content-type="machine-generated">A horizontal bar graph illustrates a range from 1.151 to 2.351. The bar is segmented into red and blue sections, with a base value at 1.751. The red section, labeled &#x22;higher,&#x22; includes details like p1/d0 = 0.55 and e2/d0 = 1. The blue section, labeled &#x22;lower,&#x22; includes e1/d0 = 0.04762.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="fig" rid="F14">Figure 14</xref> presents a SHAP decision plot. The order of features on the <italic>y</italic>-axis suggests their relative importance. Higher-ranked features, such as <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, have a larger overall impact on the output. Features lower on the axis, such as <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, contribute less overall, indicating they may be less influential. The blue-to-red gradient shows how feature values relate to their SHAP impact. For <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, higher values (red) mostly result in positive SHAP values, meaning high <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> pushes the output of the model to increase. For <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, similarly, higher values tend to have a positive SHAP impact, but some variability suggests a non-linear relationship. Low feature values (blue) generally have a negative or neutral SHAP impact, pulling the output lower. Features like <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub> show tightly clustered SHAP values around 0. This could mean their contributions are smaller and more uniform across the dataset. Conversely, features like <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> have a wider spread, indicating a more variable contribution to the output. In SHAP plots, overlapping colours and values suggest potential feature interactions. For example, the spread in <italic>N</italic>
<sub>
<italic>r</italic>
</sub> and <italic>fu</italic>/<italic>fy</italic> might indicate interactions with other features, where their SHAP values depend on the presence of other variables. The model output appears to centre near 2, suggesting that for most samples, predictions are not strongly influenced (in either positive or negative directions) by individual features. Features like <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> have notable impacts, pulling the output above or below this centre.</p>
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>SHAP decision plot.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g014.tif">
<alt-text content-type="machine-generated">Parallel coordinates plot displaying model output values from 0 to 4 on the horizontal axis. The vertical lines represent different parameters labeled e1/d0, e2/d0, p2/d0, Nr, fu/fy, p1/d0. Lines transition from blue to pink across the plot.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="fig" rid="F15">Figure 15</xref> shows a SHAP waterfall plot, which is commonly used to explain the contribution of different input variables to a ML prediction for a single instance. The top-left part of the plot states <italic>f</italic>(<italic>x</italic>) &#x3d; 1.729, which is the predicted output for the given instance. The bottom label shows <italic>E</italic> [<italic>f</italic>(<italic>X</italic>)] &#x3d; 1.751, which represents the expected prediction across all instances in the dataset. The largest impact comes from <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, which decreases the prediction by 0.53. The second major factor is <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, which increases the prediction by 0.37. Smaller contributions are made by <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> (&#x2b;0.07), <italic>N</italic>
<sub>
<italic>r</italic>
</sub> (&#x2b;0.03), <italic>fu</italic>/<italic>fy</italic> (&#x2b;0.03), and <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> (&#x2b;0.01).</p>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>SHAP waterfall plot.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g015.tif">
<alt-text content-type="machine-generated">Waterfall chart showing contributions to the value \( f(x) = 1.729 \). Factors listed: e1/d0, e2/d0, p2/d0, Nr, fu/fy, and p1/d0. Blue bar indicates a decrease of -0.53; red bars show increases of +0.37, +0.07, +0.03, +0.03, and +0.01. The expected value \( E[f(X)] = 1.751 \).</alt-text>
</graphic>
</fig>
</sec>
<sec id="s6-4">
<label>6.4</label>
<title>Parametric study</title>
<p>
<xref ref-type="fig" rid="F16">Figure 16</xref> shows a series of contour plots that represent the output of a two-variable exponential decay function. Each subplot displays the relationship between two input variables (on the axes) and the corresponding output of the decay function (indicated by the contour levels and the colour gradient). The <italic>x</italic>- and <italic>y</italic>-axes in each subplot represent different parameter pairs or input variables. Each subplot investigates how these variable pairs interact and influence the output of the function. The contour lines connect points of equal output values, providing a visual representation of the decay behaviour. The colour map indicates the magnitude of the output, where typically:<list list-type="bullet">
<list-item>
<p>Red shades signify higher output values.</p>
</list-item>
<list-item>
<p>Blue shades signify lower output values.</p>
</list-item>
</list>
</p>
<fig id="F16" position="float">
<label>FIGURE 16</label>
<caption>
<p>Two-variable exponential decay fit of output function: <bold>(a)</bold> e1/d0-e2/d0; <bold>(b)</bold> e1/d0-p1/d0; <bold>(c)</bold> e1/d0-p2/d0; <bold>(d)</bold> e1/d0-Nr; <bold>(e)</bold> e1/d0-fu/fy; <bold>(f)</bold> e2/d0-p1/d0; <bold>(g)</bold> e2/d0-p2/d0; <bold>(h)</bold> e2/d0-Nr; <bold>(i)</bold> e2/d0-fu/fy; <bold>(j)</bold> p1/d0-p2/d0; <bold>(k)</bold> p1/d0-Nr; <bold>(l)</bold> p1/d0-fu/fy; <bold>(m)</bold> p2/d0-Nr; <bold>(n)</bold> p2/d0-fu/fy; <bold>(o)</bold> Nr-fu/fy.</p>
</caption>
<graphic xlink:href="fbuil-12-1753382-g016.tif">
<alt-text content-type="machine-generated">A series of contour plots labeled from (a) to (o) illustrating relationships between different variables. The x- and y-axes represent various ratios, such as \( e1/d0 \), \( e2/d0 \), \( p1/d0 \), \( p2/d0 \), and \( Nr \). The color gradient from blue to red indicates varying values of \( Fmax/\text{unit} \), \( Fmax/\( Nhr \) \), or \( Fuv \). Each plot shows different parameter interactions and trends through the color changes.</alt-text>
</graphic>
</fig>
<p>The objective is to analyse and fit a two-variable exponential decay model to the underlying data. The plots help visualize how different parameter combinations affect the output, which may be useful for identifying trends, dependencies, or sensitivity of the function to specific variables. Peaks (red areas) and troughs (blue areas) in the contour plots highlight regions of high and low output values, respectively. Smooth transitions or sharp gradients in colour and contour lines indicate the rate of change in the output with respect to changes in the input variables.</p>
<p>
<xref ref-type="fig" rid="F16">Figure 16a</xref> illustrates the relationship between the parameters <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, and the normalized variable <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>. As <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> increase, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> also increases, indicating a direct correlation between these variables. The gradient of the contour lines suggests a nonlinear interaction, with a steeper increase in <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> for higher values of <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>. <xref ref-type="fig" rid="F16">Figure 16b</xref> represents the variation of the parameter <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> with respect to <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>. The contours show that <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> increases as both <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increase, with a steeper gradient in regions of higher <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>. The behaviour suggests a nonlinear relationship, where changes in <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> influence <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> more strongly for larger <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> values. <xref ref-type="fig" rid="F16">Figure 16c</xref> illustrates the relationship between two normalized parameters, <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, and their influence on the response parameter <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>. As <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases, the response parameter <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> grows significantly, particularly for higher values of <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>. The behaviour suggests a nonlinear dependency, with a steep gradient for lower <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> transitioning to a smoother variation at higher values.</p>
<p>
<xref ref-type="fig" rid="F16">Figure 16d</xref> shows that the variable <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> increases with both <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub>, indicating a direct relationship between these parameters and the maximum normalized force. The gradient is steeper along the <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> axis compared to the <italic>N</italic>
<sub>
<italic>r</italic>
</sub> axis, suggesting that <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> has a more significant influence on <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>. Regions with higher <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> values are concentrated towards the top-right corner, emphasizing the combined effect of high <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub>. <xref ref-type="fig" rid="F16">Figure 16e</xref> displays the relationship between the parameters <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>fu</italic>/<italic>fy</italic>, and the normalized value <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>. As <italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> generally rises, indicating a strong dependency on this ratio. Similarly, higher values of <italic>fu</italic>/<italic>fy</italic> correspond to increased <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>, showcasing its influence on the performance of the system. <xref ref-type="fig" rid="F16">Figure 16f</xref> shows the parametric relationship between the variables <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, and <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>, represented as contour levels. The behaviour indicates that as <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increase, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> rises steadily, transitioning from lower values to higher values. The gradient suggests a nonlinear correlation, where <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> is more sensitive to changes in <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> at higher values of <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>.</p>
<p>
<xref ref-type="fig" rid="F16">Figure 16g</xref> shows that as <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> increase, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> generally increases, suggesting a direct influence of these parameters on the force ratio. The steep gradients at lower values of <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> indicate higher sensitivity of <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> to changes in these regions. <xref ref-type="fig" rid="F16">Figure 16h</xref> illustrates that as <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub> increase, the value of <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> rises, transitioning from blue (low values) to red (high values), indicating a strong positive correlation. The plot reveals that <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> grows more rapidly at higher values of both <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub>, suggesting nonlinear behaviour in this region. <xref ref-type="fig" rid="F16">Figure 16i</xref> shows that as <italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> increases, the normalized <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> also increases significantly, indicating a stronger structural response with higher ratios of edge distance to hole diameter. Similarly, higher values of <italic>fu</italic>/<italic>fy</italic> correlate with increased <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>, suggesting that higher ultimate-to-yield strength ratios lead to improved maximum force response.</p>
<p>
<xref ref-type="fig" rid="F16">Figure 16j</xref> indicates a sharp transition from higher values to lower values as <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases beyond a threshold, particularly at lower <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> values. The behaviour stabilizes at consistently lower values for larger <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, suggesting diminishing sensitivity in these regions. <xref ref-type="fig" rid="F16">Figure 16k</xref> shows that as <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and <italic>N</italic>
<sub>
<italic>r</italic>
</sub> increase, the value of <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> generally increases, transitioning from blue to red regions. The gradient patterns suggest a nonlinear interaction where both parameters significantly influence the outcome, with higher values concentrated in the upper-right corner. <xref ref-type="fig" rid="F16">Figure 16l</xref> shows that the highest values (<italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> &#x2248; 1.8) occur at very low values of <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> and slightly higher <italic>fu</italic>/<italic>fy</italic> values, suggesting sensitivity to these parameters. As <italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub> increases, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> decreases significantly, indicating reduced structural performance in these regions.</p>
<p>
<xref ref-type="fig" rid="F16">Figure 16m</xref> indicates that the feature <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> shows a significant gradient along the <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> axis near zero, with values exceeding 2, indicating sensitivity to changes in <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> at lower values. Beyond a certain threshold of <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> stabilizes around 1 for most of the <italic>N</italic>
<sub>
<italic>r</italic>
</sub> range, suggesting a reduced influence of <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> at higher values. <xref ref-type="fig" rid="F16">Figure 16n</xref> shows that the strong gradient near <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub> &#x3d; 0 suggests significant sensitivity in this region, with the ratio <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> increasing rapidly. For larger values of <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>, the ratio stabilizes, and <italic>f</italic><sub><italic>u</italic></sub>/<italic>f</italic><sub><italic>y</italic></sub> appears to have a weaker influence overall, with the region dominated by a uniform lower value. <xref ref-type="fig" rid="F16">Figure 16o</xref> shows that as <italic>N</italic>
<sub>
<italic>r</italic>
</sub> increases, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> decreases, indicating an inverse relationship. In contrast, as <italic>f</italic><sub><italic>u</italic></sub>/<italic>f</italic><sub><italic>y</italic></sub> increases, <italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic> also increases, suggesting a direct relationship between these parameters.</p>
</sec>
<sec id="s6-5">
<label>6.5</label>
<title>Design implications</title>
<p>From a practical design standpoint, the interpretable CatBoost model and the associated interpretability methods provide several qualitative recommendations for detailing double-shear, bearing-type bolted connections within the investigated domain. First, the contour plots confirm that increasing the normalized end and edge distances systematically enhances the normalized bearing capacity, with the highest values concentrated in regions where both ratios are simultaneously large. This observation highlights the benefit of providing end and edge distances that exceed minimum code requirements rather than merely satisfying them. Second, the transverse pitch ratio should not be selected too small: the response surfaces show a steep increase in normalized bearing capacity as the transverse pitch ratio increases from very low values, followed by a plateau at moderate spacings. This indicates that excessively tight transverse spacing should be avoided, while very large transverse pitch ratios offer limited additional benefit. Third, the longitudinal pitch ratio exhibits a non-monotonic influence. Relatively small longitudinal pitch ratios, when combined with suitable material properties, yield the highest normalized capacities, whereas excessively large longitudinal pitch ratios lead to a marked reduction in normalized bearing capacity. Finally, the effect of the number of bolt rows is secondary to, and interacts with, the geometric parameters: increasing the number of bolt rows can be beneficial when accompanied by sufficient end and edge distances and appropriate pitch values, but it does not guarantee higher normalized capacity on its own. These insights are intended as qualitative guidance to support detailing decisions and rapid design exploration using the proposed ML model. 
They remain constrained by the range of geometries and material properties represented in the experimental dataset and should be applied in conjunction with existing code provisions and checks for other governing failure modes.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s7">
<label>7</label>
<title>Conclusion</title>
<p>This study investigated the application of ML techniques to predict the bearing capacity of double shear-bolted connections in structural steel, an area of critical importance for ensuring safety and efficiency in engineering design. Leveraging a dataset of 443 experimental results, 10&#xa0;ML algorithms, including LR, RR, SVR, KNN, DT, RF, and advanced gradient boosting models, were systematically trained, validated, and assessed. Among these, CatBoost emerged as the most robust performer, achieving high predictive accuracy while maintaining interpretability, a key requirement for practical engineering applications.</p>
<p>Key findings from this research emphasize the pivotal roles of normalized end distance (<italic>e</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>) and edge distance (<italic>e</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>) as primary factors influencing the normalized bearing capacity (<italic>F</italic>
<sub>max</sub>/<italic>f</italic>
<sub>
<italic>u</italic>
</sub>
<italic>ndt</italic>). This conclusion was consistently supported across various interpretability frameworks, such as PDP, ALE, and SHAP. These methods provided a transparent and nuanced understanding of the ML models, highlighting their decision-making processes and the complex, nonlinear interactions between critical input variables. The findings also revealed significant contributions from the number of bolt rows (<italic>N</italic>
<sub>
<italic>r</italic>
</sub>) and pitch distances (<italic>p</italic>
<sub>1</sub>/<italic>d</italic>
<sub>0</sub>, <italic>p</italic>
<sub>2</sub>/<italic>d</italic>
<sub>0</sub>), reinforcing their importance in multi-bolt connection design.</p>
<p>The research not only demonstrated the effectiveness of ML in addressing the complexity of bearing capacity predictions but also showcased the value of integrating interpretable models into structural engineering workflows. This approach bridges the gap between black-box ML models and the engineering need for actionable and explainable results by offering clear insights into how key parameters influence predictions. This transparency is vital for fostering trust and adoption of ML-driven design methodologies in structural engineering.</p>
<p>Despite these advances, the study has limitations that present opportunities for future research. It is important to emphasise that, despite the relatively good performance of the CatBoost model, the remaining scatter is non-negligible for safety-critical applications. The present model is therefore not proposed as a direct replacement for codified design equations, but rather as a complementary tool for parametric exploration, preliminary assessment, and insight generation. Any future deployment of such models as part of formal design checks would require a dedicated reliability-based calibration, including the derivation of suitable resistance factors or safety margins that explicitly account for model uncertainty.</p>
<p>In addition, the reliance on experimental datasets, while providing valuable insights, may introduce biases due to variability in test setups and conditions. Expanding the dataset with more diverse scenarios and material properties, including high-strength and composite steels, could enhance the generalizability of the models. Furthermore, future exploration involves the integration of physics-informed constraints or hybrid modeling approaches that combine data-driven learning with mechanics-based principles. However, implementing such models presents notable challenges. Accurately embedding physical laws, such as equilibrium conditions or constitutive relationships, into ML architectures requires careful formulation and often domain-specific customization. Additionally, while this study focused on double shear-bolted connections, future work could explore ML-driven predictions for other connection configurations, such as single shear, staggered, or multi-row bolt arrangements, under varying loading conditions.</p>
<p>Moreover, high-fidelity FEA calibrated against the existing tests could be employed to generate synthetic data and enrich the coverage of the parameter space (e.g., larger bolt patterns, alternative plate thicknesses, or rarely tested combinations of end distance, edge distance, and pitch). Such a hybrid experimental-numerical dataset would enable more robust training, facilitate reliability-based calibration of the surrogate model, and ultimately support safer and more comprehensive design recommendations. Furthermore, exploring dynamic loading and fatigue conditions could extend the applicability of ML frameworks to broader engineering challenges. A natural next step will be to embed the proposed interpretable ML model into BIM- and IFC-based design environments and operational digital twins, to empirically validate the end-to-end workflow from automated geometry extraction to real-time connection assessment in practical projects.</p>
<p>In conclusion, this research underscores the transformative potential of ML in advancing structural design and analysis. ML enables more data-driven, efficient, and reliable engineering practices by enhancing prediction accuracy and interpretability. The integration of these methods into industrial workflows not only optimizes design processes but also promotes sustainability, safety, and innovation in structural engineering. This study serves as a foundation for further exploration into the intersection of ML and engineering, aiming to develop adaptable, intelligent tools for the next generation of structural systems.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s8">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: The dataset was compiled from the published studies cited in the paper: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2013.12.002">https://doi.org/10.1016/J.JCSR.2013.12.002</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2010.03.009">https://doi.org/10.1016/J.JCSR.2010.03.009</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2017.06.001">https://doi.org/10.1016/J.JCSR.2017.06.001</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2019.05.023">https://doi.org/10.1016/J.JCSR.2019.05.023</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1061/(ASCE)0733-9445(2003)129:6(792)">https://doi.org/10.1061/(ASCE)0733-9445(2003)129:6(792)</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S0143-974X(98)00220-X">https://doi.org/10.1016/S0143-974X(98)00220-X</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/S13296-013-4005-Y/METRICS">https://doi.org/10.1007/S13296-013-4005-Y/METRICS</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.ENGSTRUCT.2019.03.060">https://doi.org/10.1016/J.ENGSTRUCT.2019.03.060</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2010.10.007">https://doi.org/10.1016/J.JCSR.2010.10.007</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1061">https://doi.org/10.1061</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1061">https://doi.org/10.1061</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://research.tudelft.nl/en/publications/experimental-research-on-single-bolt-connections-for-high-strengt">https://research.tudelft.nl/en/publications/experimental-research-on-single-bolt-connections-for-high-strengt</ext-link>, <ext-link ext-link-type="uri" 
xlink:href="https://doi.org/10.1016/J.ISTRUC.2019.12.002">https://doi.org/10.1016/J.ISTRUC.2019.12.002</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S0143-974X(00)00017-1">https://doi.org/10.1016/S0143-974X(00)00017-1</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/J.JCSR.2018.04.006">https://doi.org/10.1016/J.JCSR.2018.04.006</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>SK: Writing &#x2013; original draft, Methodology, Software, Conceptualization. HL: Writing &#x2013; review and editing. TD: Visualization, Writing &#x2013; review and editing. AM: Writing &#x2013; review and editing, Visualization. IB: Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Thread effects on the stiffness of bolted shear connections</article-title>. <source>J. Constr. Steel Res.</source> <volume>160</volume>, <fpage>77</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2019.05.023</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024a</year>). <article-title>Introduction of methodology for BIM and DSS</article-title>. <source>Integr. Build. Intell.</source> <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2_3</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024b</year>). <article-title>BIM-based augmented reality for facility maintenance management</article-title>. <source>Integr. Build. Intell.</source> <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2_7</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024c</year>). <article-title>BIM-based DSS for enhancing occupants&#x2019; comfort</article-title>. <source>Integr. Build. Intell.</source>, <fpage>79</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2_6</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024d</year>). <article-title>BIM-based DSS for building condition assessment</article-title>. <source>Integr. Build. Intell.</source>, <fpage>59</fpage>&#x2013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2_5</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024e</year>). <article-title>BIM-based DSS for HVAC root-cause detection</article-title>. <source>Integr. Build. Intell.</source>, <fpage>43</fpage>&#x2013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2_4</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Forcada</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024f</year>). <article-title>Integrated building intelligence</article-title>. <pub-id pub-id-type="doi">10.1007/978-3-031-68865-2</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Apley</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Visualizing the effects of predictor variables in black box supervised learning models</article-title>. <source>J. R. Stat. Soc. Ser. B Stat. Methodol.</source> <volume>82</volume> (<issue>4</issue>), <fpage>1059</fpage>&#x2013;<lpage>1086</lpage>. <pub-id pub-id-type="doi">10.1111/rssb.12377</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashrafian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Panahi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Salehi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Karoglou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Mapping the strength of agro-ecological lightweight concrete containing oil palm by-product using artificial intelligence techniques</article-title>. <source>Structures</source> <volume>48</volume>, <fpage>1209</fpage>&#x2013;<lpage>1229</lpage>. <pub-id pub-id-type="doi">10.1016/j.istruc.2022.12.108</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Tsavdaridis</surname>
<given-names>K. D.</given-names>
</name>
<name>
<surname>Lemonis</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>F. P. V.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>T. T.</given-names>
</name>
<name>
<surname>Gantes</surname>
<given-names>C. J.</given-names>
</name>
<etal/>
</person-group> (<year>2024a</year>). <article-title>AI-powered GUI for prediction of axial compression capacity in concrete-filled steel tube columns</article-title>. <source>Neural Comput. Appl.</source> <pub-id pub-id-type="doi">10.1007/s00521-024-10405-w</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Karoglou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Skentou</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Vasconcelos</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bakolas</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2024b</year>). <article-title>Predicting uniaxial compressive strength of rocks using ANN models: incorporating porosity, compressional wave velocity, and schmidt hammer data</article-title>. <source>Ultrasonics</source> <volume>141</volume>, <fpage>107347</fpage>. <pub-id pub-id-type="doi">10.1016/j.ultras.2024.107347</pub-id>
<pub-id pub-id-type="pmid">38781796</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barkhordari</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Armaghani</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Structural damage identification using ensemble deep convolutional neural network models</article-title>. <source>CMES - Comput. Model. Eng. Sci.</source> <volume>134</volume> (<issue>2</issue>), <fpage>835</fpage>&#x2013;<lpage>855</lpage>. <pub-id pub-id-type="doi">10.32604/cmes.2022.020840</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cabrera</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ninic</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tizani</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Fusion of experimental and synthetic data for reliable prediction of steel connection behaviour using machine learning</article-title>. <source>Eng. Comput.</source> <volume>39</volume> (<issue>6</issue>), <fpage>3993</fpage>&#x2013;<lpage>4011</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-023-01864-1</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Guestrin</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>XGBoost: a scalable tree boosting system</article-title>,&#x201d; in <source>Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining</source>, <fpage>785</fpage>&#x2013;<lpage>794</lpage>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Benesty</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Khotilovich</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>XGBoost: extreme Gradient boosting</article-title>. <source>R. Package Version 071-2</source> <volume>1</volume> (<issue>4</issue>), <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clements</surname>
<given-names>D. D. A.</given-names>
</name>
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Active shear planes of bolted connections failing in block shear</article-title>. <source>J. Struct. Eng. (United States)</source> <volume>139</volume> (<issue>3</issue>), <fpage>320</fpage>&#x2013;<lpage>327</lpage>. <pub-id pub-id-type="doi">10.1061/(asce)st.1943-541x.0000626</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cover</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Hart</surname>
<given-names>P. E.</given-names>
</name>
</person-group> (<year>1967</year>). <article-title>Nearest neighbor pattern classification</article-title>. <source>IEEE Trans. Inf. Theory</source> <volume>13</volume> (<issue>1</issue>), <fpage>21</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1109/tit.1967.1053964</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Dietterich</surname>
<given-names>T. G.</given-names>
</name>
</person-group> (<year>2000</year>). &#x201c;<article-title>Ensemble methods in machine learning</article-title>,&#x201d; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dorogush</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Ershov</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Gulin</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>CatBoost: gradient boosting with categorical features support</article-title>. <comment>arXiv preprint arXiv:181011363</comment>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Freitas</surname>
<given-names>S. T. de</given-names>
</name>
<name>
<surname>Vries</surname>
<given-names>P. de</given-names>
</name>
<name>
<surname>Bijlaard</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Experimental research on single bolt connections for high strength steel S690</article-title>. <source>cmm - Assoc. Port. construcao Met. mista</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://research.tudelft.nl/en/publications/experimental-research-on-single-bolt-connections-for-high-strengt">https://research.tudelft.nl/en/publications/experimental-research-on-single-bolt-connections-for-high-strengt</ext-link>.</comment>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Freund</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Schapire</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>A decision-theoretic generalization of On-Line learning and an application to boosting</article-title>. <source>J. Comput. Syst. Sci.</source> <volume>55</volume> (<issue>1</issue>), <fpage>119</fpage>&#x2013;<lpage>139</lpage>. <pub-id pub-id-type="doi">10.1006/jcss.1997.1504</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Friedman</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Greedy function approximation: a gradient boosting machine</article-title>. <source>Ann. Stat.</source>, <fpage>1189</fpage>&#x2013;<lpage>1232</lpage>. <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Geyer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Explainable AI for engineering design: a unified approach of systems engineering and component-based deep learning demonstrated by energy-efficient building design</article-title>. <source>Adv. Eng. Inf.</source> <volume>62</volume>, <fpage>102843</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2024.102843</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghanizadeh</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Ghanizadeh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Fakharian</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Armaghani</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Developing bearing capacity model for geogrid-reinforced stone columns improved soft clay utilizing MARS-EBS hybrid method</article-title>. <source>Transp. Geotech.</source> <volume>38</volume>, <fpage>100906</fpage>. <pub-id pub-id-type="doi">10.1016/j.trgeo.2022.100906</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Study on mechanical behavior of Q690D high strength steel bearing-type bolted connections</article-title>. <source>Structures</source> <volume>23</volume>, <fpage>588</fpage>&#x2013;<lpage>601</lpage>. <pub-id pub-id-type="doi">10.1016/j.istruc.2019.12.002</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hai</surname>
<given-names>L. T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>F. F.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Experimental investigation on cyclic behavior of Q690D high strength steel H-section beam-columns about strong axis</article-title>. <source>Eng. Struct.</source> <volume>189</volume>, <fpage>157</fpage>&#x2013;<lpage>173</lpage>. <pub-id pub-id-type="doi">10.1016/j.engstruct.2019.03.060</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoerl</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Kennard</surname>
<given-names>R. W.</given-names>
</name>
</person-group> (<year>1970</year>). <article-title>Ridge regression: biased estimation for nonorthogonal problems</article-title>. <source>Technometrics</source> <volume>12</volume> (<issue>1</issue>), <fpage>55</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1080/00401706.1970.10488634</pub-id>
</mixed-citation>
</ref>
<ref id="B78">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Kalab</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>When a shear connection transmits a bending moment</article-title>. <publisher-name>IDEA StatiCa</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ideastatica.com/blog/when-a-shear-connection-transmits-a-bending-moment">https://www.ideastatica.com/blog/when-a-shear-connection-transmits-a-bending-moment</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ke</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Finley</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>LightGBM: a highly efficient gradient boosting decision tree</article-title>,&#x201d; in <source>Advances in neural information processing systems</source>.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>D. K.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Generalized load deformation relationship for bearing-type single-bolted connections</article-title>. <source>J. Struct. Eng.</source> <volume>146</volume> (<issue>7</issue>), <fpage>04020116</fpage>. <pub-id pub-id-type="doi">10.1061/(asce)st.1943-541x.0002640</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Yura</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>The effect of ultimate-to-yield ratio on the bearing strength of bolted connections</article-title>. <source>J. Constr. Steel Res.</source> <volume>49</volume> (<issue>3</issue>), <fpage>255</fpage>&#x2013;<lpage>269</lpage>. <pub-id pub-id-type="doi">10.1016/s0143-974x(98)00220-x</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Structural analysis of GFRP elastic gridshell structures by particle swarm optimization and least square support vector machine algorithms</article-title>. <source>J. Civ. Eng. Mater. Appl.</source> <volume>5</volume> (<issue>3</issue>), <fpage>139</fpage>&#x2013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.22034/jcema.2021.304981.1064</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Shape optimization of GFRP elastic gridshells by the weighted lagrange &#x3b5;-twin support vector machine and multi-objective particle swarm optimization algorithm considering structural weight</article-title>. <source>Structures</source> <volume>33</volume>, <fpage>2066</fpage>&#x2013;<lpage>2084</lpage>. <pub-id pub-id-type="doi">10.1016/j.istruc.2021.05.077</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lindsey</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>An overview of optimal damper placement methods in structures</article-title>. <source>Trans. Civ. Eng.</source> <volume>46</volume>, <fpage>1785</fpage>&#x2013;<lpage>1804</lpage>. <pub-id pub-id-type="doi">10.1007/s40996-021-00752-2</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nyunn</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Form-finding of lifting self-forming GFRP elastic gridshells based on machine learning interpretability methods</article-title>. <source>Struct. Eng. Mech.</source> <volume>84</volume> (<issue>5</issue>), <fpage>605</fpage>&#x2013;<lpage>618</lpage>. <pub-id pub-id-type="doi">10.12989/sem.2022.84.5.605</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Torres</surname>
<given-names>J. L. C.</given-names>
</name>
</person-group> (<year>2022c</year>). <article-title>Structural performance assessment of GFRP elastic gridshells by machine learning interpretability methods</article-title>. <source>Front. Struct. Civ. Eng.</source> <volume>16</volume> (<issue>10</issue>), <fpage>1249</fpage>&#x2013;<lpage>1266</lpage>. <pub-id pub-id-type="doi">10.1007/s11709-022-0858-5</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Parn</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Brilakis</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Dirar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Theofanous</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Faramarzi</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Trajectory of building and structural design automation from generative design towards the integration of deep generative models and optimization: a review</article-title>. <source>J. Build. Eng.</source> <volume>97</volume>, <fpage>110972</fpage>. <pub-id pub-id-type="doi">10.1016/j.jobe.2024.110972</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Alavi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rahimian</surname>
<given-names>F. P.</given-names>
</name>
</person-group> (<year>2025</year>). <source>Structural Design and Optimization of Lifting Self-forming GFRP Elastic Gridshells based on Machine Learning</source> (<publisher-loc>London, New York</publisher-loc>: <publisher-name>Routledge</publisher-name>). <pub-id pub-id-type="doi">10.1201/9781003565055</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Jeon</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A LightGBM-based strategy to predict tunnel rockmass class from TBM construction data for building control</article-title>. <source>Adv. Eng. Inf.</source> <volume>58</volume>, <fpage>102130</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2023.102130</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Improved estimation in multiple linear regression models with measurement error and general constraint</article-title>. <source>J. Multivar. Anal.</source> <volume>100</volume> (<issue>4</issue>), <fpage>726</fpage>&#x2013;<lpage>741</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmva.2008.08.003</pub-id>
<pub-id pub-id-type="pmid">20160857</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>XGBoost application on bridge management systems for proactive damage estimation</article-title>. <source>Adv. Eng. Inf.</source> <volume>41</volume>, <fpage>100922</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2019.100922</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Vu-Bac</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhuang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Rabczuk</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Stochastic multiscale modeling of heat conductivity of polymeric clay nanocomposites</article-title>. <source>Mech. Mater.</source> <pub-id pub-id-type="doi">10.1016/j.mechmat.2019.103280</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Attention-enhanced spatiotemporal deep learning-based automatic warning model with uncertainty estimation in dam safety</article-title>. <source>Struct. Health Monit</source>. <pub-id pub-id-type="doi">10.1177/14759217251358537</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Love</surname>
<given-names>P. E. D.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Matthews</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Porter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Explainable artificial intelligence (XAI): precepts, models, and opportunities for research in construction</article-title>. <source>Adv. Eng. Inf.</source> <volume>57</volume>, <fpage>102024</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2023.102024</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. I.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A unified approach to interpreting model predictions</article-title>. <source>Adv. Neural Inf. Process Syst.</source> <volume>30</volume>. <pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Numerical analysis on the ultimate bearing resistance of single-bolt connection with high strength steels</article-title>. <source>J. Constr. Steel Res.</source> <volume>153</volume>, <fpage>118</fpage>&#x2013;<lpage>129</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2018.10.006</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. Z.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>Bearing behavior of multi-bolt high strength steel connections</article-title>. <source>Eng. Struct.</source> <volume>212</volume>, <fpage>110510</fpage>. <pub-id pub-id-type="doi">10.1016/j.engstruct.2020.110510</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. B.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>Behavior-based resistance model for bearing-type connection in high-strength steels</article-title>. <source>J. Struct. Eng.</source> <volume>146</volume> (<issue>7</issue>), <fpage>04020109</fpage>. <pub-id pub-id-type="doi">10.1061/(asce)st.1943-541x.0002639</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Flanigan</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Berg&#xe9;s</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>State-of-the-art review and synthesis: a requirement-based roadmap for standardized predictive maintenance automation using digital twin technologies</article-title>. <source>Adv. Eng. Inf.</source> <volume>62</volume>, <fpage>102800</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2024.102800</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahdavipour</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Faramarzi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dirar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Theofanous</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jowhari Moghadam</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2026</year>). <article-title>Optimised steel frame design using reclaimed steel: logistics impact on reuse efficiency</article-title>. <source>J. Constr. Steel Res.</source> <volume>236</volume>, <fpage>110046</fpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2025.110046</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahmood</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Mohammed</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Sihag</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Interpreting the experimental results of compressive strength of hand-mixed cement-grouted sands using various mathematical approaches</article-title>. <source>Archives Civ. Mech. Eng.</source> <volume>22</volume> (<issue>1</issue>), <fpage>19</fpage>. <pub-id pub-id-type="doi">10.1007/s43452-021-00341-0</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mo&#x17e;e</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Beg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Investigation of high strength steel connections with several bolts in double shear</article-title>. <source>J. Constr. Steel Res.</source> <volume>67</volume> (<issue>3</issue>), <fpage>333</fpage>&#x2013;<lpage>347</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2010.10.007</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mo&#x17e;e</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Bearing strength at bolt holes in connections with large end distance and bolt pitch</article-title>. <source>J. Constr. Steel Res.</source> <volume>147</volume>, <fpage>132</fpage>&#x2013;<lpage>144</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2018.04.006</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mo&#x17e;e</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Beg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>High strength steel tension splices with one or two bolts</article-title>. <source>J. Constr. Steel Res.</source> <volume>66</volume> (<issue>8&#x2013;9</issue>), <fpage>1000</fpage>&#x2013;<lpage>1010</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2010.03.009</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mo&#x17e;e</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Beg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A complete study of bearing stress in single bolt connections</article-title>. <source>J. Constr. Steel Res.</source> <volume>95</volume>, <fpage>126</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2013.12.002</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Puthli</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fleischer</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Investigations on bolted connections for high strength steel members</article-title>. <source>J. Constr. Steel Res.</source> <volume>57</volume> (<issue>3</issue>), <fpage>313</fpage>&#x2013;<lpage>326</lpage>. <pub-id pub-id-type="doi">10.1016/s0143-974x(00)00017-1</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rex</surname>
<given-names>C. O.</given-names>
</name>
<name>
<surname>Easterling</surname>
<given-names>W. S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Behavior and modeling of a bolt bearing on a single plate</article-title>. <source>J. Struct. Eng.</source> <volume>129</volume> (<issue>6</issue>), <fpage>792</fpage>&#x2013;<lpage>800</lpage>. <pub-id pub-id-type="doi">10.1061/(asce)0733-9445(2003)129:6(792)</pub-id>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sadrossadat</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Basarir</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Karrech</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Elchalakani</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Multi-objective mixture design and optimisation of steel fiber reinforced UHPC using machine learning algorithms and metaheuristics</article-title>. <source>Eng. Comput.</source> <volume>38</volume> (<issue>3</issue>), <fpage>2569</fpage>&#x2013;<lpage>2582</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-021-01403-w</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saltelli</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ratto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Andres</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Campolongo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cariboni</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gatelli</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Global sensitivity analysis: the primer</article-title>. <source>Glob. Sensit. Analysis Primer</source>. <pub-id pub-id-type="doi">10.1002/9780470725184</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarir</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z. F.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Horpibulsuk</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pham</surname>
<given-names>B. T.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Optimum model for bearing capacity of concrete-steel columns with AI technology <italic>via</italic> incorporating the algorithms of IWO and ABC</article-title>. <source>Eng. Comput.</source> <volume>37</volume> (<issue>2</issue>), <fpage>797</fpage>&#x2013;<lpage>807</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-019-00855-5</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarir</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Armaghani</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Tahir</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Developing GEP tree-based, neuro-swarm, and whale optimization models for evaluation of bearing capacity of concrete-filled steel tube columns</article-title>. <source>Eng. Comput.</source> <volume>37</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-019-00808-y</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Schapire</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Explaining adaboost</article-title>,&#x201d; in <source>Empirical inference: festschrift in honor of Vladimir N vapnik</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>37</fpage>&#x2013;<lpage>52</lpage>.</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schapire</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Singer</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Improved boosting algorithms using confidence-rated predictions</article-title>. <source>Mach. Learn</source> <volume>37</volume> (<issue>3</issue>), <fpage>297</fpage>&#x2013;<lpage>336</lpage>. <pub-id pub-id-type="doi">10.1023/a:1007614523901</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smola</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Sch&#xf6;lkopf</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A tutorial on support vector regression</article-title>. <source>Statistics Comput.</source> <pub-id pub-id-type="doi">10.1023/B:STCO.0000035301.49549.88</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Clements</surname>
<given-names>D. D. A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Block shear capacity of bolted connections in cold-reduced steel sheets</article-title>. <source>J. Struct. Eng.</source> <volume>138</volume> (<issue>4</issue>), <fpage>459</fpage>&#x2013;<lpage>467</lpage>. <pub-id pub-id-type="doi">10.1061/(asce)st.1943-541x.0000478</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Uz</surname>
<given-names>M. E.</given-names>
</name>
</person-group> (<year>2015a</year>). <article-title>Block shear failure planes of bolted connections &#x2014; direct experimental verifications</article-title>. <source>J. Constr. Steel Res.</source> <volume>111</volume>, <fpage>70</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2015.04.006</pub-id>
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Uz</surname>
<given-names>M. E.</given-names>
</name>
</person-group> (<year>2015b</year>). <article-title>Ultimate shear-out capacities of structural-steel bolted connections</article-title>. <source>J. Struct. Eng.</source> <volume>141</volume> (<issue>6</issue>), <fpage>04014152</fpage>. <pub-id pub-id-type="doi">10.1061/(ASCE)ST.1943-541X.0001105</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teh</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Uz</surname>
<given-names>M. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Combined bearing and shear-out capacity of structural steel bolted connections</article-title>. <source>J. Struct. Eng.</source> <volume>142</volume> (<issue>11</issue>), <fpage>04016098</fpage>. <pub-id pub-id-type="doi">10.1061/(ASCE)ST.1943-541X.0001573</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Torres</surname>
<given-names>J. L. C.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Ecological engineering contributions to slope stability through root reinforcement in diverse soils</article-title>. <source>Appl. Sci.</source> <volume>15</volume>, <fpage>11810</fpage>. <pub-id pub-id-type="doi">10.3390/app152111810</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>Lyu</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Liew</surname>
<given-names>J. Y. R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Behavior of single bolt bearing on high strength steel plate</article-title>. <source>J. Constr. Steel Res.</source> <volume>137</volume>, <fpage>19</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcsr.2017.06.001</pub-id>
</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>An analytic solution for form finding of GFRP elastic gridshells during lifting construction</article-title>. <source>Compos Struct.</source> <volume>244</volume>, <fpage>112290</fpage>. <pub-id pub-id-type="doi">10.1016/j.compstruct.2020.112290</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>An integrated approach of form finding and construction simulation for glass fiber-reinforced polymer elastic gridshells</article-title>. <source>Struct. Des. Tall Special Build.</source> <volume>29</volume> (<issue>5</issue>), <fpage>e1698</fpage>. <pub-id pub-id-type="doi">10.1002/tal.1698</pub-id>
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kookalani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An analytic approach to predict the shape and internal forces of barrel vault elastic gridshells during lifting construction</article-title>. <source>Structures</source> <volume>29</volume>, <fpage>628</fpage>&#x2013;<lpage>637</lpage>. <pub-id pub-id-type="doi">10.1016/j.istruc.2020.11.032</pub-id>
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>C. F.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Effect of end distance and bolt number on bearing strength of bolted connections at elevated temperature</article-title>. <source>Int. J. Steel Struct.</source> <volume>13</volume> (<issue>4</issue>), <fpage>635</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1007/s13296-013-4005-y</pub-id>
</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zakir</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Sakil Ahmed</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Imtiaz Khan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nehdi</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predicting bearing capacity of double shear bolted connections using machine learning</article-title>. <source>Eng. Struct.</source> <volume>251</volume>, <fpage>113497</fpage>. <pub-id pub-id-type="doi">10.1016/j.engstruct.2021.113497</pub-id>
</mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <source>Ensemble machine learning: methods and applications</source>. <publisher-name>Springer Science &#x26; Business Media</publisher-name>.</mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bui</surname>
<given-names>X. N.</given-names>
</name>
<name>
<surname>Pradhan</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Costache</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A generalized artificial intelligence model for estimating the friction angle of clays in evaluating slope stability using a deep neural network and Harris hawks optimization algorithm</article-title>. <source>Eng. Comput.</source> <volume>38</volume>, <fpage>3901</fpage>&#x2013;<lpage>3914</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-020-01272-9</pub-id>
</mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nguyen-Thoi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Asteris</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improved Levenberg&#x2013;Marquardt backpropagation neural network by particle swarm and whale optimization algorithms to predict the deflection of RC beams</article-title>. <source>Eng. Comput.</source> <volume>38</volume>, <fpage>3847</fpage>&#x2013;<lpage>3869</lpage>. <pub-id pub-id-type="doi">10.1007/s00366-020-01267-6</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/164286/overview">Vagelis Plevris</ext-link>, Qatar University, Qatar</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1442313/overview">Carlos Couto</ext-link>, University of Aveiro, Portugal</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3308318/overview">Yanping Zhu</ext-link>, Montana Technological University, United States</p>
</fn>
</fn-group>
</back>
</article>