<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mater.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Materials</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mater.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-8016</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1732297</article-id>
<article-id pub-id-type="doi">10.3389/fmats.2025.1732297</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Intelligent pavement moduli back-calculation using an SEM&#x2013;transformer framework</article-title>
<alt-title alt-title-type="left-running-head">Wang and Zhao</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmats.2025.1732297">10.3389/fmats.2025.1732297</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Guozhong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3255188"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Yanqing</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x26; editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>School of Infrastructure Engineering, Dalian University of Technology</institution>, <city>Dalian</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Shanxi Provincial Transportation Construction Engineering Quality Inspection Center (Co., Ltd.)</institution>, <city>Taiyuan</city>, <country country="CN">China</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Department of Transportation and Logistics, Dalian University of Technology</institution>, <city>Dalian</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Guozhong Wang, <email xlink:href="mailto:wangguozhong_41@126.com">wangguozhong_41@126.com</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-28">
<day>28</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1732297</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>17</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Wang and Zhao.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang and Zhao</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-28">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>This study proposes an intelligent back-calculation framework to estimate multilayer pavement elastic moduli from FWD deflection data under realistic measurement uncertainty. A spectral element method (SEM) model is used to simulate transient FWD responses and generate large-scale datasets. A Transformer regression model is trained to map peak deflection basins to layer moduli, considering four noise scenarios (no error, random, systematic, and combined). Baseline models (BPNN, SVR, and XGBoost) are also evaluated for comparison. The proposed SEM&#x2013;Transformer framework achieves strong accuracy and robustness, with average R<sup>2</sup> &gt; 0.94 and MAPE &#x003C; 8% across all noise cases, and shows superior performance for the base course under noisy conditions. The results demonstrate a reliable and efficient data-driven feasibility framework to support pavement structural evaluation and future digital-twin-based pavement management.</p>
</abstract>
<kwd-group>
<kwd>data-driven modeling</kwd>
<kwd>FWD</kwd>
<kwd>intelligent back-calculation</kwd>
<kwd>intelligent maintenance</kwd>
<kwd>SEM</kwd>
<kwd>transformer</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Natural Science Foundation of China NSFC (51678114), Urumqi Transportation Research Project (JSKJ201806), and Shanxi Province Transportation Research Project (19-JKKJ-4). The funder was not involved in the study design, collection, analysis, interpretation of data, the writing of this article, or the decision to submit it for publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="8"/>
<table-count count="8"/>
<equation-count count="11"/>
<ref-count count="47"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Materials Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The Falling Weight Deflectometer (FWD) test has become one of the most widely used nondestructive evaluation techniques for assessing pavement structural performance (<xref ref-type="bibr" rid="B13">Elbagalati et al., 2018</xref>; <xref ref-type="bibr" rid="B24">Nam et al., 2016</xref>; <xref ref-type="bibr" rid="B26">Plati et al., 2016</xref>). By applying an impulse load to the pavement surface and recording the resulting deflection data, the FWD test provides valuable information about the mechanical response of pavement layers. However, the measured surface deflections do not directly yield the material properties of each layer; therefore, an inverse analysis, commonly referred to as back-calculation, is required to estimate key parameters such as elastic moduli. Most current studies focus on the surface layer (<xref ref-type="bibr" rid="B3">Shamiyeh et al., 2022</xref>; <xref ref-type="bibr" rid="B27">Plati et al., 2024</xref>), lacking an overall performance evaluation of the pavement structure including the base layers (<xref ref-type="bibr" rid="B39">Yang et al., 2025</xref>). Accurate parameter back-calculation is essential for evaluating the structural integrity, residual life, and load-bearing capacity of pavements, serving as a foundation for performance prediction and maintenance decision-making. With the increasing demand for data-driven and intelligent infrastructure management, the integration of intelligent algorithms into the back-calculation process has emerged as a promising approach to enhance efficiency, robustness, and automation of pavement performance evaluation and smart maintenance systems.</p>
<p>Over the past several decades, numerous back-calculation methodologies have been developed to interpret FWD deflection data and estimate pavement layer moduli. Classical approaches, such as the layered elastic theory (LET) and finite element-based iterative algorithms, have formed the foundation of conventional inverse analysis. Early methods, such as the ILLI-BACK (<xref ref-type="bibr" rid="B16">Ioannides et al., 1989</xref>), BISDEF (<xref ref-type="bibr" rid="B4">Bush, 1985</xref>), CHEVDEF (<xref ref-type="bibr" rid="B5">Bush and Alexander, 1985</xref>) and MODCOMP (<xref ref-type="bibr" rid="B17">Irwin, 1994</xref>; <xref ref-type="bibr" rid="B18">Irwin and Szebenyi, 1983</xref>) or MODULUS (<xref ref-type="bibr" rid="B28">Scullion et al., 1990</xref>) programs, relied heavily on deterministic optimization techniques such as the regression formula based on experience, Newton-Raphson, gradient descent, or least-squares fitting. These methods typically minimize the discrepancy between measured and calculated deflections by repeatedly adjusting material parameters within predefined bounds. Although these traditional approaches have contributed significantly to the advancement of pavement evaluation, they suffer from several inherent limitations. The inverse problem is often ill-posed and highly nonlinear, making the solution sensitive to measurement noise and initial guesses (<xref ref-type="bibr" rid="B19">Jiang et al., 2022</xref>; <xref ref-type="bibr" rid="B33">Ullidtz, 1998</xref>). Moreover, conventional optimization algorithms tend to converge to local minima, require significant computational effort, and exhibit poor adaptability when dealing with complex pavement structures or large-scale datasets (<xref ref-type="bibr" rid="B10">Coletti et al., 2024</xref>; <xref ref-type="bibr" rid="B32">Torquato E Silva et al., 2025</xref>). 
The phenomenon of modulus layering, which undermines the credibility of the assessment results, occurs from time to time (<xref ref-type="bibr" rid="B37">Wang et al., 2024</xref>). These shortcomings highlight the need for more robust, efficient, and intelligent back-calculation strategies capable of capturing the nonlinear mapping between deflection responses and pavement material properties.</p>
<p>In recent years, the rapid development of artificial intelligence (AI) and machine learning (ML) techniques has provided new opportunities for solving the complex and nonlinear inverse problems in pavement engineering. Data-driven models, such as artificial neural networks (ANNs) (<xref ref-type="bibr" rid="B20">Khazanovich and Roesler, 1997</xref>; <xref ref-type="bibr" rid="B29">Sharma and Das, 2008</xref>; <xref ref-type="bibr" rid="B31">Tarefder et al., 2015</xref>), BPNN (<xref ref-type="bibr" rid="B23">Meier et al., 1997</xref>; <xref ref-type="bibr" rid="B35">Wang and Zhao, 2022</xref>), support vector machines (SVMs) (<xref ref-type="bibr" rid="B36">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B40">Zhang et al., 2021</xref>), and deep learning architectures (<xref ref-type="bibr" rid="B9">Chen et al., 2025</xref>), have been successfully applied to capture the intricate relationships between FWD deflection data and pavement parameters. These intelligent approaches overcome many limitations of traditional iterative methods by learning from large datasets and establishing direct mappings between input and output variables without the need for repeated forward simulations. Among various deep learning frameworks, Transformer-based models have recently attracted growing attention due to their outstanding ability to process sequential data and model long-range dependencies through self-attention mechanisms (<xref ref-type="bibr" rid="B34">Vaswani et al., 2017</xref>). Unlike conventional neural networks, Transformers can effectively learn complex spatial-mechanical correlations in multi-layer pavement systems, enabling more accurate and robust modulus back-calculation under uncertain or noisy measurement conditions. Consequently, the integration of Transformer architectures into FWD-based parameter back-calculation represents a promising direction toward automated, data-driven, and intelligent pavement evaluation and maintenance.</p>
<p>Beyond pavement engineering, physics-informed and data-driven inverse analysis based on indirect structural responses has been extensively investigated in broader civil and structural engineering domains. In the context of acoustic emission (AE)&#x2013;based damage identification, deep residual learning has been successfully applied to AE source localization in steel&#x2013;concrete composite slabs, demonstrating strong capability in learning inverse mappings under complex wave propagation conditions (<xref ref-type="bibr" rid="B46">Zhou et al., 2024b</xref>). AE-based data-driven approaches have also been employed for damage pattern recognition in corroded reinforced concrete beams strengthened with CFRP anchorage systems (<xref ref-type="bibr" rid="B25">Pan et al., 2023</xref>), as well as for localized corrosion-induced damage monitoring of large-scale RC piles in marine environments (<xref ref-type="bibr" rid="B42">Zheng et al., 2020</xref>), highlighting the effectiveness of deep learning in extracting damage-sensitive features from high-dimensional AE signals. In parallel, hybrid physics&#x2013;data-driven frameworks that integrate numerical modeling with deep learning have gained increasing attention. Representative examples include a hybrid FEM and 1D-CNN methodology for structural damage detection in typical high-pile wharves (<xref ref-type="bibr" rid="B45">Zhou et al., 2022</xref>). Moreover, vibration-based damage localization frameworks combining ambient vibration measurements with multi&#x2013;1D CNN ensemble models have been proposed and validated on large-scale reinforced concrete pedestrian bridges (<xref ref-type="bibr" rid="B47">Zhou et al., 2025b</xref>), demonstrating the scalability of data-driven inverse identification methods to complex, real-world structures. 
At a more fundamental level, lattice modeling approaches have been developed to simulate complete AE waveforms and fracture-induced AE wave propagation in concrete, providing physically interpretable forward models for inverse analysis (<xref ref-type="bibr" rid="B43">Zhou et al., 2024a</xref>; <xref ref-type="bibr" rid="B44">Zhou et al., 2025a</xref>).</p>
<p>Although these studies focus on different sensing modalities (AE or vibration) and structural systems, they share a common methodological paradigm with the present work: leveraging physics-based models to generate informative data and employing deep learning architectures to learn inverse mappings from indirect measurements to internal structural states. The proposed SEM&#x2013;Transformer framework follows this paradigm in the context of pavement engineering by integrating high-fidelity numerical simulations with attention-based learning for FWD-based modulus back-calculation.</p>
<p>With the advancement of sensing technologies and the increasing availability of large-scale pavement monitoring data, data-driven pavement management and intelligent maintenance systems have become an emerging trend in modern infrastructure engineering (<xref ref-type="bibr" rid="B14">Golmohammadi et al., 2025</xref>; <xref ref-type="bibr" rid="B21">Li et al., 2025</xref>; <xref ref-type="bibr" rid="B22">Lu et al., 2025</xref>). By integrating FWD test results with other sensing and inspection data, it is now possible to continuously evaluate pavement health conditions, predict performance degradation, and optimize maintenance scheduling through automated analytical frameworks. In this context, intelligent back-calculation serves as a crucial component of smart pavement management, enabling real-time structural assessment and decision support. Leveraging powerful deep learning models such as Transformers, the back-calculation of pavement mechanical parameters can be achieved with high efficiency and accuracy, supporting predictive maintenance and life-cycle performance optimization. Therefore, this study aims to develop a Transformer-based intelligent back-calculation framework for modulus back-calculation of pavements, providing a foundation for data-driven performance evaluation and intelligent pavement operation and maintenance.</p>
<p>The remainder of this paper is organized as follows. <xref ref-type="sec" rid="s2">Section 2</xref> introduces the overall methodology, including the spectral element method (SEM) for forward simulation, the Transformer-based intelligent back-calculation framework, and the evaluation metrics employed to assess model performance. <xref ref-type="sec" rid="s3">Section 3</xref> describes the procedures of data collection, extraction, and preprocessing, emphasizing the introduction of random and systematic measurement errors to simulate realistic field conditions. <xref ref-type="sec" rid="s4">Section 4</xref> presents the results and discussion, where the Transformer-based back-calculation model is comprehensively evaluated under four noise scenarios (no measurement error, random error, systematic error, and combined random&#x2013;systematic error) and benchmarked against representative machine learning models (BPNN, SVR, and XGBoost), followed by a comparative discussion of robustness and generalization, an assessment of the physical plausibility of the predicted moduli, and considerations regarding potential overfitting. Finally, <xref ref-type="sec" rid="s5">Section 5</xref> summarizes the main findings of this study and outlines potential directions for future research in intelligent pavement performance evaluation and maintenance.</p>
<p>It should be emphasized that the present study focuses on a numerical feasibility investigation, in which both training and testing datasets are generated using a validated spectral element method (SEM). Although synthetic noise is introduced to approximate typical measurement uncertainty in Falling Weight Deflectometer (FWD) tests, no field FWD dataset is directly used for model validation at this stage. Consequently, the primary objective of this work is to evaluate the learning capability, robustness, and stability of the proposed SEM&#x2013;Transformer framework under controlled yet realistic conditions, rather than to claim immediate applicability to in-service pavements.</p>
</sec>
<sec sec-type="methods" id="s2">
<label>2</label>
<title>Methodology</title>
<p>The overall workflow of the proposed intelligent back-calculation system integrates three key components: 1) numerical simulation of pavement responses using the SEM, 2) machine learning-based modulus prediction using the Transformer architecture, and 3) performance evaluation through multiple statistical metrics. The methodology is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flowchart of the intelligent back-calculation methodology for pavement structure parameter prediction.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a three-phase process. Phase 1: Data collection and preprocessing involve numerical simulation, feature selection, error handling, and data preprocessing, splitting data into training and testing sets. Phase 2: Model construction and training use a transformer model with input embedding, multi-head attention, and output embedding. Phase 3: Model assessment and result analysis assess performance using MAE, MSE, RMSE, R-squared, and MAPE metrics, with a graph comparing predicted versus actual values.</alt-text>
</graphic>
</fig>
<sec id="s2-1">
<label>2.1</label>
<title>SEM</title>
<p>The SEM is employed to simulate the pavement surface deflection response under FWD loading. Compared with conventional finite element or finite difference schemes, the SEM achieves high accuracy by interpolating the field variables with high-order spectral shape functions within each element and by describing the distributed mass inertia exactly. In this study, a one-dimensional axisymmetric SEM formulation is adopted following <xref ref-type="bibr" rid="B41">Zhao et al. (2015)</xref> and <xref ref-type="bibr" rid="B7">Cao et al. (2020)</xref>. The layered pavement structure is modeled as a stack of homogeneous, isotropic layers characterized by thickness, elastic modulus, Poisson&#x2019;s ratio, and density, resting on a semi-infinite subgrade.</p>
<p>The governing equations of motion for the axisymmetric elastic medium are given in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m42">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2207;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:msup>
<mml:mo>&#x2207;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c1;</mml:mi>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#xa8;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf42">
<mml:math id="m43">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the displacement vector composed of the radial component <inline-formula id="inf43">
<mml:math id="m44">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the vertical component <inline-formula id="inf44">
<mml:math id="m45">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf45">
<mml:math id="m46">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#xa8;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> refers to the acceleration vector, <inline-formula id="inf46">
<mml:math id="m47">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents Lam&#xe9;&#x2019;s constant for the material, while <inline-formula id="inf47">
<mml:math id="m48">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the shear modulus. <inline-formula id="inf48">
<mml:math id="m49">
<mml:mrow>
<mml:mo>&#x2207;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the gradient differential operator, <inline-formula id="inf49">
<mml:math id="m50">
<mml:mrow>
<mml:mo>&#x2207;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the divergence of <inline-formula id="inf50">
<mml:math id="m51">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf51">
<mml:math id="m52">
<mml:mrow>
<mml:msup>
<mml:mo>&#x2207;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the Laplacian of <inline-formula id="inf52">
<mml:math id="m53">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf53">
<mml:math id="m54">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> signifies the material density.</p>
<p>In the vertical direction, each pavement layer is discretized by a 2-node axisymmetric spectral layer element, while the semi-infinite subgrade is represented by a 1-node throw-off element that conducts energy out of the system. Each node carries two degrees of freedom (radial and vertical displacements). Within a spectral element, the displacement field is interpolated by high-order spectral shape functions constructed from Lagrange polynomials passing through Gauss&#x2013;Lobatto&#x2013;Legendre points, so that one spectral element per physical layer is sufficient and no further mesh refinement is required through the thickness.</p>
<p>In the radial direction, the domain is discretized by a graded mesh that is refined beneath and near the FWD loading area and gradually coarsened toward an outer truncation radius. This radius is chosen sufficiently large such that the computed surface vibration decays to negligible levels, which avoids spurious reflections from the lateral boundary. Axisymmetry is enforced at the centerline <inline-formula id="inf54">
<mml:math id="m55">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and the pavement surface is traction-free outside the circular loading area where the FWD pressure is applied. At the bottom of the truncated domain, vertical displacement is fixed while radial displacement continuity is maintained through the throw-off spectral element to mimic the semi-infinite half-space.</p>
<p>The spatial discretization leads to a semi-discrete system of second-order ordinary differential equations in time. This system is advanced using an explicit central-difference time integration scheme, with the time step selected according to the standard SEM stability criterion based on the smallest element size and the maximum wave speed. Since all layers are modeled as linear elastic materials and no additional material or Rayleigh damping is introduced, the computed response corresponds to the undamped elastic wave propagation problem. The resulting SEM formulation has been validated in previous studies (<xref ref-type="bibr" rid="B7">Cao et al., 2020</xref>; <xref ref-type="bibr" rid="B41">Zhao et al., 2015</xref>), confirming its accuracy and stability for simulating pavement surface deflection histories. The peak values of the computed surface deflection basins at the sensor locations are then extracted and used as input features for the Transformer-based learning model described in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>.</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Intelligent back-calculation methodology</title>
<p>It should be clarified that the term physics-informed in this study refers to the use of SEM-based forward simulations to generate physically consistent training and testing datasets, rather than to the explicit enforcement of physical laws or inequality constraints within the neural network architecture itself. The Transformer model is trained as a data-driven regression mapping from deflection basins to layer elastic moduli and does not impose hard constraints such as modulus ordering or monotonicity during learning. In this study, an intelligent back-calculation framework based on the Transformer architecture is established to predict the elastic modulus of pavement layers from measured deflection data. The Transformer model, originally proposed by <xref ref-type="bibr" rid="B34">Vaswani et al. (2017)</xref>, has demonstrated exceptional performance in capturing long-range dependencies through its self-attention mechanism, making it well-suited for modeling complex nonlinear relationships in pavement structural systems.</p>
<sec id="s2-2-1">
<label>2.2.1</label>
<title>Overall structure of the transformer</title>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, the proposed model is designed as an Encoder-only Transformer architecture specifically optimized for regression-based back-calculation tasks. The input vector, composed of multiple deflection peaks extracted from FWD data, is first transformed into a high-dimensional feature representation through an input embedding layer, allowing the model to capture latent spatial and mechanical patterns. Within the Transformer encoder, each layer consists of two fundamental components: the Multi-Head Self-Attention (MHSA) mechanism and the feed-forward network (FFN). The MHSA module enables the model to learn global correlations among deflection points by dynamically computing the weighted relevance between all positions in the input sequence, effectively capturing inter-peak dependencies that reflect subsurface mechanical interactions. The subsequent FFN applies nonlinear transformations to further refine and abstract the learned features, thereby enhancing the model&#x2019;s expressive capability. Each sublayer is enclosed within a residual connection and layer normalization (Add and Norm), which collectively stabilizes the training process, prevents gradient degradation, and accelerates convergence. Finally, the encoder output is passed through a regression head, which maps the learned feature representations to the predicted modulus values of each pavement layer, enabling accurate and interpretable estimation of structural parameters for intelligent pavement evaluation.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Overall architecture of the Transformer-based intelligent back-calculation model.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g002.tif">
<alt-text content-type="machine-generated">Diagram of a Transformer model architecture with Encoder and Decoder blocks. The Encoder includes Input Embedding, Positional Encoding, Multi-Head Attention, Add &#x26; Norm, and Feed Forward layers. The Decoder contains Masked Multi-Head Attention, Add &#x26; Norm, Feed Forward, Linear, ReLU, and Softmax layers. Arrows indicate data flow, and components are color-coded.</alt-text>
</graphic>
</fig>
<p>An encoder-only Transformer architecture is adopted in this study because the back-calculation task involves a fixed-length regression mapping from FWD deflection basins to elastic moduli, rather than a sequence-to-sequence or generative problem. The encoder-only design is therefore sufficient and computationally efficient. Compared with simpler architectures such as one-dimensional convolutional neural networks or attention-augmented multilayer perceptrons, the Transformer encoder enables direct modeling of global, nonlocal interactions among all deflection sensors through self-attention, without imposing predefined receptive fields or handcrafted feature aggregation rules.</p>
</sec>
<sec id="s2-2-2">
<label>2.2.2</label>
<title>Multi-head attention mechanism</title>
<p>The core of the Transformer lies in its multi-head attention mechanism, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. For each attention head, the input sequence is linearly projected into three matrices: the Query (Q), Key (K), and Value (V). The scaled dot-product attention is computed as <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m56">
<mml:mrow>
<mml:mtext>Attention</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>Softmax</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
<mml:msqrt>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf55">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the dimensionality of the key vectors. Multiple attention heads operate in parallel to capture diverse feature interactions, and their outputs are concatenated and linearly transformed. This mechanism allows the model to learn complex dependencies between deflection measurements and corresponding modulus responses at multiple representation levels (<xref ref-type="bibr" rid="B11">Dosovitskiy et al., 2020</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure of the multi-head attention mechanism.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g003.tif">
<alt-text content-type="machine-generated">Diagram of multi-head attention architecture in transformers. It includes Scaled Dot-Product Attention with components: MatMul, Scale, optional Mask, and SoftMax, followed by MatMul. Multiple heads process linear transformations of input vectors V, K, and Q, and results are concatenated. Output passes through a Linear layer.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2-3">
<label>2.2.3</label>
<title>Application to pavement modulus back-calculation</title>
<p>The developed Transformer model is employed to perform back-calculation of pavement layer elastic moduli from FWD deflection data. It learns a direct mapping between surface deflection basins (either measured in the field or generated through numerical simulations) and the elastic moduli of individual pavement layers. Through supervised training on paired datasets of deflection responses and known material parameters, the model captures the complex nonlinear relationships between surface mechanical behavior and the internal structural characteristics of the pavement system. Unlike conventional iterative back-calculation algorithms that rely heavily on initial guesses and are prone to convergence to local minima, the Transformer exploits a data-driven learning mechanism and attention-based architecture to achieve high generalization performance across diverse pavement configurations, while also enabling efficient parallel computation and substantially reducing computational time. In addition, by incorporating global contextual dependencies among sensor readings, the Transformer exhibits strong robustness to measurement noise and maintains prediction stability under uncertain or imperfect data conditions. These characteristics make it a powerful and intelligent approach for accurately estimating layer moduli in multilayer pavement systems, thereby providing a solid foundation for automated and reliable pavement evaluation and maintenance decision-making.</p>
<p>The nonlinear mapping from the FWD deflection basin to the layer elastic moduli is realized by an encoder-only Transformer architecture. The model input is the vector of nine peak surface deflections <inline-formula id="inf56">
<mml:math id="m58">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>9</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, measured at radial distances <inline-formula id="inf57">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>20</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>30</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>50</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>80</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>110</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>140</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>170</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>200</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> mm from the load center. To better exploit both the magnitude and spatial layout of these measurements, the raw deflections are first processed by a dedicated embedding module, denoted as PeakEmbed. In this module, each scalar peak deflection is projected from <inline-formula id="inf58">
<mml:math id="m60">
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> into a <inline-formula id="inf59">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>128</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>-dimensional feature space by a fully connected layer. A sinusoidal positional encoding, similar to that used in the original Transformer formulation, is then added to retain the ordered sensor index information along the radial direction. Furthermore, the physical sensor spacing is explicitly encoded through a small multilayer perceptron (MLP) that maps the normalized sensor distance (in meters) through a 1&#x2013;128&#x2013;128 MLP with ReLU activation. The output of this distance MLP is added elementwise to the peak-value embedding, so that the final token representation accounts for both the measured deflection and its radial location. This procedure yields an input sequence of length <inline-formula id="inf60">
<mml:math id="m62">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>9</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> with feature dimension <inline-formula id="inf61">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>128</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>On top of the PeakEmbed module, we employ an encoder-only Transformer with <inline-formula id="inf62">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mtext>enc</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> identical encoder layers. Each encoder layer is implemented using the standard TransformerEncoderLayer in PyTorch with batch_first &#x3d; True. The multi-head self-attention (MHSA) block uses <inline-formula id="inf63">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mtext>head</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> attention heads, leading to key and value dimensions <inline-formula id="inf64">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mtext>head</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>32</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> for each head. The position-wise feed-forward network (FFN) in each encoder layer consists of two fully connected layers with an intermediate dimension <inline-formula id="inf65">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>ff</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>256</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and a ReLU nonlinearity. Residual connections, layer normalization, and dropout are applied around both the MHSA and FFN sublayers following the default PyTorch implementation, with a dropout rate of 0.1 in each encoder layer. To obtain a compact representation of the entire deflection basin, a learnable &#x201c;[CLS]&#x201d; token of size <inline-formula id="inf66">
<mml:math id="m68">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>128</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> is prepended to the embedded sequence. The CLS token is introduced as a learnable global representation to aggregate information from all sensor tokens through self-attention. Although the input deflection vector has a fixed length, the CLS-based aggregation provides a principled alternative to fixed pooling operations (e.g., mean or max pooling) and allows the model to adaptively learn the relative contribution of each deflection measurement to the inverse mapping. The concatenated sequence (CLS token plus nine sensor tokens) is passed through the Transformer encoder, and only the output corresponding to the CLS position is retained as a global feature vector. This global vector is then mapped to the target elastic moduli through a regression head comprising a two-layer MLP: a fully connected layer from 128 to 128 units with ReLU activation, followed by a linear layer from 128 to 3 units. The three outputs correspond to the standardized (Z-score) elastic moduli <inline-formula id="inf67">
<mml:math id="m69">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> of the surface layer, base layer, and subgrade, respectively.</p>
<p>Prior to training, both the input peak deflections and the output moduli are standardized by Z-score normalization using statistics (mean and standard deviation) computed solely from the training subset. The Transformer is trained to minimize the Smooth L1 loss (Smooth L1 Loss with <inline-formula id="inf68">
<mml:math id="m70">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) between the predicted and true standardized moduli, which provides a compromise between the robustness of L1 loss and the sensitivity of L2 loss to small errors. The optimizer is AdamW with an initial learning rate of <inline-formula id="inf69">
<mml:math id="m71">
<mml:mrow>
<mml:mn>1.0</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and a weight decay of <inline-formula id="inf70">
<mml:math id="m72">
<mml:mrow>
<mml:mn>1.0</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Training is performed for 90 epochs with a mini-batch size of 256, on a GPU when available or otherwise on a CPU. During training, we monitor the average training loss in the standardized space as well as the mean absolute error (MAE) on the held-out test set to verify convergence and stability.</p>
<p>The number of encoder layers and attention heads was selected based on empirical trade-offs between model capacity, training stability, and overfitting risk. Given the relatively small number of input sensors (nine deflection measurements) and the synthetic nature of the dataset, deeper or wider Transformer configurations were found to offer limited performance gains while increasing computational cost and susceptibility to overfitting. Accordingly, a compact configuration with two encoder layers and four attention heads was adopted as a balanced and reproducible design for the present feasibility study.</p>
<p>All key architectural and training hyperparameters of the Transformer model are summarized in <xref ref-type="table" rid="T1">Table 1</xref> for ease of reference and reproducibility.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Transformer architecture and training hyperparameters used for the back-calculation of multilayer pavement elastic moduli from FWD deflection data.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Item</th>
<th align="center">Description</th>
<th align="center">Value/setting</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Input dimension</td>
<td align="center">Number of FWD peak deflections in each sample</td>
<td align="center">9 sensors</td>
</tr>
<tr>
<td align="center">Sensor offsets</td>
<td align="center">Radial distances of FWD sensors from load center</td>
<td align="center">0, 20, 30, 50, 80, 110, 140, 170, 200 mm</td>
</tr>
<tr>
<td align="center">Input embedding (PeakEmbed)</td>
<td align="center">Linear projection from scalar peak value to model space</td>
<td align="center">Linear (1 &#x2192; 128)</td>
</tr>
<tr>
<td align="center">Positional encoding</td>
<td align="center">Sinusoidal positional encoding added to each sensor token</td>
<td align="center">Sinusoidal, max length &#x2265;64</td>
</tr>
<tr>
<td align="center">Distance embedding</td>
<td align="center">MLP applied to normalized sensor distance (in m) and added to token features</td>
<td align="center">MLP: 1 &#x2192; 128 &#x2192; 128, ReLU</td>
</tr>
<tr>
<td align="center">Model dimension</td>
<td align="center">Feature dimension of all tokens</td>
<td align="center">
<inline-formula id="inf71">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>128</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Encoder type</td>
<td align="center">Encoder-only transformer (TransformerEncoder)</td>
<td align="center">PyTorch implementation</td>
</tr>
<tr>
<td align="center">Number of encoder layers</td>
<td align="center">Stacked self-attention &#x2b; FFN blocks</td>
<td align="center">
<inline-formula id="inf72">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mtext>enc</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Number of attention heads</td>
<td align="center">Heads in multi-head self-attention</td>
<td align="center">
<inline-formula id="inf73">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mtext>head</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Key/value dimension per head</td>
<td align="center">Dimension of key and value vectors</td>
<td align="center">
<inline-formula id="inf74">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>32</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Feed-forward dimension</td>
<td align="center">Hidden size of position-wise FFN</td>
<td align="center">
<inline-formula id="inf75">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>ff</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>256</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">FFN activation</td>
<td align="center">Nonlinear activation in FFN</td>
<td align="center">ReLU</td>
</tr>
<tr>
<td align="center">Dropout in encoder</td>
<td align="center">Dropout rate in each encoder layer</td>
<td align="center">0.1 (PyTorch default)</td>
</tr>
<tr>
<td align="center">Normalization</td>
<td align="center">Layer normalization with residual connections</td>
<td align="center">Post-attention and post-FFN</td>
</tr>
<tr>
<td align="center">CLS token</td>
<td align="center">Learnable global token prepended to sequence</td>
<td align="center">1 &#xd7; 1 &#xd7; 128 parameter</td>
</tr>
<tr>
<td align="center">Regression head</td>
<td align="center">MLP mapping CLS representation to moduli</td>
<td align="center">Linear (128 &#x2192; 128) &#x2b; ReLU &#x2b; Linear (128 &#x2192; 3)</td>
</tr>
<tr>
<td align="center">Output dimension</td>
<td align="center">Number of target variables</td>
<td align="center">3 (E1, E2, E3)</td>
</tr>
<tr>
<td align="center">Normalization of inputs/outputs</td>
<td align="center">z-score normalization using training statistics</td>
<td align="center">Mean and std from training set only</td>
</tr>
<tr>
<td align="center">Loss function</td>
<td align="center">Objective for regression</td>
<td align="center">Smooth L1 (&#x3b2; &#x3d; 0.5)</td>
</tr>
<tr>
<td align="center">Optimizer</td>
<td align="center">Optimization algorithm</td>
<td align="center">AdamW</td>
</tr>
<tr>
<td align="center">Initial learning rate</td>
<td align="center">Base learning rate for AdamW</td>
<td align="center">
<inline-formula id="inf76">
<mml:math id="m78">
<mml:mrow>
<mml:mn>1.0</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Weight decay</td>
<td align="center">L2 regularization through AdamW</td>
<td align="center">
<inline-formula id="inf77">
<mml:math id="m79">
<mml:mrow>
<mml:mn>1.0</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Batch size</td>
<td align="center">Mini-batch size during training</td>
<td align="center">256</td>
</tr>
<tr>
<td align="center">Number of epochs</td>
<td align="center">Training iterations over the dataset</td>
<td align="center">90</td>
</tr>
<tr>
<td align="center">Computation device</td>
<td align="center">Hardware used for training</td>
<td align="center">GPU if available, otherwise CPU</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Evaluation metrics</title>
<p>The model&#x2019;s predictive performance is evaluated using five standard statistical metrics: MAE, Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Mean Absolute Percentage Error (MAPE), and the Coefficient of Determination (R<sup>2</sup>). These metrics assess both the magnitude and distribution of prediction errors, providing a comprehensive evaluation of model accuracy.</p>
<sec id="s2-3-1">
<label>2.3.1</label>
<title>MAE</title>
<p>The MAE calculates the average magnitude of the absolute differences between predicted values and observed values. It is a linear score, meaning all individual differences are weighted equally in the average. MAE (<xref ref-type="disp-formula" rid="e3">Equation 3</xref>) avoids the issue of error cancellation and thus accurately reflects the actual size of the prediction errors (<xref ref-type="bibr" rid="B38">Wudil et al., 2024</xref>).<disp-formula id="e3">
<mml:math id="m80">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3-2">
<label>2.3.2</label>
<title>MSE</title>
<p>The MSE is a statistical metric used to evaluate the accuracy of a model. It is calculated by taking the average of the squared differences between the actual and predicted values (<xref ref-type="bibr" rid="B15">Goodfellow et al., 2016</xref>). MSE (<xref ref-type="disp-formula" rid="e4">Equation 4</xref>) is sensitive to outliers&#x2014;since large deviations between predictions and true values become even larger after squaring&#x2014;but this property also allows it to effectively reflect the overall distribution of prediction errors.<disp-formula id="e4">
<mml:math id="m81">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3-3">
<label>2.3.3</label>
<title>RMSE</title>
<p>The RMSE (<xref ref-type="disp-formula" rid="e5">Equation 5</xref>) represents the sample standard deviation of the differences&#x2014;known as residuals&#x2014;between predicted and observed values (<xref ref-type="bibr" rid="B6">Bypour et al., 2024</xref>). It indicates the degree of dispersion of the sample errors. In practical measurements, the number of observations <inline-formula id="inf78">
<mml:math id="m82">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is always limited, and the true value can only be approximated by the most reliable (best-estimated) value.<disp-formula id="e5">
<mml:math id="m83">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3-4">
<label>2.3.4</label>
<title>MAPE</title>
<p>The MAPE (<xref ref-type="disp-formula" rid="e6">Equation 6</xref>) is a statistical metric used to measure the degree of error between predicted and actual values (<xref ref-type="bibr" rid="B8">Chen et al., 2024</xref>). It is calculated by taking the absolute difference between the predicted and actual values as a percentage of the actual value, and then averaging these percentages to reflect the overall accuracy of the predictions.<disp-formula id="e6">
<mml:math id="m84">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3-5">
<label>2.3.5</label>
<title>R<sup>2</sup>
</title>
<p>The <inline-formula id="inf79">
<mml:math id="m85">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="disp-formula" rid="e7">Equation 7</xref>) is a statistical measure based on the decomposition of the total sum of squares, used to evaluate how well a regression model fits the observed data. It represents the proportion of variance in the dependent variable that is explained by the regression model (<xref ref-type="bibr" rid="B12">Draper and Smith, 1998</xref>). Therefore, the higher the <inline-formula id="inf80">
<mml:math id="m86">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> value, the better the model fits the data.<disp-formula id="e7">
<mml:math id="m87">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf81">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the predicted value, <inline-formula id="inf82">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the actual value, <inline-formula id="inf83">
<mml:math id="m90">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> the average value of the actual values, and <inline-formula id="inf84">
<mml:math id="m91">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> indicates the number of samples. Smaller values of MAE, MSE, RMSE, and MAPE indicate better predictive performance of the model, and the closer the value of R<sup>2</sup> is to 1, the better the performance of the model is.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Data collection, extraction and preprocessing</title>
<p>This section presents the complete workflow of data preparation for the intelligent back-calculation model, as illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. The entire process integrates four major stages: numerical simulation, feature extraction, noise processing, and data preprocessing.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Flowchart of data collection, extraction, and preprocessing.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g004.tif">
<alt-text content-type="machine-generated">Diagram showing an applied load of 0.7 MPa on a pavement with measurement points labeled D1 to D9. A graph compares deflection peaks from numerical simulations and SEM in millimeters. The workflow includes data collection, error handling, feature selection, and database management. Various error scenarios are listed: none, random error, systematic error, and a combination of both.</alt-text>
</graphic>
</fig>
<p>Firstly, a three-layer pavement system comprising surface course, base course, and subgrade is modeled, and FWD loading is applied to reproduce field testing conditions. The dynamic responses of the pavement structure are computed using the SEM, which offers high precision and computational efficiency for transient wave propagation in layered media.</p>
<p>Secondly, the simulated deflection time histories at multiple measurement points are analyzed to obtain the peak deflection values, which serve as representative features reflecting the stiffness characteristics of the pavement layers. These extracted features are paired with their corresponding layer moduli to form the raw dataset.</p>
<p>Thirdly, in order to account for possible measurement uncertainty and improve model generalization, noise processing is introduced. Synthetic noise consistent with the statistical properties of field measurements is added to part of the dataset, simulating realistic variability in FWD test data.</p>
<p>Finally, the dataset undergoes preprocessing steps, including data normalization (via z-score standardization), sample shuffling, and train-test partitioning. These operations ensure that all features are dimensionally comparable and that the Transformer model can achieve stable convergence during training.</p>
<p>Overall, this systematic data preparation framework establishes a solid foundation for the subsequent intelligent back-calculation analysis, ensuring both the physical realism of the inputs and the statistical robustness of the learning process.</p>
<sec id="s3-1">
<label>3.1</label>
<title>Data collection and extraction</title>
<p>To train and evaluate the Transformer-based intelligent back-calculation model, a large-scale synthetic dataset was established through numerical simulations using the validated SEM model. The SEM approach provides high computational efficiency and accuracy for solving dynamic response problems of layered pavement systems, making it particularly suitable for simulating FWD tests.</p>
<p>The modeled pavement structure consists of three primary layers: a surface course, a base course, and a subgrade. Each layer is characterized by its elastic modulus, Poisson&#x2019;s ratio, and thickness, as summarized in <xref ref-type="table" rid="T2">Table 2</xref>. In the SEM simulations, pavement layers are modeled as linear elastic materials, and light viscous damping is introduced at the dynamic response level, following standard practice in SEM-based dynamic analysis of pavements (<xref ref-type="bibr" rid="B1">Al-Khoury et al., 2001</xref>; <xref ref-type="bibr" rid="B2">2002</xref>). The mechanical parameters were randomly combined within reasonable engineering ranges to ensure adequate representation of various pavement conditions, resulting in a total of 20592 combinations of pavement structures.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Structure information of asphalt pavements.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Layer</th>
<th align="center">Thickness (cm)</th>
<th align="center">Modulus (MPa)</th>
<th align="center">Poisson&#x2019;s ratio</th>
<th align="center">Density (kg/m<sup>3</sup>)</th>
<th align="center">Damping ratio</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Surface course</td>
<td align="center">10, 15, 20</td>
<td align="center">2500, 5000, 7500, 10000, 12500, 15000, 17500, 20000, 22500, 25000, 27500, 30000</td>
<td align="center">0.30</td>
<td align="center">2400</td>
<td align="center">0.02</td>
</tr>
<tr>
<td align="center">Base course</td>
<td align="center">20, 30, 40, 50</td>
<td align="center">4000, 5500, 7000, 8500, 10000, 11500, 13000, 14500, 16000, 17500, 19000</td>
<td align="center">0.25</td>
<td align="center">2200</td>
<td align="center">0.02</td>
</tr>
<tr>
<td align="center">Subgrade</td>
<td align="center">-</td>
<td align="center">40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100</td>
<td align="center">0.40</td>
<td align="center">1600</td>
<td align="center">0.05</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>During the simulation, the FWD test applies an impulsive load to the pavement surface in the form of a half-sine pulse with a peak pressure of 0.7 MPa and a duration of 25 ms. The loading plate radius is set to 15 cm, following standard FWD testing procedures. The pavement response was monitored at nine measurement points located at 0, 20, 30, 50, 80, 110, 140, 170, and 200 cm from the load center, corresponding to the typical sensor arrangement used in field testing.</p>
<p>For each simulation case, the SEM model outputs the deflection time history at all nine sensors. The peak deflection values were extracted from these time histories using an automated peak detection algorithm, representing the maximum surface displacement under the dynamic load. These peak values form the input features for the back-calculation model, while the corresponding elastic moduli of the three pavement layers serve as the output targets. Consequently, a comprehensive dataset with nine input variables (deflection peaks) and three output variables (layer moduli) was constructed, containing 20592 samples in total. This dataset was subsequently normalized and divided into training and testing subsets for the Transformer model development and performance evaluation.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Data preprocessing</title>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Noise processing</title>
<p>From each simulated time-domain response, the peak deflection values are extracted as input features. To account for potential measurement imperfections in real-world applications, random and systematic errors are introduced into the simulated deflection data to simulate realistic noise conditions. The corresponding error assumptions are listed in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<p>From each simulated time-domain response, the peak deflection values are extracted as input features representing the pavement structural stiffness. However, the idealized numerical simulations do not fully reflect the uncertainties that commonly occur in field FWD measurements, such as sensor inaccuracies, temperature effects, and load plate contact variations. To account for these potential measurement imperfections and to enhance the model&#x2019;s robustness, different error treatment strategies were implemented during data preprocessing, as summarized in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Specific information of measurement errors.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Measurement errors</th>
<th align="center">Specific assumed values</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Random error <inline-formula id="inf85">
<mml:math id="m92">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf86">
<mml:math id="m93">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Systematic error <inline-formula id="inf87">
<mml:math id="m94">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf88">
<mml:math id="m95">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo>%</mml:mo>
<mml:mo>&#x2264;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2264;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Four distinct data processing scenarios were designed to assess the model&#x2019;s sensitivity to measurement noise:</p>
<sec id="s3-2-1-1">
<label>3.2.1.1</label>
<title>Case &#x2460;: No error treatment</title>
<p>The original simulated deflection data are used directly without any modification. This serves as the baseline condition, representing an ideal, noise-free environment where the inverse model is trained purely on clean data.</p>
</sec>
<sec id="s3-2-1-2">
<label>3.2.1.2</label>
<title>Case &#x2461;: Random error only</title>
<p>In this scenario, only random errors are introduced to each deflection value to simulate stochastic disturbances arising from equipment fluctuations or environmental noise. The random error term, denoted as <inline-formula id="inf89">
<mml:math id="m96">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, follows a Gaussian distribution <inline-formula id="inf90">
<mml:math id="m97">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, meaning the noise has a zero mean and a standard deviation of 2 &#x3bc;m, consistent with typical FWD sensor resolution limits (<xref ref-type="bibr" rid="B30">Stubstad et al., 2000</xref>).</p>
</sec>
<sec id="s3-2-1-3">
<label>3.2.1.3</label>
<title>Case &#x2462;: Systematic error only</title>
<p>To represent bias-type deviations caused by sensor miscalibration, temperature drift, or uneven load application, a systematic error term <inline-formula id="inf91">
<mml:math id="m98">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is applied uniformly across all sensors in each test. The error magnitude is randomly selected within the range from &#x2212;4% to &#x2b;4%, implying either underestimation or overestimation of the deflection amplitude by the entire measurement system.</p>
</sec>
<sec id="s3-2-1-4">
<label>3.2.1.4</label>
<title>Case &#x2463;: Combined random and systematic errors</title>
<p>In the most realistic scenario, both error components are simultaneously introduced. The final deflection at measurement point <italic>i</italic> is expressed as<disp-formula id="equ1">
<mml:math id="m99">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf92">
<mml:math id="m100">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the true simulated deflection, <inline-formula id="inf93">
<mml:math id="m101">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> the systematic error, and <inline-formula id="inf94">
<mml:math id="m102">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> the random error. This condition closely emulates the uncertainty characteristics encountered in actual FWD testing, where both instrument bias and stochastic fluctuations coexist.</p>
<p>Through this four-level noise injection strategy, the constructed datasets enable comprehensive evaluation of the Transformer model&#x2019;s robustness, generalization capability, and resistance to measurement uncertainty, ensuring its applicability to real-world pavement deflection data.</p>
<p>It should be emphasized that the present dataset is fully generated from numerical simulations, and the introduced noise scenarios only approximate, rather than fully reproduce, the complexity of real FWD measurements. In practice, measurement errors may exhibit spatial correlation among sensors, time-dependent drift, temperature-induced bias, and coupling effects between sensors and pavement surface conditions. These factors are not explicitly modeled in the current study. Therefore, the adopted noise model should be regarded as a first-order representation designed to test model robustness, rather than a comprehensive description of field measurement uncertainty.</p>
</sec>
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Z-score normalization</title>
<p>The deflection data and modulus labels are standardized using Z-score normalization, ensuring zero mean and unit variance. This normalization helps maintain numerical stability and prevents feature dominance caused by scale differences. Z-score normalization is a method that transforms data into a distribution with zero mean and unit variance by subtracting the mean and dividing by the standard deviation. Before model training, both the input features (FWD peak deflections) and output variables (elastic moduli of the three pavement layers) were standardized using the Z-score method to eliminate dimensional differences and improve numerical stability during training. Let the training samples be denoted as <inline-formula id="inf95">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf96">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, with their respective means and standard deviations represented by <inline-formula id="inf97">
<mml:math id="m105">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf98">
<mml:math id="m106">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The standardization formulas are as follows:<disp-formula id="e8">
<mml:math id="m107">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m108">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>Both model training and prediction are performed in the standardized space. The means <inline-formula id="inf99">
<mml:math id="m109">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf100">
<mml:math id="m110">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and standard deviations <inline-formula id="inf101">
<mml:math id="m111">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf102">
<mml:math id="m112">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> used in <xref ref-type="disp-formula" rid="e8">Equations 8</xref>&#x2013;<xref ref-type="disp-formula" rid="e10">10</xref> are computed exclusively from the training subset. The same statistics are then applied to normalize the validation and test subsets, and to inverse-transform the predicted outputs back to physical units (MPa). This protocol ensures that no information from the validation or test data &#x201c;leaks&#x201d; into the training process through normalization, and that the reported performance truly reflects the model&#x2019;s generalization capability. After prediction, the results are transformed back to the physical units (MPa) through an inverse transformation as follows:<disp-formula id="e10">
<mml:math id="m113">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf103">
<mml:math id="m114">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the original value of the <inline-formula id="inf104">
<mml:math id="m115">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> input feature for the <inline-formula id="inf105">
<mml:math id="m116">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> sample (for example, the peak deflection measured by the <inline-formula id="inf106">
<mml:math id="m117">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> sensor); <inline-formula id="inf107">
<mml:math id="m118">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the original target value of the <inline-formula id="inf108">
<mml:math id="m119">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> output variable for the <inline-formula id="inf109">
<mml:math id="m120">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sample (i.e., the elastic modulus of the corresponding layer, in MPa). <inline-formula id="inf110">
<mml:math id="m121">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf111">
<mml:math id="m122">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the mean and standard deviation of the <inline-formula id="inf112">
<mml:math id="m123">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> input feature in the training set, respectively. <inline-formula id="inf113">
<mml:math id="m124">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf114">
<mml:math id="m125">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denote the mean and standard deviation of the <inline-formula id="inf115">
<mml:math id="m126">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th output variable (modulus) in the training set. <inline-formula id="inf116">
<mml:math id="m127">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the dimensionless value obtained by applying Z-score normalization to the input feature <inline-formula id="inf117">
<mml:math id="m128">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf118">
<mml:math id="m129">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the normalized value of the output variable <inline-formula id="inf119">
<mml:math id="m130">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf120">
<mml:math id="m131">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the model&#x2019;s predicted output in the standardized space; and <inline-formula id="inf121">
<mml:math id="m132">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the actual predicted value after inverse standardization (in MPa). The notation &#x201c;<inline-formula id="inf122">
<mml:math id="m133">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>&#x201d; indicates a standardized variable, while the &#x201c;&#x5e;&#x201d; symbol denotes a predicted value. To avoid numerical instability, a lower bound correction is applied to very small standard deviations, defined as <inline-formula id="inf123">
<mml:math id="m134">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>8</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Both training and testing data are standardized using the statistics (<inline-formula id="inf124">
<mml:math id="m135">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) computed from the training set to prevent data leakage.</p>
<p>For each noise scenario, the synthetic SEM-based dataset of noiseless input&#x2013;output pairs is first generated and then randomly partitioned into two mutually exclusive subsets, with 70% of the samples used for training and 30% reserved for testing, using a fixed random seed to ensure reproducibility (<xref ref-type="sec" rid="s3-1">Section 3.1</xref>). The clean deflection basins are computed from the SEM model and, after this train&#x2013;test split, are corrupted by the prescribed noise models: random measurement noise is introduced by adding zero-mean Gaussian noise to each sensor deflection with a standard deviation proportional to the local peak deflection magnitude, whereas systematic noise is represented by a constant bias term applied to all sensors; a combined-noise case is constructed by superposing the random and systematic components. Prior to training, both the input peak deflections and the output elastic moduli are standardized via Z-score normalization. All standardization operations are performed in a strictly training-only manner to avoid data leakage: the means and standard deviations of the input and output variables are computed from the training subset only and then used to normalize both the training and test data, as well as to inverse-transform the model predictions back to physical units (MPa). The normalized training subset is finally fed to the Transformer model described in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>, and model training and loss computation are carried out entirely in this standardized space.</p>
</sec>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<label>4</label>
<title>Results and discussion</title>
<p>This section presents a comprehensive evaluation of the Transformer-based intelligent back-calculation model under four distinct noise conditions: (1) no measurement error, (2) random error, (3) systematic error, and (4) combined random and systematic error. Each condition corresponds to a realistic field scenario, reflecting the influence of measurement imperfections in FWD testing. Model performance was quantitatively assessed using the MAE, MSE, RMSE, MAPE, and R<sup>2</sup>. These metrics were calculated for each pavement layer (surface course, base course, and subgrade) as well as averaged over all layers to provide a holistic understanding of model behavior.</p>
<sec id="s4-1">
<label>4.1</label>
<title>Model performance without measurement error</title>
<p>The benchmark case, without any added noise, represents the ideal data condition for model evaluation. As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>; <xref ref-type="table" rid="T4">Table 4</xref>, the Transformer model demonstrates excellent agreement between predicted and true elastic moduli for all pavement layers. The predicted points in <xref ref-type="fig" rid="F5">Figure 5</xref> closely follow the 1:1 reference line, indicating strong consistency across the entire modulus range.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Comparison between predicted and true moduli for all pavement layers with no measurement error: <bold>(a)</bold> surface course modulus E<sub>1</sub>; <bold>(b)</bold> base course modulus E<sub>2</sub>; <bold>(c)</bold> subgrade modulus E<sub>3</sub>.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g005.tif">
<alt-text content-type="machine-generated">Three scatter plots (a, b, c) compare predicted versus actual values of E1, E2, and E3 in MPa for training and testing sets. Each plot includes a diagonal line y &#x3d; x, histograms on the axes, and a legend indicating cyan for training and magenta for testing data. There is a positive correlation in each plot.</alt-text>
</graphic>
</fig>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Model performance evaluation on the test dataset with no measurement error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th align="center">Surface course</th>
<th align="center">Base course</th>
<th align="center">Subgrade</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">MAE (MPa)</td>
<td align="center">1284.68</td>
<td align="center">847.39</td>
<td align="center">0.79</td>
<td align="center">710.95</td>
</tr>
<tr>
<td align="center">MSE (&#xd7;10<sup>3</sup> MPa<sup>2</sup>)</td>
<td align="center">3658995.25</td>
<td align="center">1348095.88</td>
<td align="center">1.20</td>
<td align="center">1669030.78</td>
</tr>
<tr>
<td align="center">RMSE (MPa)</td>
<td align="center">1912.85</td>
<td align="center">1161.08</td>
<td align="center">1.10</td>
<td align="center">1025.01</td>
</tr>
<tr>
<td align="center">MAPE (%)</td>
<td align="center">8.76</td>
<td align="center">7.88</td>
<td align="center">1.14</td>
<td align="center">5.93</td>
</tr>
<tr>
<td align="center">R<sup>2</sup>
</td>
<td align="center">0.95</td>
<td align="center">0.94</td>
<td align="center">1.00</td>
<td align="center">0.96</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Quantitatively, the average MAE reaches 710.95 MPa, and the MAPE remains as low as 5.93%, signifying a high prediction accuracy. The average R<sup>2</sup> of 0.96 further confirms that the model captures over 96% of the variance in the true modulus values.</p>
<p>Among individual layers, the subgrade modulus exhibits a very high statistical correlation with the reference values (R<sup>2</sup> close to 1.00) and relatively small absolute errors (MAE &#x3d; 0.79 MPa), reflecting its dominant influence on the overall deflection basin under the considered parameter ranges and sensor configuration. Conversely, the surface course exhibits slightly larger deviations due to its higher stiffness and greater sensitivity to small perturbations in deflection measurements.</p>
<p>These results highlight the Transformer model&#x2019;s powerful feature extraction ability and its capacity to establish a robust nonlinear mapping between deflection patterns and pavement layer moduli under ideal conditions.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Model performance under random error</title>
<p>To emulate random fluctuations in field measurements, Gaussian noise with zero mean and specified variance was introduced into the input data. The corresponding results are illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>; <xref ref-type="table" rid="T5">Table 5</xref>. Remarkably, even in the presence of random noise, the model maintains a high level of predictive accuracy. The average MAE (674.37 MPa) and RMSE (979.83 MPa) are slightly lower than those in the noise-free case, and R<sup>2</sup> remains above 0.95, suggesting that the model benefits from minor data perturbations, which can enhance generalization by reducing overfitting.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Comparison between predicted and true moduli for all pavement layers with random error: <bold>(a)</bold> surface course modulus E<sub>1</sub>; <bold>(b)</bold> base course modulus E<sub>2</sub>; <bold>(c)</bold> subgrade modulus E<sub>3</sub>.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g006.tif">
<alt-text content-type="machine-generated">Scatter plots with histograms compare predicted versus actual values for three elastic moduli: E1, E2, and E3 in megapascals. In each plot, cyan dots represent the training set, and magenta dots represent the testing set. The diagonal line denotes y &#x3d; x. Histograms display distributions of actual values above and predicted values to the right of each plot. Panel (a) depicts E1, (b) E2, and (c) E3.</alt-text>
</graphic>
</fig>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Model performance evaluation on the test dataset with random error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th align="center">Surface course</th>
<th align="center">Base course</th>
<th align="center">Subgrade</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">MAE (MPa)</td>
<td align="center">1226.26</td>
<td align="center">796.28</td>
<td align="center">0.56</td>
<td align="center">674.37</td>
</tr>
<tr>
<td align="center">MSE (&#xd7;10<sup>3</sup> MPa<sup>2</sup>)</td>
<td align="center">3345253.50</td>
<td align="center">1231481.75</td>
<td align="center">0.59</td>
<td align="center">1525578.61</td>
</tr>
<tr>
<td align="center">RMSE (MPa)</td>
<td align="center">1829.00</td>
<td align="center">1109.72</td>
<td align="center">0.77</td>
<td align="center">979.83</td>
</tr>
<tr>
<td align="center">MAPE (%)</td>
<td align="center">8.53</td>
<td align="center">8.04</td>
<td align="center">0.86</td>
<td align="center">5.81</td>
</tr>
<tr>
<td align="center">R<sup>2</sup>
</td>
<td align="center">0.95</td>
<td align="center">0.95</td>
<td align="center">1.00</td>
<td align="center">0.97</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The R<sup>2</sup> values remain consistently high (&#x2265;0.95 for all layers), indicating that the random disturbances do not significantly affect the model&#x2019;s regression capability. This robustness can be attributed to the self-attention mechanism in the Transformer architecture, which effectively identifies key spatial dependencies among deflection features and suppresses the influence of random noise.</p>
<p>In particular, the base course achieves an R<sup>2</sup> of 0.95 with MAPE below 8.1%, demonstrating the model&#x2019;s adaptability to intermediate stiffness layers. The scatter distribution in <xref ref-type="fig" rid="F6">Figure 6</xref> remains tightly clustered around the reference line, further confirming the model&#x2019;s insensitivity to random fluctuations.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Model performance under systematic error</title>
<p>Systematic errors, such as sensor calibration bias or consistent drift in FWD equipment, were next introduced to evaluate the model&#x2019;s resilience to directional deviations. The outcomes are summarized in <xref ref-type="fig" rid="F7">Figure 7</xref>; <xref ref-type="table" rid="T6">Table 6</xref>. Compared with the previous cases, the performance metrics show a moderate decline. The average MAE increases to 856.30 MPa, RMSE to 1205.76 MPa, and MAPE to 8.19%, while the average R<sup>2</sup> decreases slightly to 0.94.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Comparison between predicted and true moduli for all pavement layers with systematic error: <bold>(a)</bold> surface course modulus E<sub>1</sub>; <bold>(b)</bold> base course modulus E<sub>2</sub>; <bold>(c)</bold> subgrade modulus E<sub>3</sub>.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g007.tif">
<alt-text content-type="machine-generated">Three scatter plots, each comparing predicted versus actual values of elastic moduli (E1, E2, E3) in MPa. Panel (a) shows data for E1 from 0 to 30,000, (b) for E2 from 4,000 to 20,000, and (c) for E3 from 40 to 100. In each plot, training data is teal, testing data magenta, with a y&#x3d;x dashed line indicating perfect prediction. Vertical and horizontal histograms show data distribution.</alt-text>
</graphic>
</fig>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Model performance evaluation on the test dataset with systematic error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th align="center">Surface course</th>
<th align="center">Base course</th>
<th align="center">Subgrade</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">MAE (MPa)</td>
<td align="center">1507.41</td>
<td align="center">1058.94</td>
<td align="center">2.55</td>
<td align="center">856.30</td>
</tr>
<tr>
<td align="center">MSE (&#xd7;10<sup>3</sup> MPa<sup>2</sup>)</td>
<td align="center">4768297.50</td>
<td align="center">2046352.00</td>
<td align="center">9.79</td>
<td align="center">2271553.10</td>
</tr>
<tr>
<td align="center">RMSE (MPa)</td>
<td align="center">2183.64</td>
<td align="center">1430.51</td>
<td align="center">3.13</td>
<td align="center">1205.76</td>
</tr>
<tr>
<td align="center">MAPE (%)</td>
<td align="center">9.37</td>
<td align="center">11.43</td>
<td align="center">3.78</td>
<td align="center">8.19</td>
</tr>
<tr>
<td align="center">R<sup>2</sup>
</td>
<td align="center">0.93</td>
<td align="center">0.91</td>
<td align="center">0.97</td>
<td align="center">0.94</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Visual inspection of <xref ref-type="fig" rid="F7">Figure 7</xref> reveals that the predicted moduli tend to deviate systematically from the 1:1 line, producing a slight offset pattern. This shift reflects the influence of persistent bias in the input data, which cannot be fully corrected by the model&#x2019;s internal learning process. The Transformer architecture, while capable of capturing complex nonlinear relationships, inherently inherits a portion of the systematic bias embedded in the training data distribution.</p>
<p>Nevertheless, even under such challenging conditions, the model&#x2019;s prediction accuracy remains acceptable for engineering applications. The R<sup>2</sup> values for all layers remain above 0.90, demonstrating that the model retains substantial predictive capability. These findings suggest that moderate systematic measurement errors do not critically impair the Transformer&#x2019;s inference reliability, making it feasible for use with field FWD data where small calibration biases are common.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Model performance under combined random and systematic errors</title>
<p>The most realistic testing condition involves the coexistence of both random and systematic errors. <xref ref-type="fig" rid="F8">Figure 8</xref>; <xref ref-type="table" rid="T7">Table 7</xref> show that under this comprehensive noise environment, the Transformer model continues to perform robustly. The average MAE increases modestly to 732.56 MPa, while the average R<sup>2</sup> remains high at 0.95. The MAPE of 7.48% indicates that overall prediction deviations remain within an acceptable engineering range.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Comparison between predicted and true moduli for all pavement layers with random and systematic error: <bold>(a)</bold> surface course modulus E<sub>1</sub>; <bold>(b)</bold> base course modulus E<sub>2</sub>; <bold>(c)</bold> subgrade modulus E<sub>3</sub>.</p>
</caption>
<graphic xlink:href="fmats-12-1732297-g008.tif">
<alt-text content-type="machine-generated">Three scatter plots (a, b, c) depict predicted versus actual values of E&#x2081;, E&#x2082;, and E&#x2083; in MPa. Each plot includes data points for training (cyan) and testing sets (pink), with a diagonal y&#x3d;x line indicating perfect prediction. Density plots and histograms along the axes and top right corners provide additional data distribution insights.</alt-text>
</graphic>
</fig>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Model performance evaluation on the test dataset with random and systematic error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th align="center">Surface course</th>
<th align="center">Base course</th>
<th align="center">Subgrade</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">MAE (MPa)</td>
<td align="center">1311.87</td>
<td align="center">882.85</td>
<td align="center">2.95</td>
<td align="center">732.56</td>
</tr>
<tr>
<td align="center">MSE (&#xd7;10<sup>3</sup> MPa<sup>2</sup>)</td>
<td align="center">3719232.25</td>
<td align="center">1374503.75</td>
<td align="center">13.35</td>
<td align="center">1697916.45</td>
</tr>
<tr>
<td align="center">RMSE (MPa)</td>
<td align="center">1928.53</td>
<td align="center">1172.39</td>
<td align="center">3.65</td>
<td align="center">1034.86</td>
</tr>
<tr>
<td align="center">MAPE (%)</td>
<td align="center">9.09</td>
<td align="center">8.83</td>
<td align="center">4.51</td>
<td align="center">7.48</td>
</tr>
<tr>
<td align="center">R<sup>2</sup>
</td>
<td align="center">0.95</td>
<td align="center">0.94</td>
<td align="center">0.96</td>
<td align="center">0.95</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The subgrade layer once again demonstrates the highest stability, with an R<sup>2</sup> of 0.96, reflecting its lower sensitivity to noise due to smaller deflection amplitude variability. The surface and base layers experience minor performance degradation; however, the overall trend remains consistent, confirming that the Transformer effectively generalizes the underlying input&#x2013;output relationship even when measurement uncertainty increases.</p>
<p>The results collectively demonstrate that the Transformer-based back-calculation model is not only accurate under ideal conditions but also robust and reliable under realistic noise perturbations.</p>
</sec>
<sec id="s4-5">
<label>4.5</label>
<title>Comparison with common machine learning models based on the random and systematic error dataset</title>
<p>For a fair and consistent comparison, all baseline models (BPNN, SVR, and XGBoost) were trained and evaluated under the same experimental conditions as the proposed Transformer model. Specifically, all models used identical input features (nine FWD deflection peaks), output targets (layer elastic moduli), train&#x2013;test split (70%/30%), and data preprocessing procedures, including Z-score normalization. The comparative evaluation was conducted on the same synthetic dataset with combined random and systematic errors, and model performance was assessed on an identical test set using the same evaluation metrics (MAE, MSE, RMSE, MAPE, and R<sup>2</sup>) for the surface course, base course, and subgrade. The hyperparameters of all baseline models were selected using standard tuning strategies within commonly accepted ranges. For the BPNN, the number of hidden neurons and the learning rate were adjusted empirically based on validation performance. For SVR, key hyperparameters including the kernel type, penalty parameter, and kernel width were optimized using grid search. For XGBoost, the tree depth, learning rate, and number of estimators were tuned through empirical validation. Default parameter settings were avoided when they resulted in clear underfitting or overfitting. Consequently, the adopted configurations represent reasonable and competitive baselines rather than minimally tuned models. The corresponding test results are summarized in <xref ref-type="table" rid="T8">Table 8</xref>.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Test set performance of different models on the random and systematic noisy dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">Layer</th>
<th align="center">MAE (MPa)</th>
<th align="center">MSE (&#xd7;10<sup>3</sup> MPa<sup>2</sup>)</th>
<th align="center">RMSE (MPa)</th>
<th align="center">MAPE (%)</th>
<th align="center">R<sup>2</sup>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">BPNN</td>
<td align="center">Surface course</td>
<td align="center">1176.47</td>
<td align="center">2343725.47</td>
<td align="center">1530.92</td>
<td align="center">10.70</td>
<td align="center">0.97</td>
</tr>
<tr>
<td align="center">Base course</td>
<td align="center">1073.63</td>
<td align="center">2064374.05</td>
<td align="center">1436.79</td>
<td align="center">10.63</td>
<td align="center">0.91</td>
</tr>
<tr>
<td align="center">Subgrade</td>
<td align="center">2.71</td>
<td align="center">10.72</td>
<td align="center">3.27</td>
<td align="center">4.00</td>
<td align="center">0.97</td>
</tr>
<tr>
<td rowspan="3" align="center">SVR</td>
<td align="center">Surface course</td>
<td align="center">2252.88</td>
<td align="center">9047769.73</td>
<td align="center">3007.95</td>
<td align="center">18.27</td>
<td align="center">0.88</td>
</tr>
<tr>
<td align="center">Base course</td>
<td align="center">1502.35</td>
<td align="center">3944127.77</td>
<td align="center">1985.98</td>
<td align="center">16.38</td>
<td align="center">0.83</td>
</tr>
<tr>
<td align="center">Subgrade</td>
<td align="center">2.61</td>
<td align="center">9.80</td>
<td align="center">3.13</td>
<td align="center">3.84</td>
<td align="center">0.97</td>
</tr>
<tr>
<td rowspan="3" align="center">XGBoost</td>
<td align="center">Surface course</td>
<td align="center">2939.54</td>
<td align="center">14370359.16</td>
<td align="center">3790.83</td>
<td align="center">23.51</td>
<td align="center">0.80</td>
</tr>
<tr>
<td align="center">Base course</td>
<td align="center">2537.81</td>
<td align="center">9855760.82</td>
<td align="center">3139.39</td>
<td align="center">28.04</td>
<td align="center">0.56</td>
</tr>
<tr>
<td align="center">Subgrade</td>
<td align="center">2.92</td>
<td align="center">12.95</td>
<td align="center">3.60</td>
<td align="center">4.32</td>
<td align="center">0.96</td>
</tr>
<tr>
<td rowspan="3" align="center">Transformer</td>
<td align="center">Surface course</td>
<td align="center">1311.87</td>
<td align="center">3719232.25</td>
<td align="center">1928.53</td>
<td align="center">9.09</td>
<td align="center">0.95</td>
</tr>
<tr>
<td align="center">Base course</td>
<td align="center">882.85</td>
<td align="center">1374503.75</td>
<td align="center">1172.39</td>
<td align="center">8.83</td>
<td align="center">0.94</td>
</tr>
<tr>
<td align="center">Subgrade</td>
<td align="center">2.95</td>
<td align="center">13.35</td>
<td align="center">3.65</td>
<td align="center">4.51</td>
<td align="center">0.96</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T8">Table 8</xref>, the Transformer model achieves the lowest overall error levels and the most consistent performance across all three layers. In terms of MAPE, the average values across the three layers are approximately 7.48% for the Transformer, compared with 8.44% for BPNN, 12.83% for SVR, and 18.62% for XGBoost. The corresponding average R<sup>2</sup> values are about 0.95 for both the Transformer and BPNN, but decrease to roughly 0.89 and 0.77 for SVR and XGBoost, respectively. These results indicate that although BPNN can reach a comparable average R<sup>2</sup>, it still yields larger errors than the Transformer, whereas SVR and XGBoost suffer from a clear degradation in predictive accuracy under the random and systematic noisy condition.</p>
<p>The advantage of the Transformer is particularly evident for the base course, which is generally the most difficult layer to identify due to its intermediate position and strong interaction with both the surface course and the subgrade. For this layer, the Transformer attains MAE &#x3d; 882.85 MPa, RMSE &#x3d; 1172.39 MPa, MAPE &#x3d; 8.83%, and R<sup>2</sup> &#x3d; 0.94, outperforming BPNN (MAE &#x3d; 1073.63 MPa, MAPE &#x3d; 10.63%, R<sup>2</sup> &#x3d; 0.91) and substantially surpassing SVR and XGBoost (e.g., XGBoost yields MAPE &#x3d; 28.04% and R<sup>2</sup> &#x3d; 0.56). For the surface course and subgrade, the Transformer also provides competitive MAE/RMSE and high R<sup>2</sup> values, remaining at least as accurate as, and in several cases more accurate than, the baseline models.</p>
<p>These quantitative comparisons help clarify why a Transformer is preferred over simpler models in the proposed SEM &#x2b; Transformer framework. The multi-head self-attention mechanism enables the Transformer to explicitly capture global dependencies among all FWD deflection measurements, allowing the network to focus on physically informative deflection patterns and to down-weight noisy or less relevant components. In contrast, BPNN relies on fixed fully connected mappings, SVR depends on pre-defined kernel functions, and XGBoost aggregates a series of decision trees, all of which have more limited capacity to represent the highly nonlinear and ill-posed mapping from surface deflections to multilayer elastic moduli under noisy conditions. Consequently, the Transformer not only achieves lower errors and higher R<sup>2</sup> on the random and systematic noisy dataset, but also exhibits stronger robustness and generalization, especially for the critical base course.</p>
</sec>
<sec id="s4-6">
<label>4.6</label>
<title>Physical plausibility, identifiability, and limitations of unconstrained learning</title>
<p>The results presented above indicate that the proposed SEM&#x2013;Transformer framework achieves high predictive accuracy and strong robustness across all considered noise scenarios. Beyond numerical accuracy, however, two fundamental issues deserve careful discussion: physical plausibility of the predicted moduli and identifiability of the inverse mapping from FWD deflections to multilayer elastic properties. It should be emphasized that the adopted Transformer configuration is not claimed to be universally optimal. Rather, it represents a compact and effective design choice tailored to the specific characteristics of the FWD-based back-calculation problem considered in this study.</p>
<p>From the perspective of physical plausibility, the predicted moduli in this study remain consistent with expected pavement mechanics within the predefined parameter space. Across all experiments and noise levels, no non-physical outcomes such as negative elastic moduli or severe modulus layering were observed in the reported test cases. In particular, the stiffness ordering between the surface course, base course, and subgrade is generally preserved. This behavior can be attributed primarily to the use of SEM-generated training data, which inherently satisfy mechanical consistency and realistic stiffness hierarchies.</p>
<p>Nevertheless, it is important to emphasize that the Transformer model itself is unconstrained. No explicit monotonicity, ordering, or inequality constraints (e.g., <inline-formula id="inf125">
<mml:math id="m136">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mtext>surface</mml:mtext>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mtext>base</mml:mtext>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mtext>subgrade</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) are enforced during training or inference. As a result, the observed physical consistency arises implicitly from the data distribution rather than from hard constraints embedded in the learning model. In principle, when applied outside the training distribution or under substantially different field conditions, the model may produce physically inconsistent modulus combinations, such as a surface-layer modulus lower than that of the subgrade. This limitation is common to most purely data-driven back-calculation approaches and should be carefully considered in practical applications.</p>
<p>The issue of identifiability and uniqueness is intrinsic to FWD-based modulus back-calculation and is independent of the specific learning algorithm employed. The inverse mapping from surface deflection basins to multilayer elastic moduli is inherently ill-posed and non-unique: different combinations of layer properties may yield very similar surface deflection responses, particularly under limited sensor spacing and in the presence of measurement noise. Consequently, even under ideal noise-free conditions, a mathematically unique inverse solution does not generally exist.</p>
<p>In this context, the role of the proposed Transformer model is not to recover a unique physical solution, but rather to learn a statistically optimal inverse mapping conditioned on the assumed parameter ranges, pavement configurations, sensor layout, and noise characteristics represented in the training data. The predicted moduli should therefore be interpreted as the most probable estimates within this constrained statistical space, rather than as exact physical truths. This interpretation is consistent with both classical optimization-based back-calculation methods and recent data-driven approaches reported in the literature.</p>
<p>Finally, it should be recognized that a non-negligible domain shift exists between SEM-generated responses and real-world FWD measurements. Real pavements exhibit temperature-dependent and viscoelastic material behavior, layer heterogeneity, construction-induced variability, and non-ideal load&#x2013;pavement contact conditions, whereas the present SEM model assumes linear elasticity, homogeneity, and axisymmetry. Field FWD data are also affected by sensor coupling effects and spatially correlated measurement errors that are difficult to reproduce numerically. While the noise models adopted in this study provide a first-order approximation of measurement uncertainty, they do not fully capture these complexities. Addressing both physical constraint enforcement and the simulation-to-field domain gap will be essential steps toward reliable deployment of the proposed framework in real-world pavement evaluation and digital twin&#x2013;based management systems.</p>
<p>From the perspective of inverse problem theory, pavement modulus back-calculation based on FWD deflections is a fundamentally ill-posed problem. The surface deflection basin represents an aggregated structural response, and different combinations of layer moduli may produce very similar deflection profiles, particularly when sensor spacing is limited and measurement noise is present. The proposed Transformer model does not eliminate this ill-posedness, but rather provides a data-driven regularization by learning the most statistically probable inverse mapping under the assumed parameter ranges and noise conditions.</p>
<p>It should also be noted that different pavement layers exhibit markedly different sensitivities in FWD measurements. The subgrade modulus predominantly controls the overall curvature and far-field deflections of the basin, while the surface and base layers mainly affect near-load deflections. As a result, the inverse mapping is inherently more sensitive to subgrade stiffness variations than to variations in upper-layer moduli. This sensitivity imbalance explains the near-perfect R<sup>2</sup> values observed for the subgrade in the present study. Such high R<sup>2</sup> values reflect dominant sensitivity rather than guaranteed identifiability or uniqueness of the subgrade modulus.</p>
<p>The current study does not explicitly assess the model&#x2019;s ability to distinguish between different modulus combinations that generate nearly indistinguishable deflection basins. A rigorous identifiability or sensitivity analysis&#x2014;such as controlled perturbation studies or equivalence-class analysis&#x2014;would be required to quantify this capability and is left for future work. The goal of this study is not to exhaustively benchmark all possible architectures, but to demonstrate feasibility and robustness of a Transformer-based inverse framework. The comparative analysis is intended to provide contextual performance references under consistent experimental conditions, rather than a statistically exhaustive or uncertainty-aware benchmark across all model families.</p>
</sec>
<sec id="s4-7">
<label>4.7</label>
<title>Overall summary</title>
<p>Across all four scenarios, the Transformer-based intelligent back-calculation framework demonstrates high accuracy, stability, and adaptability. The model achieves an average R<sup>2</sup> of at least 0.94 under all noise conditions, confirming its robustness against both random and systematic measurement errors. The average MAPE values remain at or below approximately 8%, well within acceptable limits for pavement engineering applications.</p>
<p>These findings verify that the Transformer model effectively learns the intrinsic relationships between FWD deflection responses and layer elastic moduli, even in the presence of complex noise patterns. Consequently, this method provides a reliable and data-driven solution for practical modulus back-calculation tasks, offering improved accuracy and interpretability compared with traditional approaches.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>This study developed an intelligent back-calculation framework integrating the SEM and a Transformer-based deep learning model to estimate multilayer pavement elastic moduli from FWD deflection data. Based on the numerical simulations, data preprocessing, and performance evaluations under four noise conditions, the main findings are summarized as follows:</p>
<sec id="s5-1">
<label>5.1</label>
<title>High prediction accuracy and robustness</title>
<p>The Transformer-based model achieved excellent predictive performance, with average <inline-formula id="inf126">
<mml:math id="m137">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> of at least 0.94 and MAPE of approximately 8% or lower across all scenarios. Even under combined random and systematic noise, the model maintained stable accuracy, demonstrating strong generalization and robustness to measurement uncertainty.</p>
</sec>
<sec id="s5-2">
<label>5.2</label>
<title>Superior feature learning and physical consistency</title>
<p>By leveraging multi-head self-attention, the Transformer effectively captured global dependencies among deflection sensors, enabling precise mapping between surface deflection patterns and underlying layer moduli. The predicted trends were physically consistent with pavement structural behavior&#x2014;subgrade moduli showed the highest stability due to smoother deformation responses, while the surface layer exhibited slightly higher variability owing to its stiffness contrast.</p>
</sec>
<sec id="s5-3">
<label>5.3</label>
<title>Efficiency and applicability</title>
<p>Once trained, the proposed model provided rapid, millisecond-level predictions, offering a computationally efficient and fully data-driven solution for modulus inversion. Its end-to-end design minimizes manual parameter tuning and avoids convergence issues common in traditional iterative back-calculation, supporting integration into real-time pavement condition evaluation and intelligent maintenance systems.</p>
<p>Overall, the developed SEM&#x2013;Transformer framework demonstrates strong potential for intelligent, accurate, and efficient pavement structural evaluation, and provides a promising basis for data-driven digital twin systems in pavement management. However, the present study also has clear limitations. All training and testing data are synthetically generated using an SEM model, and no field FWD dataset is used for direct validation. As a result, the current findings should be interpreted as a numerical benchmark demonstrating feasibility and robustness, rather than as evidence of immediate field applicability. Future work will focus on validating the proposed framework using large-scale field FWD datasets, incorporating temperature-dependent and viscoelastic material behavior, and developing physics-guided or domain-adaptive learning strategies to mitigate the simulation-to-field gap. These efforts are essential before the proposed method can be reliably deployed in real-world pavement evaluation and digital twin&#x2013;based infrastructure management systems. Future work will also explicitly address physical constraint enforcement by incorporating monotonicity or inequality constraints into the learning process, for example, through output reparameterization, physics-guided loss functions, or hybrid inversion frameworks. Such extensions are expected to further improve physical interpretability, reduce the risk of inconsistent predictions, and enhance robustness when applying the model to real-world FWD datasets. It should be emphasized that the reported prediction accuracy does not imply mathematical uniqueness of the inverse solution. The proposed framework provides statistically optimal estimates conditioned on the assumed data distribution, rather than resolving the intrinsic non-uniqueness of pavement modulus back-calculation. 
Future work will incorporate uncertainty quantification and statistical testing, such as repeated sampling, confidence interval estimation, or Bayesian approaches, to further strengthen the rigor of comparative performance evaluation.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>GW: Investigation, Methodology, Software, Writing &#x2013; original draft, Data curation. YZ: Conceptualization, Funding acquisition, Supervision, Writing &#x2013; review and editing.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>The authors gratefully acknowledge the financial support received for this work.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Author GW was employed by Shanxi Provincial Transportation Construction Engineering Quality Inspection Center (Co., Ltd.).</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2274698/overview">Alireza Tabarraei</ext-link>, University of North Carolina at Charlotte, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3263819/overview">Zeping Yang</ext-link>, Griffith University, Australia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3302053/overview">Yubao Zhou</ext-link>, Delft University of Technology, Netherlands</p>
</fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbrev1">
<label>Abbreviations:</label>
<p>
<inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Displacement vector SI unit: m; <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#xa8;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> Acceleration vector; <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, Lame&#x2019;s constant of the material; <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mo>&#x2207;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, Gradient differential operator; <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mo>&#x2207;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Divergence of <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msup>
<mml:mo>&#x2207;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Laplacian of <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Material density SI unit: kg/m<sup>3</sup>; <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Dimensionality of the key vectors in the attention mechanism; <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, True simulated deflection value at the <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th measurement point SI unit: &#x3bc;m; <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Systematic error component for the <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th measurement; <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Random error component for the <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th measurement; <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Actual (observed) value of the target variable SI unit: MPa; <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Predicted value obtained from the model SI unit: MPa; <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x2c9;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, Mean value of the actual target variable SI unit: MPa; <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Number of samples; <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Original value of the <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> input feature for the <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> sample (e.g., peak deflection) SI unit: &#x3bc;m; <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Original target value of the <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> output variable for the <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> sample (elastic modulus) SI unit: MPa; <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Mean of the <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> input feature in the training set; <inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Standard deviation of the <inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> input feature in the training set; <inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Mean of the <inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> output variable (modulus) in the training set SI unit: MPa; <inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Standard deviation of the <inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<sub>th</sub> output variable (modulus) in the training set SI unit: MPa; <inline-formula id="inf36">
<mml:math id="m36">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Standardized (dimensionless) value of input feature <inline-formula id="inf37">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> after Z-score normalization; <inline-formula id="inf38">
<mml:math id="m38">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Standardized (dimensionless) value of output variable <inline-formula id="inf39">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf40">
<mml:math id="m40">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, Predicted output in the standardized space; <inline-formula id="inf41">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Predicted output after inverse standardization (restored to physical units) SI unit: MPa.</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Khoury</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kasbergen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Scarpas</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Blaauwendraad</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Spectral element technique for efficient parameter identification of layered media: part II: inverse calculation</article-title>. <source>Int. J. Solids Struct.</source> <volume>38</volume> (<issue>48</issue>), <fpage>8753</fpage>&#x2013;<lpage>8772</lpage>. <pub-id pub-id-type="doi">10.1016/S0020-7683(01)00109-3</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Khoury</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Scarpas</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kasbergen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Blaauwendraad</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Spectral element technique for efficient parameter identification of layered media. Part III: viscoelastic aspects</article-title>. <source>Int. J. Solids Struct.</source> <volume>39</volume> (<issue>8</issue>), <fpage>2189</fpage>&#x2013;<lpage>2201</lpage>. <pub-id pub-id-type="doi">10.1016/S0020-7683(02)00079-3</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bush</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>
<italic>Computer program BISDEF</italic>. Vicksburg, Miss.: US Army Corps of Engineers Waterways Experiment Station</article-title>.</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Bush</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Alexander</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>1985</year>). <source>Pavement evaluation using deflection basin measurements and layered theory</source>, <volume>1022</volume>, <fpage>16</fpage>&#x2013;<lpage>29</lpage>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bypour</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mahmoudian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yekrangnia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kioumarsi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Explainable tuned machine learning models for assessing the impact of corrosion on bond strength in concrete</article-title>. <source>Clean. Eng. Technol.</source> <volume>23</volume>, <fpage>100834</fpage>. <pub-id pub-id-type="doi">10.1016/j.clet.2024.100834</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Effectiveness of static and dynamic backcalculation approaches for asphalt pavement</article-title>. <source>Can. J. Civ. Eng.</source> <volume>47</volume> (<issue>7</issue>), <fpage>846</fpage>&#x2013;<lpage>855</lpage>. <pub-id pub-id-type="doi">10.1139/cjce-2019-0052</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Data-driven atmospheric corrosion prediction model for alloys based on a two-stage machine learning approach</article-title>. <source>Process Saf. Environ. Prot.</source> <volume>188</volume>, <fpage>1093</fpage>&#x2013;<lpage>1105</lpage>. <pub-id pub-id-type="doi">10.1016/j.psep.2024.06.028</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Abdel-Aty</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>A novel CPO-CNN-LSTM based deep learning approach for multi-time scale deflection basin area prediction in asphalt pavement</article-title>. <source>Constr. Build. Mater.</source> <volume>458</volume>, <fpage>139540</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2024.139540</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coletti</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Romeo</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>R. B.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Bayesian backcalculation of pavement properties using parallel transitional markov chain monte carlo</article-title>. <source>Comput.-Aided Civ. Infrastruct. Eng.</source> <volume>39</volume> (<issue>13</issue>), <fpage>1911</fpage>&#x2013;<lpage>1927</lpage>. <pub-id pub-id-type="doi">10.1111/mice.13123</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dosovitskiy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Beyer</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kolesnikov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Weissenborn</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>An image is worth 16x16 words: transformers for image recognition at scale</article-title>.</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Draper</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Applied regression analysis</article-title>.</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elbagalati</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Elseifi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gaspard</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Development of the pavement structural health index based on falling weight deflectometer testing</article-title>. <source>Int. J. Pavement Eng.</source> <volume>19</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1080/10298436.2016.1149838</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Golmohammadi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hernando</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Van den Bergh</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hasheminejad</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Advanced data-driven FBG sensor-based pavement monitoring system using multi-sensor data fusion and an unsupervised learning approach</article-title>. <source>Measurement</source> <volume>242</volume>, <fpage>115821</fpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2024.115821</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Courville</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <source>Deep learning</source>. <publisher-name>The MIT Press</publisher-name>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ioannides</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Barenberg</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Lary</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>1989</year>). &#x201c;<article-title>Interpretation of falling weight deflectometer results using principles of dimensional analysis</article-title>,&#x201d; in <source>Paper presented at the 4th international conference on concrete pavement design and rehabilitation: proceedings, West Lafayette</source>.</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Irwin</surname>
<given-names>L. H.</given-names>
</name>
</person-group> (<year>1994</year>). <source>Instructional guide for back-calculation and the use of MODCOMP3 version 3.6</source>. <publisher-name>Ithaca, NY: Cornell University Local Roads Program, CLRP Publications</publisher-name>, <fpage>4</fpage>&#x2013;<lpage>10</lpage>.</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Irwin</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Szebenyi</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>1983</year>). <source>User&#x27;s guide to modcomp2</source>. <publisher-loc>Ithaca, NY</publisher-loc>: <publisher-name>Cornell University Local Roads Program</publisher-name>, <fpage>83</fpage>&#x2013;<lpage>88</lpage>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Gabrielson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Polaczyk</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Evaluation of inverted pavement by structural condition indicators from falling weight deflectometer</article-title>. <source>Constr. Build. Mater.</source> <volume>319</volume>, <fpage>125991</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2021.125991</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khazanovich</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Roesler</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>DIPLOBACK: neural-network-based backcalculation program for composite pavements</article-title>. <source>Transp. Res. Rec.</source> <volume>1570</volume> (<issue>1</issue>), <fpage>143</fpage>&#x2013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.3141/1570-17</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Physics-informed neural network with fuzzy partial differential equation for pavement performance prediction</article-title>. <source>Autom. Constr.</source> <volume>171</volume>, <fpage>105983</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2025.105983</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>D&#x27;Avigneau</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brilakis</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Modeling heterogeneous spatiotemporal pavement data for condition prediction and preventive maintenance in digital twin-enabled highway management</article-title>. <source>Autom. Constr.</source> <volume>174</volume>, <fpage>106134</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2025.106134</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meier</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Alexander</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Freeman</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Using artificial neural networks as a forward approach to backcalculation</article-title>. <source>Transp. Res. Rec.</source> <volume>1570</volume>, <fpage>126</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.3141/1570-15</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nam</surname>
<given-names>B. H.</given-names>
</name>
<name>
<surname>An</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Murphy</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Improvements to the structural condition index (SCI) for pavement structural evaluation at network level</article-title>. <source>Int. J. Pavement Eng.</source> <volume>17</volume> (<issue>8</issue>), <fpage>680</fpage>&#x2013;<lpage>697</lpage>. <pub-id pub-id-type="doi">10.1080/10298436.2015.1014369</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Damage pattern recognition for corroded beams strengthened by CFRP anchorage system based on acoustic emission techniques</article-title>. <source>Constr. Build. Mater.</source> <volume>406</volume>, <fpage>133474</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2023.133474</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plati</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Georgiou</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Papavasiliou</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Simulating pavement structural condition using artificial neural networks</article-title>. <source>Struct. Infrastruct. Eng.</source> <volume>12</volume> (<issue>9</issue>), <fpage>1127</fpage>&#x2013;<lpage>1136</lpage>. <pub-id pub-id-type="doi">10.1080/15732479.2015.1086384</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plati</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gkyrtis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Loizos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A practice-based approach to diagnose pavement roughness problems</article-title>. <source>Int. J. Civ. Eng.</source> <volume>22</volume> (<issue>3</issue>), <fpage>453</fpage>&#x2013;<lpage>465</lpage>. <pub-id pub-id-type="doi">10.1007/s40999-023-00900-x</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scullion</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Uzan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Paredes</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>MODULUS: a microcomputer-based backcalculation system</article-title>. <source>Transp. Res. Rec.</source> <volume>1260</volume>, <fpage>180</fpage>&#x2013;<lpage>191</lpage>.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Backcalculation of pavement layer moduli from falling weight deflectometer data using an artificial neural network</article-title>. <source>Can. J. Civ. Eng.</source> <volume>35</volume> (<issue>1</issue>), <fpage>57</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1139/l07-083</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shamiyeh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gunduz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shamiyeh</surname>
<given-names>M. E.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Assessment of pavement performance management indicators through analytic network process</article-title>. <source>IEEE Trans. Eng. Manage.</source> <volume>69</volume> (<issue>6</issue>), <fpage>2684</fpage>&#x2013;<lpage>2692</lpage>. <pub-id pub-id-type="doi">10.1109/TEM.2019.2952153</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stubstad</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Irwin</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lukanen</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Clevenson</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>It&#x27;s 10 o&#x27;clock: do you know where your sensors are?</article-title> <source>Transp. Res. Rec.</source> <volume>1716</volume>, <fpage>10</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.3141/1716-02</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tarefder</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Ahsan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>M. U.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Neural network&#x2013;based thickness determination model to improve backcalculation of layer moduli without coring</article-title>. <source>Int. J. Geomech.</source> <volume>15</volume> (<issue>3</issue>), <fpage>4014058</fpage>. <pub-id pub-id-type="doi">10.1061/(asce)gm.1943-5622.0000407</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Torquato E Silva</surname>
<given-names>S. D. A.</given-names>
</name>
<name>
<surname>Oliveira</surname>
<given-names>J. L. F. D.</given-names>
</name>
<name>
<surname>Furtado</surname>
<given-names>L. B. G.</given-names>
</name>
<name>
<surname>Babadopulos</surname>
<given-names>L. F. A. L.</given-names>
</name>
<name>
<surname>Parente Junior</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Batista Dos Santos</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Effect of the input of structural parameters&#x2019; uncertainties and analysts&#x2019; arbitrary decisions on the results of backcalculated pavement materials&#x2019; resilient moduli</article-title>. <source>Can. J. Civ. Eng.</source> <volume>52</volume> (<issue>9</issue>), <fpage>1743</fpage>&#x2013;<lpage>1751</lpage>. <pub-id pub-id-type="doi">10.1139/cjce-2024-0256</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ullidtz</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1998</year>). <source>Modelling flexible pavement response and performance</source>. <publisher-loc>Lyngby</publisher-loc>: <publisher-name>Polyteknisk Forlag</publisher-name>.</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Vaswani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shazeer</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Parmar</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Uszkoreit</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>A. N.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <source>Paper presented at the proceedings of the 31st international conference on neural information processing systems</source>. <publisher-loc>Long Beach, California, USA</publisher-loc>.</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predicting bedrock depth under asphalt pavement through a data-driven method based on particle swarm optimization-back propagation neural network</article-title>. <source>Constr. Build. Mater.</source> <volume>354</volume>, <fpage>129165</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2022.129165</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Influence of bedrock on viscoelastic responses and parametric back-calculation results for asphalt pavements and prediction of bedrock depth under FWD tests</article-title>. <source>Constr. Build. Mater.</source> <volume>377</volume>, <fpage>131158</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2023.131158</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Intelligent back-calculation approach to obtain viscoelastic properties of asphalt pavements on bedrock using falling weight deflectometer tests</article-title>. <source>Transp. Res. Rec.</source> <volume>2679</volume> (<issue>4</issue>), <fpage>431</fpage>&#x2013;<lpage>447</lpage>. <pub-id pub-id-type="doi">10.1177/03611981241292582</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wudil</surname>
<given-names>Y. S.</given-names>
</name>
<name>
<surname>Shalabi</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Al-Osta</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Gondal</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Al-Nahari</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Effective corrosion detection in reinforced concrete <italic>via</italic> laser-induced breakdown spectroscopy and machine learning</article-title>. <source>Mater. Today Commun.</source> <volume>41</volume>, <fpage>111005</fpage>. <pub-id pub-id-type="doi">10.1016/j.mtcomm.2024.111005</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Integrating FWD test and laboratory observation for assessing the damage state of semi-rigid base in asphalt pavement</article-title>. <source>Constr. Build. Mater.</source> <volume>496</volume>, <fpage>143769</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2025.143769</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Huyan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Predicting Marshall parameters of flexible pavement using support vector machine and genetic programming</article-title>. <source>Constr. Build. Mater.</source> <volume>306</volume>, <fpage>124924</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2021.124924</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Dynamic backcalculation of asphalt pavement layer properties using spectral element method</article-title>. <source>Road. Mater. Pavement Des.</source> <volume>16</volume> (<issue>4</issue>), <fpage>870</fpage>&#x2013;<lpage>888</lpage>. <pub-id pub-id-type="doi">10.1080/14680629.2015.1056214</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Localized corrosion induced damage monitoring of large-scale RC piles using acoustic emission technique in the marine environment</article-title>. <source>Constr. Build. Mater.</source> <volume>243</volume>, <fpage>118270</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2020.118270</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A hybrid methodology for structural damage detection uniting FEM and 1d-CNNs: demonstration on typical high-pile wharf</article-title>. <source>Mech. Syst. Signal Proc.</source> <volume>168</volume>, <fpage>108738</fpage>. <pub-id pub-id-type="doi">10.1016/j.ymssp.2021.108738</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Aydin</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hendriks</surname>
<given-names>M. A. N.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024a</year>). <article-title>A lattice modelling framework for fracture-induced acoustic emission wave propagation in concrete</article-title>. <source>Eng. Fract. Mech.</source> <volume>312</volume>, <fpage>110589</fpage>. <pub-id pub-id-type="doi">10.1016/j.engfracmech.2024.110589</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2024b</year>). <article-title>Deep residual learning for acoustic emission source localization in a steel-concrete composite slab</article-title>. <source>Constr. Build. Mater.</source> <volume>411</volume>, <fpage>134220</fpage>. <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2023.134220</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Aydin</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hendriks</surname>
<given-names>M. A. N.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2025a</year>). <article-title>Lattice modelling of complete acoustic emission waveforms in the concrete fracture process</article-title>. <source>Eng. Fract. Mech.</source> <volume>320</volume>, <fpage>111040</fpage>. <pub-id pub-id-type="doi">10.1016/j.engfracmech.2025.111040</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2025b</year>). <article-title>Ambient vibration measurement-aided multi-1d CNNs ensemble for damage localization framework: demonstration on a large-scale RC pedestrian bridge</article-title>. <source>Mech. Syst. Signal Proc.</source> <volume>224</volume>, <fpage>111937</fpage>. <pub-id pub-id-type="doi">10.1016/j.ymssp.2024.111937</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>