<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1741082</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhancing crack detection and severity assessment in historical Tabiya basins using U-Net and adaptive thresholding</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Matich</surname>
<given-names>Hafsa</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3221200"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Attmani</surname>
<given-names>Jamal</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mousannif</surname>
<given-names>Hajar</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>LISI Laboratory, Faculty of Sciences Semlalia, Cadi Ayyad University</institution>, <city>Marrakesh</city>, <country country="MA">Morocco</country></aff>
<aff id="aff2"><label>2</label><institution>National School of Applied Sciences of Marrakech, Cadi Ayyad University</institution>, <city>Marrakesh</city>, <country country="MA">Morocco</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Hafsa Matich, <email xlink:href="mailto:h.matich.ced@uca.ac.ma">h.matich.ced@uca.ac.ma</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1741082</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>10</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Matich, Attmani and Mousannif.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Matich, Attmani and Mousannif</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The conservation of historical Tabiya water basins remains paramount given their cultural and architectural importance, yet structural deterioration such as surface cracking poses a formidable challenge to conservation work. Since traditional inspection methods are often subjective, tedious, and prone to error, this study addresses these limitations by presenting an automated system for surface crack detection and segmentation based on artificial intelligence and computer vision techniques. High-resolution images were captured on-site using a Canon EOS 1100D camera and analyzed within a comparative deep learning framework using four models, namely U-Net with MobileNetV2, ResNet-50, InceptionV3, and EfficientNetB7 backbones. The proposed system performs crack detection and segmentation, as well as quantitative measurements, including crack length, width, and severity assessment through skeletonization, a crack length estimation algorithm, and a crack width extraction method. Experimental results indicated that the MobileNetV2-based model outperformed all other tested architectures, with an accuracy of 98.7%, a recall of 98.2%, a precision of 99.1%, and an F1-score of 98.6%. Furthermore, the developed framework has also been deployed as a web application that allows users to upload or drag and drop images and select from four available models for automated analysis. This integrated system represents a strong, precise, and user-friendly tool for the digital preservation and structural monitoring of heritage water infrastructure.</p>
</abstract>
<kwd-group>
<kwd>crack detection</kwd>
<kwd>deep learning</kwd>
<kwd>fully convolutional network</kwd>
<kwd>heritage building</kwd>
<kwd>semantic segmentation</kwd>
<kwd>U-Net adaptive thresholding</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="17"/>
<table-count count="5"/>
<equation-count count="16"/>
<ref-count count="52"/>
<page-count count="19"/>
<word-count count="9267"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Machine Learning and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Heritage hydraulic structures are enormously significant cultural properties, yet their architectural properties suffer from material degradation with time. Surface cracking is one of the most frequently occurring and serious modes of distress in a structure. Cracks can form due to prolonged weathering, aging of the material, differential settlement, thermal variations, fluctuations in moisture, or seismic activity, and might be an early warning sign for progressive loss of structural integrity both in historic and modern constructions (<xref ref-type="bibr" rid="ref41">Safiuddin et al., 2018</xref>; <xref ref-type="bibr" rid="ref35">Qiu and Lau, 2023</xref>). In earthen and masonry heritage structures, even minor cracks can significantly accelerate deterioration by facilitating water ingress, promoting material disintegration, and ultimately compromising structural safety and long-term preservation (<xref ref-type="bibr" rid="ref25">Iraniparast et al., 2023</xref>; <xref ref-type="bibr" rid="ref26">Islam and Kim, 2019</xref>).</p>
<p>Crack assessment has traditionally relied on manual-based visual inspection, with rulers, crack gauges, or microscopes used to measure crack width, length, and evolution over a time duration (<xref ref-type="bibr" rid="ref48">Yang et al., 2022</xref>; <xref ref-type="bibr" rid="ref47">Yalew and Kim, 2023</xref>). Although widely practiced, such approaches are labor-intensive, time-consuming, and inherently subjective, particularly when applied to large-scale, irregular, or textured surfaces commonly encountered at heritage sites. Furthermore, variations in lighting conditions, surface complexity, accessibility constraints, and inspector expertise limit the repeatability, scalability, and objectivity of conventional inspection workflows (<xref ref-type="bibr" rid="ref2">Ahila Priyadharshini et al., 2023</xref>; <xref ref-type="bibr" rid="ref18">Fan et al., 2023</xref>; <xref ref-type="bibr" rid="ref3">Al Biajawi et al., 2023</xref>; <xref ref-type="bibr" rid="ref5">Alsharqawi et al., 2022</xref>; <xref ref-type="bibr" rid="ref10">Candelaria and Kee, 2023</xref>; <xref ref-type="bibr" rid="ref14">Cheng, 1997</xref>; <xref ref-type="bibr" rid="ref12">Chang et al., 2019</xref>; <xref ref-type="bibr" rid="ref16">Ding et al., 2023</xref>; <xref ref-type="bibr" rid="ref37">Rahmati et al., 2023</xref>; <xref ref-type="bibr" rid="ref39">Rasol et al., 2020</xref>). These limitations have driven growing interest in automated and data-driven alternatives for continuous and reliable structural monitoring.</p>
<p>Recent advances in computer vision and artificial intelligence have significantly transformed automated structural inspection. Image-based sensing combined with intelligent algorithms has enabled objective, repeatable, and data-driven condition assessments across a wide range of civil infrastructure applications (<xref ref-type="bibr" rid="ref29">Kontoni et al., 2023</xref>; <xref ref-type="bibr" rid="ref28">Koch et al., 2015</xref>; <xref ref-type="bibr" rid="ref13">Chen et al., 2023</xref>). Within the broader field of structural health monitoring, AI-driven systems increasingly support resilient infrastructure management, smart city initiatives, and sustainable maintenance strategies (<xref ref-type="bibr" rid="ref38">Raj et al., 2024</xref>; <xref ref-type="bibr" rid="ref27">Kapoor et al., 2024</xref>). Comprehensive reviews consistently highlight the expanding role of deep learning techniques for detecting and characterizing damage in concrete and masonry structures (<xref ref-type="bibr" rid="ref23">Hamidi et al., 2025</xref>).</p>
<p>Within this context, convolutional neural networks (CNNs) have demonstrated notable success in crack detection. Earlier CNN-oriented works concentrated mostly on picture-level classification, indicating whether cracks were present or not but providing no spatial localization of the cracks (<xref ref-type="bibr" rid="ref17">Elhariri et al., 2022</xref>). While useful for coarse screening, such approaches are insufficient for structural diagnosis, where accurate quantification of crack geometry and severity is essential. To overcome this limitation, semantic segmentation architectures&#x2014;most notably U-Net&#x2014;have gained prominence due to their encoder&#x2013;decoder design and skip connections, which enable precise pixel-wise delineation of crack patterns (<xref ref-type="bibr" rid="ref22">Hac&#x0131;efendio&#x011F;lu et al., 2023</xref>).</p>
<p>It has been shown in recent advances that architectural innovations can considerably enhance the segmentation performance. Residual learning and feature fusion strategies in RS-Net have improved crack boundary clarity and severity estimation in pavement applications (<xref ref-type="bibr" rid="ref4">Ali et al., 2024</xref>), while multiscale contextual integration in MSMC-U-Net has improved robustness under complex backgrounds and variable crack morphologies (<xref ref-type="bibr" rid="ref32">Pervaiz et al., 2025</xref>). Comparative studies consistently show that U-Net-based architectures offer strong segmentation accuracy, but that performance is highly dependent on the choice of backbone network and the trade-off between accuracy and computational cost (<xref ref-type="bibr" rid="ref19">Garg et al., 2025</xref>).</p>
<p>In the realm of crack detection, although CNN models have dominated, transformer models have gained traction due to their ability to model long-range dependencies. Hierarchical transformer architectures have demonstrated competitive performance on concrete and bituminous surfaces, particularly under challenging noise and texture conditions (<xref ref-type="bibr" rid="ref30">Li et al., 2024</xref>). However, such models typically require substantial computational resources, limiting their suitability for real-time deployment or resource-constrained heritage monitoring scenarios.</p>
<p>Moreover, heritage and cultural structures present extra challenges that do not exist in ordinary concrete infrastructure. Variations in material composition, erosion-induced surface irregularities, heterogeneous textures, and uneven illumination significantly complicate crack detection and segmentation. Attention-enhanced U-Net models integrated with optical pulsed thermography have successfully revealed cracks on ancient murals, underscoring the potential of deep learning for heritage applications (<xref ref-type="bibr" rid="ref15">Cui et al., 2024</xref>). Similarly, recent pixel-level segmentation studies on historic surfaces confirm the feasibility of deep learning approaches, while emphasizing the need for carefully curated datasets and tailored model designs (<xref ref-type="bibr" rid="ref43">S&#x00F6;yleyman et al., 2022</xref>).</p>
<p>Despite these advances, a clear research gap remains. Existing studies predominantly focus on pavements, bridges, and modern concrete structures, with limited attention to heritage hydraulic structures composed of earthen and masonry materials, such as Tabiya water basins (<xref ref-type="bibr" rid="ref44">Tran et al., 2024</xref>). Moreover, there is a lack of systematic comparison of U-Net variants with different transfer learning backbones under heritage-specific surface conditions, where crack patterns are subtle and closely intertwined with material heterogeneity.</p>
<p>Addressing this gap, the present study proposes an AI-driven workflow for automated detection, segmentation, and quantitative assessment of surface cracks in Tabiya water basins. Four U-Net variants incorporating different transfer learning backbones&#x2014;MobileNetV2, InceptionV3, ResNet-50, and EfficientNetB7&#x2014;are systematically evaluated in terms of segmentation accuracy, robustness, and computational efficiency on complex heritage surfaces. The proposed approach aims to deliver a scalable, objective, and practical tool to support heritage conservation and maintenance efforts. The overall workflow is illustrated in <xref ref-type="fig" rid="fig1">Figure 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The proposed crack detection workflow.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart depicting the process for image-based crack analysis. It starts with "Acquisition of images," followed by two branches: "Grayscale Processing" and "Thresholding." Thresholding leads to "Image Cleaning," which loops back to "Crack Detection," connected to "Crack Analysis." Finally, "Crack Properties" are determined, categorizing into "Length," "Width," "Area," and "Severity."</alt-text>
</graphic>
</fig>
</sec>
<sec sec-type="materials|methods" id="sec2">
<label>2</label>
<title>Materials and methods</title>
<sec id="sec3">
<label>2.1</label>
<title>Case study description</title>
<p>This study is aimed at the automatic detection of surface cracking on ancient earthen structures, with Sahrij Labgar in the Kasbah district of Marrakech serving as a specific case for analysis. Constructed during the Almoravid period (<xref ref-type="bibr" rid="ref45">Visconti and Capozzi, 2014</xref>), the reservoir of Sahrij Labgar (from the 11th to the 12th century) is an important element linking to the early hydraulic engineering system of the city. The basin has a close relationship with the <italic>Khattaras</italic><xref ref-type="fn" rid="fn0001"><sup>1</sup></xref>, which form a network of underground galleries and storage basins devised for irrigation purposes. The basin assisted substantially in the irrigation of royal gardens and farming grounds and thus stands as a testimony to the highly advanced technical and environmental adaptations accomplished by Almoravid engineers.</p>
<p>Sahrij Labgar is architecturally representative of a large quadrangular reservoir, approximately 3.8&#x202F;m deep, with a storage capacity of over 40,000 m<sup>3</sup>. The walls of the structure, estimated to be 2.3&#x202F;m thick, were built using the traditional material of t&#x00E2;biya (rammed earth), in which earth-lime-straw-gravel mixtures are laid in successive compacted layers to create massive solid walls. The inner surface of the basin was then treated with a lime-based plaster and tadelakt, a polished waterproof coating characterized by its durability and resilience to water intrusion.</p>
<p>However, while these traditional materials are enduring, deterioration processes driven by aging, environmental exposure, and changing moisture levels are continually at work and manifest in observable effects: surface cracking, erosion, biological colonization, and partial detachment of coatings. These diverse and interwoven pathologies therefore pose a genuine conservation challenge across the heterogeneous earthen surfaces of the structure.</p>
<p>Thus, Sahrij Labgar has become an ideal test site for AI-enabled crack detection and automated defect mapping in a heritage context. Its historical significance, complex surface morphology, and environmental vulnerability make it a prominent case for assessing whether state-of-the-art visual analysis can reinforce non-invasive monitoring and preventive conservation strategies for traditional earthen hydraulic architecture.</p>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Dataset description</title>
<p>In this research, relevant open crack datasets were exploited, along with <italic>in situ</italic> image acquisition, the two being systematically merged to forge a large and diverse dataset targeting the training and evaluation of the segmentation model proposed. A total of 2,500 images of high quality (<xref ref-type="fig" rid="fig2">Figure 2a</xref>) were obtained <italic>in situ</italic> from the ancient Tabiya water basins using a Canon EOS 1100D DSLR camera (12.2 MP, 30&#x202F;mm lens; <xref ref-type="fig" rid="fig3">Figure 3</xref>). Image acquisition was carried out under early morning natural light to avoid unwanted shadows or specular reflections, while a parallel movement acquisition method was employed. Approximately 50&#x2013;60 images were taken per wall in so-called &#x201C;overlapping&#x201D; mode to ensure complete surface coverage. Of the acquired images, 1,625 showed visible cracks, while 875 showed intact concrete.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Training data examples from dual sources: <bold>(a)</bold> on-site collected image and <bold>(b)</bold> external database sample.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Panel (a) shows six close-up images of brownish wall surfaces with visible cracks and surface texture variations. Panel (b) displays six close-up images of gray or beige wall surfaces, each with distinct linear or jagged cracks.</alt-text>
</graphic>
</fig>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Canon EOS 1100D.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Camera specifications table listing 12 megapixels, maximum resolution of 4272 by 2848, CMOS sensor type, sensor size of 22.2 by 14.8 millimeters, pixel area of 26.94 square micrometers, and released in 2011.</alt-text>
</graphic>
</fig>
<p>Ground-truth masks for these in situ images were generated through manual pixel-by-pixel annotation using the LabelMe tool (<xref ref-type="fig" rid="fig4">Figure 4</xref>). The crack regions were carefully delineated following a predetermined annotation protocol for uniformity in crack definitions and labeling accuracy across all heritage images (<xref ref-type="bibr" rid="ref40">Russell et al., 2008</xref>; <xref ref-type="bibr" rid="ref46">Wada, 2019</xref>).</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>The main interface of LabelMe.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">LabelMe software interface showing a toolbar with options including Open, Open Directory, Next Image, Previous Image, Save, and Delete File on the left side. The right side contains empty sections labeled Flags, Label List, Polygon Labels, and File List.</alt-text>
</graphic>
</fig>
<p>To cope with the in-situ dataset&#x2019;s limited size and enhance model generalization, the training data were supplemented with 7,500 annotated images extracted from two well-known public crack datasets (<xref ref-type="fig" rid="fig2">Figure 2b</xref>). The first dataset, SDNET2018, contains labeled crack and non-crack images collected from bridge decks, walls, and pavements, while the second dataset, CFD, offers manually annotated pixel-level crack masks for road-surface defects. Before integration, all external images and corresponding masks were normalized to the same resolution, while the differences in labeling conventions were reconciled by collectively adopting a binary crack-background representation. This ensured a uniform labeling rule across heritage and public datasets and allowed seamless joint training of the segmentation model.</p>
<p>The final combined dataset consists of 10,000 fully annotated images divided such that 8,000 images served for training (80%) and the remaining 2,000 images for testing (20%), with proportional contributions from both in-situ and public datasets in each subset. Such a proportional split would ensure that the heritage images maintain a significant proportion in the test set in order to directly and fairly assess the performance of the model in the prime application domain. This evaluation strategy would comply with the state-of-the-art practices in deep learning&#x2013;based image segmentation, which, after all, maintains an independent test set for the reliable estimation of model generalization performance (<xref ref-type="bibr" rid="ref8">Bishop, 2006</xref>; <xref ref-type="bibr" rid="ref20">G&#x00E9;ron, 2019</xref>). Very similar data-splitting strategies have been heavily relied upon in crack detection and structural inspection studies to guarantee balanced and unbiased evaluation under diverse surface conditions (<xref ref-type="bibr" rid="ref11">Cha et al., 2017</xref>; <xref ref-type="bibr" rid="ref49">Zou et al., 2019</xref>).</p>
</sec>
<sec id="sec5">
<label>2.3</label>
<title>Preprocessing</title>
<p>Preprocessing is a crucial component of crack segmentation pipelines: it provides the model with standardized, de-noised inputs and helps improve the stability and accuracy of predictions. The first preprocessing step was grayscale conversion (<xref ref-type="fig" rid="fig5">Figure 5</xref>), which reduced redundancy across channels and improved structural contrast in the concrete texture, allowing the segmentation model to concentrate on intensity-based crack patterns rather than color differences (<xref ref-type="bibr" rid="ref21">Gonzalez and Woods, 2007</xref>). Since concrete surfaces are frequently affected by shadows, stains, and heterogeneous illumination, a dedicated thresholding step was introduced to specifically enhance the visibility of cracks prior to segmentation. Thresholding is a common practice in related works for highlighting low-intensity crack areas while suppressing background noise, especially in classical computer vision workflows and hybrid deep learning pipelines, where illumination artifacts can often obscure fine crack structures.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>RGB-to-grayscale conversion.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Diagram showing the RGB image split into red, green, and blue channels. Each channel outputs to grayscale processing, resulting in a single grayscale image with visible cracks.</alt-text>
</graphic>
</fig>
<p>Multiple thresholding techniques were evaluated to select the method best suited to the dataset, including Global, Otsu, Triangle, Isodata, and Adaptive (<xref ref-type="table" rid="tab1">Table 1</xref>). The Adaptive (local) thresholding technique, which computes thresholds from local pixel-neighborhood statistics, proved to provide consistently better results. This locality-aware approach preserved thin and low-contrast cracks, maintained structural continuity, and reduced false positives caused by surface artifacts. It proved particularly effective under uneven illumination and textured concrete backgrounds, yielding the cleanest binary representations of cracks. On the basis of these observations and quantitative assessments, Adaptive thresholding was selected as the primary preprocessing method, providing high-quality input masks that greatly improved subsequent segmentation performance (<xref ref-type="fig" rid="fig6">Figure 6</xref>).</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Comparison of thresholding methods (<xref ref-type="bibr" rid="ref6">Amiriebrahimabadi et al., 2024</xref>).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Method</th>
<th align="left" valign="top">Time</th>
<th align="left" valign="top">Sensitivity to noise</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Global</td>
<td align="left" valign="top">Very fast</td>
<td align="left" valign="top">High: a single global threshold amplifies noise across the full image.</td>
</tr>
<tr>
<td align="left" valign="top">Otsu</td>
<td align="left" valign="top">Fast</td>
<td align="left" valign="top">Medium: designed for clean bimodal images, but difficult to manage with noisy histograms.</td>
</tr>
<tr>
<td align="left" valign="top">Adaptive</td>
<td align="left" valign="top">Slower</td>
<td align="left" valign="top">Low: effectively handles local noise and illumination variations.</td>
</tr>
<tr>
<td align="left" valign="top">Triangle</td>
<td align="left" valign="top">Fast</td>
<td align="left" valign="top">Medium: best for clean, sparse histograms; however, results can still be affected by noise.</td>
</tr>
<tr>
<td align="left" valign="top">Isodata</td>
<td align="left" valign="top">Medium</td>
<td align="left" valign="top">Medium: some noise can be averaged out by the iterative process.</td>
</tr>
<tr>
<td align="left" valign="top">GMM</td>
<td align="left" valign="top">Slowest</td>
<td align="left" valign="top">Low: Models noise and complex distributions well.</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Comparison of image thresholding methods.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Grayscale image of a crack on a surface is processed using different thresholding techniques. Six images display results of various methods: Global, OTSU, Adaptive, Triangle, Isodata, and Gaussian, highlighting variations in crack detection.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec6">
<label>2.4</label>
<title>Semantic segmentation models</title>
<p>There are many pre-trained convolutional neural networks available for classification tasks, and the architectural possibilities offered to developers contrast in accuracy, speed of prediction, and size of memory footprint. MobileNetV2 (<xref ref-type="bibr" rid="ref33">Qayyum et al., 2022</xref>), EfficientNetB7 (<xref ref-type="bibr" rid="ref31">Mazni et al., 2024</xref>), InceptionV3 (<xref ref-type="bibr" rid="ref33">Qayyum et al., 2022</xref>), and ResNet-50 (<xref ref-type="bibr" rid="ref9">Bussa and Boppana, 2025</xref>), whose distinctive reputation lies in weighing high accuracy against small model size, were selected (<xref ref-type="table" rid="tab2">Table 2</xref>). To the machine learning community, these four models are among the most computationally and memory efficient, which makes them attractive options in practical scenarios where the resources allocated for computation and memory are limited. <xref ref-type="table" rid="tab1">Table 1</xref> presents typical features of such pre-trained model instances.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Consideration of parameter counting and computational loads for assessing four eminent CNN architectures: MobileNetV2, InceptionV3, ResNet-50 and EfficientNetB7.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">Parameters (million)</th>
<th align="center" valign="top">Size (MB)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">MobileNetV2</td>
<td align="char" valign="top" char=".">3.5</td>
<td align="center" valign="top">14</td>
</tr>
<tr>
<td align="left" valign="top">InceptionV3</td>
<td align="char" valign="top" char=".">23.9</td>
<td align="center" valign="top">92</td>
</tr>
<tr>
<td align="left" valign="top">ResNet-50</td>
<td align="char" valign="top" char=".">25.6</td>
<td align="center" valign="top">98</td>
</tr>
<tr>
<td align="left" valign="top">EfficientNetB7</td>
<td align="char" valign="top" char=".">66.7</td>
<td align="center" valign="top">256</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Notably, MobileNetV2 has the fewest parameters and the smallest model size of the four architectures.</p>
</table-wrap-foot>
</table-wrap>
<sec id="sec7">
<label>2.4.1</label>
<title>U-Net</title>
<p>The U-Net architecture is characterized by its U-shape, with an encoder&#x2013;decoder design initially proposed by <xref ref-type="bibr" rid="ref9003">Ronneberger et al. (2015)</xref>. <xref ref-type="fig" rid="fig7">Figure 7</xref> illustrates the full process, including input image, U-Net architecture, and output segmentation mask. Among the most popular techniques for semantic segmentation, U-Net is particularly strong when training data are scarce.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>U-Net architecture illustrating the input image pipeline, encoder&#x2013;decoder structure, and resulting segmentation map.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">U-Net architecture diagram illustrating the segmentation process. The left side shows the contracting path (encoder) with down-sampling and skip connections, while the right side displays the expansive path (decoder) with up-sampling. The input image, a grayscale tile, is processed to produce an output segmentation map highlighting features. Blocks represent convolutional layers with varying dimensions and connections, demonstrating the network's flow from input to output.</alt-text>
</graphic>
</fig>
<p>U-Net architecture consists of a contracting path (encoder) that captures contextual information and an expansive path (decoder) that enables precise localization. These two paths are joined via a bottleneck (bridge). The contracting path follows the typical structure of convolutional neural networks, taking the input image through successive encoding blocks and extracting increasingly abstract feature representations.</p>
<p>In this study, the selected pre-trained CNN models are integrated into the U-Net framework as encoder backbones, replacing the standard convolutional encoder while retaining the original decoder structure. The encoder extracts multi-scale hierarchical features, which are progressively upsampled by the decoder to restore spatial resolution.</p>
<p>The segmentation mask for crack detection is generated from decoding along the expansive path. During decoding, feature maps are iteratively upsampled and concatenated with the corresponding same-resolution feature maps from the encoder through skip connections. This allows the network to restore valuable spatial cues that may have been lost during downsampling, thus giving it the ability to accurately delineate fine crack structures.</p>
<p>Finally, through a 1&#x202F;&#x00D7;&#x202F;1 convolution and the sigmoid activation function, a binary segmentation mask is generated. Each pixel is classified as either &#x201C;crack&#x201D; or &#x201C;non-crack,&#x201D; enabling precise pixel-level crack segmentation by combining global contextual information with fine spatial details.</p>
</sec>
<sec id="sec8">
<label>2.4.2</label>
<title>U-Net-MobileNetV2</title>
<p>A hybrid model is thus developed where the original U-Net encoder is replaced with a pre-trained MobileNetV2 backbone (<xref ref-type="fig" rid="fig8">Figure 8b</xref>), in contrast to the earlier U-Net-MobileNet variant (<xref ref-type="fig" rid="fig8">Figure 8a</xref>). This integration allows MobileNetV2 to serve as the feature-extraction encoder while retaining the original U-Net decoder with skip connections. Transfer learning accelerates feature extraction and improves performance, particularly on the limited in-situ dataset (<xref ref-type="bibr" rid="ref33">Qayyum et al., 2022</xref>). The decoder is similar to that of the U-Net, employing transposed convolutions with skip connections to the MobileNetV2 feature maps. This reduces the total number of trainable parameters and improves convergence.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Architecture variants: <bold>(a)</bold> U-Net-MobileNet and <bold>(b)</bold> U-Net-MobileNet-V2.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g008.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Diagram showing three convolutional network architectures. (a) Features a 3x3 depthwise convolution with variable stride and ReLU6, followed by a 1x1 convolution with ReLU6. (b) Includes a 1x1 convolution with ReLU6, 3x3 depthwise convolution with ReLU6, and a 1x1 linear convolution, with an addition step. Stride is 1. (c) Similar to (b) but with a stride of 2. Inputs are shown as ovals at the bottom.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec9">
<label>2.4.3</label>
<title>ResNet-50</title>
<p>The ResNet-50 architecture (<xref ref-type="fig" rid="fig9">Figure 9</xref>) was adapted as a U-Net encoder. The input layer was modified to accept 320&#x202F;&#x00D7;&#x202F;320 grayscale images, and the encoder&#x2019;s residual blocks extract hierarchical features useful for crack segmentation. The original fully connected classification head is removed, and the decoder of the U-Net reconstructs the segmentation mask from these features. The network&#x2019;s deep residual blocks enable effective training through skip connections and learn robust features for distinguishing crack patterns (<xref ref-type="bibr" rid="ref9">Bussa and Boppana, 2025</xref>).</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>ResNet-50 model architecture.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g009.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Diagram of a neural network architecture with stages. Input passes through zero padding, convolution, batch normalization, ReLU activation, and max pooling in Stage 1. Stages 2-5 contain convolution and identity blocks. Ultimately, the output results from average pooling, flattening, and a fully connected layer.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec10">
<label>2.4.4</label>
<title>InceptionV3</title>
<p>The InceptionV3 model (<xref ref-type="fig" rid="fig10">Figure 10</xref>) was employed as the U-Net encoder, leveraging its multi-scale feature extraction capability. The classification head was replaced with a global average pooling layer, feeding features to the U-Net decoder for segmentation (<xref ref-type="bibr" rid="ref33">Qayyum et al., 2022</xref>). Data augmentation techniques (such as rotation or brightness adjustment) were also applied during training for robustness. The parallel convolutional paths with varying kernel sizes enable the encoder to capture crack patterns at different widths and orientations.</p>
<fig position="float" id="fig10">
<label>Figure 10</label>
<caption>
<p>InceptionV3 model architecture.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g010.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Diagram of a neural network architecture. It shows a sequence of layers including convolution, average pool, max pool, concatenation, dropout, fully connected, and softmax layers. Each type is color-coded. Input dimension is 299 by 299 by 3, transformed through the layers to an output of 8 by 8 by 2048, and finally to 1001.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec11">
<label>2.4.5</label>
<title>EfficientNetB7</title>
<p>EfficientNetB7 (<xref ref-type="fig" rid="fig11">Figure 11</xref>) was used as a high-capacity U-Net encoder, exploiting compound scaling to balance accuracy and computational efficiency. The classification head is replaced with the decoder for pixel-level segmentation. The essential parts of the MBConv blocks along with SE modules boost dynamically the most relevant features enabling the network to focus on discriminative crack patterns in high-resolution images without being computationally prohibitive (<xref ref-type="bibr" rid="ref31">Mazni et al., 2024</xref>).</p>
<fig position="float" id="fig11">
<label>Figure 11</label>
<caption>
<p>EfficientNetB7 model architecture.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g011.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart of a neural network architecture depicting blocks from one to seven. Each block contains layers labeled Conv or MBConv with varying kernel sizes, such as three by three and five by five. The diagram starts with an input image processed through these layers and concludes with a feature map. Each block is color-coded for differentiation.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="sec12">
<label>2.5</label>
<title>Loss function and optimization</title>
<p>For the binary crack segmentation task, the network outputs a single-channel feature map. A sigmoid (<xref ref-type="bibr" rid="ref42">Shao and Wang, 2023</xref>) activation function is applied pixel-wise at the final layer, mapping each pixel&#x2019;s raw logit to a probability value in the range [0,1]. This formulation is appropriate for binary segmentation as it treats each pixel&#x2019;s classification as an independent Bernoulli trial, naturally representing the probability of belonging to the crack class.</p>
<p>The model is optimized using the Binary Cross-Entropy (BCE) loss, which measures the discrepancy between the predicted pixel-wise probabilities (<inline-formula>
<mml:math id="M1">
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>) and the ground-truth binary labels (<inline-formula>
<mml:math id="M2">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>), defined as:</p>
<disp-formula id="E1">
<mml:math id="M3">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">BCE</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>&#x03A3;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">[</mml:mo>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:mo>log</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x00D7;</mml:mo>
<mml:mo>log</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">]</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M4">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> is the total number of pixels. This loss is well-suited for tasks with imbalanced class distributions (such as cracks occupying a small fraction of pixels) because it penalizes errors on both the minority (crack) and majority (background) classes with equal theoretical importance at the pixel level.</p>
<p>Training was performed using the Adam optimizer with a learning rate of 0.01, a batch size of 32, and a fixed schedule of 30 epochs. The model checkpoint yielding the highest validation accuracy was retained for final evaluation to prevent overfitting and ensure robust generalization.</p>
</sec>
<sec id="sec13">
<label>2.6</label>
<title>Post-processing</title>
<p>Following binary segmentation, post-processing techniques were applied to extract quantitative morphological metrics (crack length, width, and severity) from the predicted masks.</p>
<sec id="sec14">
<label>2.6.1</label>
<title>Skeletonization</title>
<p>The crack skeleton is obtained using thinning algorithms, which reduce cracks to a central axis while preserving connectivity and overall topology. Several methods (Zhang-Suen, Guo-Hall, iterative morphological, fast parallel thinning, MAT, and Distance Transform) were evaluated. The Medial Axis Transform (MAT) was selected because it preserves crack geometry, reduces noise, and accurately defines the central axis&#x2014;essential for precise length and width measurements.</p>
</sec>
<sec id="sec15">
<label>2.6.2</label>
<title>Crack length measurement</title>
<p>The overall length of cracks is measured using a pixel-based calculation on the skeletonized image produced from segmentation. The algorithm processes each pixel <inline-formula>
<mml:math id="M5">
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>in the binary skeleton image, checking a calibration index <inline-formula>
<mml:math id="M6">
<mml:mi>f</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, which returns 1 for skeleton pixels and 0 otherwise. A point counter <inline-formula>
<mml:math id="M7">
<mml:mi>n</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula>and a total length <inline-formula>
<mml:math id="M8">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula>are initialized to zero, and the coordinates of the previous skeleton point are stored in previous point.</p>
<p>For each skeleton pixel, the Euclidean distance to the previous skeleton point is computed:</p>
<disp-formula id="E2">
<mml:math id="M9">
<mml:mtext>displacement</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mtext mathvariant="italic">prev</mml:mtext>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mtext mathvariant="italic">prev</mml:mtext>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>.</mml:mo>
</mml:math>
</disp-formula>
<p>This displacement is added to the total crack length <inline-formula>
<mml:math id="M10">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>, and previous point is updated to the current pixel. After processing all pixels, <inline-formula>
<mml:math id="M11">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula>represents the total crack length in pixel units and can be converted to real-world dimensions through calibration. This method ensures continuous and precise measurement along the crack centerline, minimizing errors caused by width variations or noise.</p>
</sec>
<sec id="sec16">
<label>2.6.3</label>
<title>Crack width measurement</title>
<p>Crack width is quantified using the skeletonized image and edge detection. First, the Canny edge detector (thresholds: 100 and 200) is applied to extract crack boundaries. For each skeleton point <inline-formula>
<mml:math id="M12">
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, the nearest edge pixels on the left and right sides of the skeleton are identified, and the Euclidean distances to each edge are computed:</p>
<disp-formula id="E3">
<mml:math id="M13">
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext mathvariant="italic">left</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mtext mathvariant="italic">left</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mtext mathvariant="italic">left</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext mathvariant="italic">right</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mtext mathvariant="italic">right</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mtext mathvariant="italic">right</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>.</mml:mo>
</mml:math>
</disp-formula>
<p>The local crack width is calculated as <inline-formula>
<mml:math id="M14">
<mml:mtext>crack width</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>left</mml:mtext>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>right</mml:mtext>
</mml:msub>
</mml:math>
</inline-formula>. After processing all skeleton points, statistical measures such as maximum, minimum, and average crack width are derived. The maximum width indicates the most critical portion of the crack, while the average width provides a global severity measure. These pixel-based measurements can be converted to real-world units, ensuring accurate structural assessment.</p>
</sec>
<sec id="sec17">
<label>2.6.4</label>
<title>Crack severity assessment</title>
<p>The severity of cracks has been categorized into three levels&#x2014;low, medium, and high&#x2014;in relation to linear and area criteria. Linear-based severity is derived from the maximum crack width, whereas area-based severity relates the damaged area to the inspected area. Thresholds were established in collaboration with a construction expert and informed by previous studies (<xref ref-type="bibr" rid="ref36">Ragnoli et al., 2018</xref>) and international guidelines such as ACI 224R-01 (<xref ref-type="bibr" rid="ref1">ACI Committee 224, 2001</xref>). These criteria, as summarized in <xref ref-type="table" rid="tab3">Table 3</xref>, were deployed on the input sections of 0.6&#x202F;m&#x202F;&#x00D7;&#x202F;1.06&#x202F;m for assessing the crack severity. While high severity cracks indicate unsafe conditions that need immediate attention, medium and low severity cracks indicate less critical damage to the structures.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Classification criteria for wall crack severity.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Types of cracks</th>
<th align="left" valign="top">Measure</th>
<th align="center" valign="top">Severity</th>
<th align="center" valign="top">Range</th>
<th align="center" valign="top">Ref.</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top" rowspan="3">Linear crack</td>
<td align="left" valign="top" rowspan="3">Width (mm)</td>
<td align="center" valign="top">Low</td>
<td align="center" valign="top">X&#x202F;&#x003C;&#x202F;10</td>
<td align="center" valign="middle" rowspan="6"><xref ref-type="bibr" rid="ref36">Ragnoli et al. (2018)</xref> and <xref ref-type="bibr" rid="ref1">ACI Committee 224 (2001)</xref></td>
</tr>
<tr>
<td align="center" valign="top">Medium</td>
<td align="center" valign="top">10&#x202F;&#x2264;&#x202F;X&#x202F;&#x003C;&#x202F;75</td>
</tr>
<tr>
<td align="center" valign="top">High</td>
<td align="center" valign="top">X&#x202F;&#x2265;&#x202F;75</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="3">Area crack</td>
<td align="left" valign="top" rowspan="3">Area (%)</td>
<td align="center" valign="top">Low</td>
<td align="center" valign="top">X&#x202F;&#x003C;&#x202F;10</td>
</tr>
<tr>
<td align="center" valign="top">Medium</td>
<td align="center" valign="top">10&#x202F;&#x2264;&#x202F;X&#x202F;&#x003C;&#x202F;25</td>
</tr>
<tr>
<td align="center" valign="top">High</td>
<td align="center" valign="top">X&#x202F;&#x2265;&#x202F;25</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="sec18">
<label>2.6.4.1</label>
<title>Linear crack severity</title>
<p>Linear cracks (vertical, horizontal, or diagonal) are assessed based on maximum crack width. Each image (0.6&#x202F;&#x00D7;&#x202F;1.06&#x202F;m, 224&#x202F;&#x00D7;&#x202F;224 pixels) corresponds to ~2.68&#x202F;mm/pixel width and ~4.73&#x202F;mm/pixel height. Severity is classified as low, moderate, or high using thresholds derived from previous studies (<xref ref-type="bibr" rid="ref36">Ragnoli et al., 2018</xref>) and international guidelines [ACI 224R-01 (<xref ref-type="bibr" rid="ref1">ACI Committee 224, 2001</xref>)]. Classification is validated against manually annotated masks.</p>
</sec>
<sec id="sec19">
<label>2.6.4.2</label>
<title>Area crack severity</title>
<p>Area cracks (fissures, spalls, or large surface deterioration) are assessed by the area ratio:</p>
<disp-formula id="E4">
<mml:math id="M15">
<mml:mtext mathvariant="italic">Area Ratio</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext mathvariant="italic">Cracked Area</mml:mtext>
<mml:mo>/</mml:mo>
<mml:mtext mathvariant="italic">Total Wall Area</mml:mtext>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x00D7;</mml:mo>
<mml:mn>100</mml:mn>
</mml:math>
</disp-formula>
<p>Cracked Area&#x202F;=&#x202F;number of pixels belonging to the crack (red pixels in <xref ref-type="table" rid="tab4">Table 4</xref>).</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>The results of the UAV&#x2019;s measurements.</p>
</caption>
<table frame="hsides" rules="groups">
<tbody>
<tr>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Brown textured surface with a slightly rough and uneven finish, resembling a wall or sheet of paper. The hue is consistent across the entire image.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Close-up of an imprint on a brown, textured surface, resembling a fossilized or embossed pattern with intricate details on the upper left section. The rest of the surface is smooth and uniform.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A brown surface displaying a horizontal irregular crack running across it, creating a natural split in the texture.</alt-text>
</inline-graphic>
</td>
</tr>
<tr>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A textured gray surface with a rough appearance. The texture is uniform and resembles plaster or concrete.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Close-up of a textured gray surface with a rough, irregular patch on the left side. The patch has a grainy appearance, contrasting with the smoother surrounding area.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A grayscale image showing a jagged horizontal seam or line running across a textured gray surface, suggesting a form of tearing or cracking.</alt-text>
</inline-graphic>
</td>
</tr>
<tr>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Illustration depicting a person with a gingerbread man cookie held in front of their face. The cookie has eyes, a smile, and three buttons. The person's face is not visible.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i008.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Brightly outlined, irregularly shaped object appears against a black background, resembling a rocky structure or asteroid. The object is heavily textured and occupies the upper left corner.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i009.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">White jagged line resembling a tear across a black background, creating a stark contrast between the uneven line and the dark surface.</alt-text>
</inline-graphic>
</td>
</tr>
<tr>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i010.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flat surface with a uniform brown texture, resembling a suede-like material. The texture appears soft and consistent across the image.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i011.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A textured red heart shape on a brown, paper-like background. The heart is positioned in the upper left corner, contrasting with the beige surface.</alt-text>
</inline-graphic>
</td>
<td align="left" valign="top">
<inline-graphic xlink:href="frai-09-1741082-i012.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A jagged red line runs horizontally across a textured brown background, creating a contrasting visual effect.</alt-text>
</inline-graphic>
</td>
</tr>
<tr>
<td align="left" valign="top">No CRACK Found</td>
<td align="left" valign="top">CRACK Found<break/>Type: Area Crack<break/>Total Wall Area (px): 224&#x202F;&#x00D7;&#x202F;224<break/>Cracked Area (px): 3500<break/>Area (%): 6.98%<break/>Severity: Low (&#x003C;10%)</td>
<td align="left" valign="top">CRACK Found<break/>Type: Linear Crack<break/>Length (px): 393<break/>Mean Width (px): 76.87<break/>Max Width (px): 76.87<break/>Length (mm): 103.98<break/>Avg Width (mm): 20.34<break/>Max Width (mm): 20.34<break/>Severity: Medium</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Total Wall Area&#x202F;=&#x202F;total number of pixels in the wall region (the entire image).</p>
<p>Severity is classified as low, medium, or high. Alternatively, the ratio of cracked area to its bounding box can be used, depending on the application. Reliability is checked against manually labeled ground truth.</p>
</sec>
</sec>
</sec>
</sec>
<sec id="sec20">
<label>3</label>
<title>Experiments and results</title>
<p>To validate the effectiveness of the developed system, tests were conducted on images containing thin and irregular cracks on complex concrete backgrounds. The proposed system is a responsive web application for automated crack detection and analysis, developed in Python (3.10) using the Flask (2.3) framework. Flask enables rapid deployment of deep learning models via RESTful APIs and seamless integration with trained networks. As illustrated in <xref ref-type="fig" rid="fig12">Figure 12</xref>, the web interface includes: (a) a home screen for selecting one of four deep learning models (MobileNetV2, ResNet-50, InceptionV3, EfficientNetB7) and uploading images, and (b) an output screen displaying the analysis results. Example classification outputs are shown in <xref ref-type="fig" rid="fig13">Figure 13</xref>. The system was tested across various devices and screen sizes to ensure responsiveness and accessibility.</p>
<fig position="float" id="fig12">
<label>Figure 12</label>
<caption>
<p>Interface of the proposed web application: <bold>(a)</bold> home screen showing the four selectable deep learning models and drag-and-drop image upload functionality; <bold>(b)</bold> output screen presenting the analysis results for the chosen model.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g012.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Image showing a crack analysis tool interface. On the left, options to select analysis models like MobileNetV2, InceptionV3, and others, with MobileNetV2 selected. Below is an area to upload an image. On the right, a crack detection result is displayed, highlighting a red-lined linear crack on a surface. Details include crack length, width, severity as medium, and model confidence at ninety-four point two percent.</alt-text>
</graphic>
</fig>
<fig position="float" id="fig13">
<label>Figure 13</label>
<caption>
<p>Examples of crack classification results obtained from four CNN models, indicating predicted class and confidence percentage.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g013.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Comparison of crack detection accuracy in images by four models: MobileNetV2, ResNet-50, InceptionV3, and EfficientNetB7. Each model analyzes four images, predicting "Crack" or "No Crack" with percentage confidence. MobileNetV2 and InceptionV3 show at least 98% confidence; ResNet-50 and EfficientNetB7 show varying confidences, with a minimum of 95%.</alt-text>
</graphic>
</fig>
<p>The U-Net architecture was trained using an 80&#x2013;20% dataset split (&#x2248;8,000 training images, &#x2248;2,000 test images) to assess generalization. Training was performed in Python 3.8 with PyTorch, using a batch size of 16 and the Adam optimizer (learning rate 0.001, &#x03B2;1&#x202F;=&#x202F;0.9, &#x03B2;2&#x202F;=&#x202F;0.999, weight decay 10<sup>&#x2212;4</sup>). Segmentation used a combination of Dice and Binary Cross-Entropy losses, with a Sigmoid output activation. Models were trained for 100 epochs, with a learning rate reduction factor of 0.1 at epoch 40. Runtime evaluation was performed on an NVIDIA DGX-1 system with dual 20-core Intel Xeon CPUs, 512&#x202F;GB RAM, and eight Tesla V100 GPUs.</p>
<sec id="sec21">
<label>3.1</label>
<title>Evaluation metrics</title>
<p>To rigorously assess the proposed deep learning system, two complementary sets of metrics were used: segmentation metrics, which evaluate pixel-level accuracy in localizing cracks, and classification metrics, which assess the model&#x2019;s ability to correctly label pixels as crack or non-crack. This dual evaluation provides a detailed understanding of model performance at both fine-grained and global levels.</p>
<sec id="sec22">
<label>3.1.1</label>
<title>Segmentation metrics</title>
<p>Segmentation metrics measure how accurately the predicted masks match the ground-truth crack regions.</p>
<p>(a) Pixel Accuracy (PA): The proportion of correctly classified pixels over the entire image, expressing the global correctness of the segmentation (<xref ref-type="disp-formula" rid="E5">Equation 1</xref>).</p>
<disp-formula id="E5">
<mml:math id="M16">
<mml:mi>PA</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="italic">ii</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
<p>where <italic>n&#x1D62;&#x1D62;</italic> is the number of correctly classified pixels for class <italic>i</italic>, and <italic>t&#x1D62;</italic> is the total number of pixels belonging to class <italic>i</italic>.</p>
<p>(b) Mean Pixel Accuracy (MPA): Computes the accuracy independently for each class and then averages the results. This metric is robust for imbalanced datasets (<xref ref-type="disp-formula" rid="E6">Equation 2</xref>).</p>
<disp-formula id="E6">
<mml:math id="M17">
<mml:mi>MPA</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x00D7;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="italic">ii</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
<p>with <italic>n<sub>c</sub></italic> representing the number of classes (crack, non-crack).</p>
<p>(c) Intersection over Union (IoU): Measures overlap between predicted and ground-truth regions of class <italic>i</italic> (<xref ref-type="disp-formula" rid="E7">Equation 3</xref>).</p>
<disp-formula id="E7">
<mml:math id="M18">
<mml:msub>
<mml:mi>IoU</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="italic">ii</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="italic">ji</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="italic">ii</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
<p>Higher IoU values indicate more accurate segmentation.</p>
<p>(d) Mean Intersection over Union (MIoU): Averages the IoU across all classes, a standard benchmark in semantic segmentation (<xref ref-type="disp-formula" rid="E8">Equation 4</xref>).</p>
<disp-formula id="E8">
<mml:math id="M19">
<mml:mtext>MIoU</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x00D7;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>IoU</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:math>
<label>(4)</label>
</disp-formula>
<p>(e) Dice Coefficient (F1-Score for Segmentation): Evaluates similarity between predicted and reference masks, particularly useful for thin cracks (<xref ref-type="disp-formula" rid="E9">Equation 5</xref>).</p>
<disp-formula id="E9">
<mml:math id="M20">
<mml:mtext>Dice</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>&#x2229;</mml:mo>
<mml:mi mathvariant="normal">B</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="normal">B</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>
<p>where <italic>A</italic> and <italic>B</italic> are the predicted and ground-truth masks.</p>
</sec>
<sec id="sec23">
<label>3.1.2</label>
<title>Classification metrics</title>
<p>Classification performance was derived from four fundamental quantities in the confusion matrix:</p>
<list list-type="bullet">
<list-item>
<p>TP: correctly detected crack pixels</p>
</list-item>
<list-item>
<p>TN: correctly detected non-crack pixels</p>
</list-item>
<list-item>
<p>FP: non-crack pixels incorrectly labeled as cracks</p>
</list-item>
<list-item>
<p>FN: crack pixels incorrectly labeled as non-cracks</p>
</list-item>
</list>
<p>(a) Accuracy: Overall proportion of correct predictions (<xref ref-type="disp-formula" rid="E10">Equation 6</xref>).</p>
<disp-formula id="E10">
<mml:math id="M21">
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>
<p>(b) Recall/True Positive Rate (TPR): Proportion of actual crack pixels correctly detected (<xref ref-type="disp-formula" rid="E11">Equation 7</xref>).</p>
<disp-formula id="E11">
<mml:math id="M22">
<mml:mi>TPR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>
<p>(c) False Positive Rate (FPR): Proportion of non-crack pixels incorrectly labeled as cracks (<xref ref-type="disp-formula" rid="E12">Equation 8</xref>).</p>
<disp-formula id="E12">
<mml:math id="M23">
<mml:mi>FPR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>
<p>(d) False Negative Rate (FNR): Proportion of crack pixels missed by the model (<xref ref-type="disp-formula" rid="E13">Equation 9</xref>).</p>
<disp-formula id="E13">
<mml:math id="M24">
<mml:mi>FNR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>FN</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>
<p>(e) True Negative Rate (TNR)/Specificity: Proportion of non-crack pixels correctly identified (<xref ref-type="disp-formula" rid="E14">Equation 10</xref>).</p>
<disp-formula id="E14">
<mml:math id="M25">
<mml:mi>TNR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>
<p>(f) Precision: Proportion of predicted crack pixels that are correct (<xref ref-type="disp-formula" rid="E15">Equation 11</xref>).</p>
<disp-formula id="E15">
<mml:math id="M26">
<mml:mtext>Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula>
<p>(g) F1-Score: Harmonic mean of Precision and Recall, balancing false positives and false negatives (<xref ref-type="disp-formula" rid="E16">Equation 12</xref>).</p>
<disp-formula id="E16">
<mml:math id="M27">
<mml:mi>F1</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>2TP</mml:mi>
<mml:mo>/</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>2TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>
</sec>
</sec>
<sec id="sec24">
<label>3.2</label>
<title>Comparative analysis of transfer learning techniques</title>
<p>The convergence behavior of the transfer learning models is shown in <xref ref-type="fig" rid="fig14">Figure 14</xref>. <xref ref-type="fig" rid="fig14">Figures 14a</xref>,<xref ref-type="fig" rid="fig14">b</xref> depict the training and validation accuracies, where MobileNetV2, InceptionV3, and ResNet-50 obtained almost perfect scores, and MobileNetV2 showed the best generalization characterized by a small difference in training and validation performance. EfficientNetB7 converged slower and achieved worse accuracies. <xref ref-type="fig" rid="fig14">Figures 14c</xref>,<xref ref-type="fig" rid="fig14">d</xref> show the training and validation losses. MobileNetV2 and InceptionV3 had their early minima and stable performance thereafter, whereas ResNet-50 showed small oscillations due to temporary overfitting. EfficientNetB7 was observed to have a steady linear decrease in loss. To summarize, this pattern indicates robust learning and generalization in MobileNetV2 and InceptionV3, whereas EfficientNetB7 appeared to converge slowly but consistently decrease in loss.</p>
<fig position="float" id="fig14">
<label>Figure 14</label>
<caption>
<p>Evaluation of the convergence behavior of the transfer learning models: <bold>(a)</bold> training accuracy, <bold>(b)</bold> validation accuracy, <bold>(c)</bold> training loss, and <bold>(d)</bold> validation loss.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g014.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four line charts show performance metrics for different deep learning models over epochs. (a) Training accuracy increases for all models, with EfficientNetB7 showing the most improvement. (b) Validation accuracy remains high for most models, except EfficientNetB7, which fluctuates. (c) Training loss decreases for all, with EfficientNetB7 having the highest initial loss. (d) Validation loss is consistently low except for EfficientNetB7, which decreases over time. Legends indicate models: MobileNetV2, ResNet50, InceptionV3, and EfficientNetB7.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec25">
<label>3.3</label>
<title>Crack classification outcomes and performance metrics</title>
<p><xref ref-type="table" rid="tab5">Table 5</xref> summarizes the performance of U-Net models with different transfer learning backbones, evaluated using both segmentation metrics (PA, MPA, Dice, IoU, and mIoU) and classification metrics (Accuracy, Recall, Precision, F1-Score). MobileNetV2 and InceptionV3 achieved the highest and most balanced performance. MobileNetV2 showed excellent generalization (Precision&#x202F;=&#x202F;99.1%, Recall&#x202F;=&#x202F;98.2%), while InceptionV3 maintained robust segmentation (F1-Score&#x202F;=&#x202F;97.3%, Dice&#x202F;=&#x202F;97.2%). ResNet-50 was competitive (Accuracy&#x202F;=&#x202F;96.5%, F1-Score&#x202F;=&#x202F;95.9%), with minor early overfitting, and EfficientNetB7 had the lowest overall performance, although its higher precision (90.2%) and efficiency make it suitable for low-resource applications.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Evaluating performance metrics for different pre-trained transfer learning CNN models (%).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model (%)</th>
<th align="center" valign="top">PA</th>
<th align="center" valign="top">MPA</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">F1-score</th>
<th align="center" valign="top">Dice</th>
<th align="center" valign="top">IoU</th>
<th align="center" valign="top">mIoU</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">MobileNetV2</td>
<td align="char" valign="top" char=".">98.8</td>
<td align="char" valign="top" char=".">98.6</td>
<td align="char" valign="top" char=".">98.7</td>
<td align="char" valign="top" char=".">98.2</td>
<td align="char" valign="top" char=".">99.1</td>
<td align="char" valign="top" char=".">98.6</td>
<td align="char" valign="top" char=".">98.5</td>
<td align="char" valign="top" char=".">97.2</td>
<td align="char" valign="top" char=".">96.8</td>
</tr>
<tr>
<td align="left" valign="top">ResNet-50</td>
<td align="char" valign="top" char=".">96.7</td>
<td align="char" valign="top" char=".">96.2</td>
<td align="char" valign="top" char=".">96.5</td>
<td align="char" valign="top" char=".">95.7</td>
<td align="char" valign="top" char=".">96.2</td>
<td align="char" valign="top" char=".">95.9</td>
<td align="char" valign="top" char=".">95.8</td>
<td align="char" valign="top" char=".">94.1</td>
<td align="char" valign="top" char=".">93.7</td>
</tr>
<tr>
<td align="left" valign="top">InceptionV3</td>
<td align="char" valign="top" char=".">97.6</td>
<td align="char" valign="top" char=".">97.0</td>
<td align="char" valign="top" char=".">97.4</td>
<td align="char" valign="top" char=".">96.8</td>
<td align="char" valign="top" char=".">97.9</td>
<td align="char" valign="top" char=".">97.3</td>
<td align="char" valign="top" char=".">97.2</td>
<td align="char" valign="top" char=".">95.9</td>
<td align="char" valign="top" char=".">95.5</td>
</tr>
<tr>
<td align="left" valign="top">EfficientNetB7</td>
<td align="char" valign="top" char=".">89.3</td>
<td align="char" valign="top" char=".">88.8</td>
<td align="char" valign="top" char=".">89.1</td>
<td align="char" valign="top" char=".">88.4</td>
<td align="char" valign="top" char=".">90.2</td>
<td align="char" valign="top" char=".">89.3</td>
<td align="char" valign="top" char=".">89.0</td>
<td align="char" valign="top" char=".">86.7</td>
<td align="char" valign="top" char=".">86.2</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec26">
<label>3.4</label>
<title>Original per-Class IoU, robustness checks, and confusion matrices</title>
<p>Per-Class IoU: MobileNetV2 achieved the highest IoU values (Crack&#x202F;=&#x202F;96.1%, non-Crack&#x202F;=&#x202F;97.5%), followed by InceptionV3 (Crack&#x202F;=&#x202F;94.5%, non-crack&#x202F;=&#x202F;96.5%), ResNet-50 (92.8, 94.6%), and EfficientNetB7 (85.0, 87.4%), reflecting relative segmentation capacities (<xref ref-type="fig" rid="fig15">Figure 15</xref>).</p>
<fig position="float" id="fig15">
<label>Figure 15</label>
<caption>
<p>Original per-Class IoU values for different U-Net backbones.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g015.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart depicting per-class Intersection over Union (IoU) percentages for crack and non-crack classes using four models: MobileNetV2 (blue), ResNet50 (orange), InceptionV3 (green), and EfficientNetB7 (red). Crack class IoUs are: MobileNetV2 96.1%, ResNet50 92.8%, InceptionV3 94.5%, and EfficientNetB7 85.0%. Non-crack class IoUs are: MobileNetV2 97.5%, ResNet50 94.6%, InceptionV3 96.5%, and EfficientNetB7 87.4%.</alt-text>
</graphic>
</fig>
<p>Robustness checks: Minor perturbations (e.g., noise, rotations) caused small reductions in IoU across all models. MobileNetV2 remained the most robust (Crack&#x202F;=&#x202F;94.2%, non-Crack&#x202F;=&#x202F;95.8%), followed by InceptionV3 and ResNet-50, while EfficientNetB7 showed greater sensitivity (Crack&#x202F;=&#x202F;83.2%, non-crack&#x202F;=&#x202F;85.5%) (<xref ref-type="fig" rid="fig16">Figure 16</xref>).</p>
<fig position="float" id="fig16">
<label>Figure 16</label>
<caption>
<p>Per-class IoU values under robustness checks.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g016.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart titled "Robustness Per-Class IoU" compares IoU percentages for four models across "Crack" and "Non-Crack" classes. MobileNetV2 scores 94.7% and 95.1%, ResNet50 91.6% and 93.4%, InceptionV3 93.2% and 93.8%, EfficientNetB7 82.4% and 85.0%.</alt-text>
</graphic>
</fig>
<p>Confusion matrices: MobileNetV2 and InceptionV3 had the lowest misclassification rates, correctly identifying the majority of crack and non-crack pixels. EfficientNetB7 exhibited higher counts of false positives and negatives. These trends are visualized in <xref ref-type="fig" rid="fig17">Figure 17</xref> and confirm that MobileNetV2 and InceptionV3 offer both strong pixel-wise segmentation and classification performance, with superior robustness to minor input variations.</p>
<fig position="float" id="fig17">
<label>Figure 17</label>
<caption>
<p>Confusion matrices of the evaluated models: <bold>(a)</bold> MobileNetV2, <bold>(b)</bold> ResNet-50, <bold>(c)</bold> InceptionV3, and <bold>(d)</bold> EfficientNetB7.</p>
</caption>
<graphic xlink:href="frai-09-1741082-g017.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four confusion matrices comparing crack detection performance: (a) MobileNetV2 with high true negative and positive rates, (b) ResNet50 showing moderate false positives, (c) InceptionV3 with balanced precision, and (d) EfficientNetB7 showing increased false negatives. Each matrix displays predicted versus actual classes with red intensity indicating the number of samples.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec27">
<label>4</label>
<title>Discussion</title>
<p>The experimental results show that the web-based crack detection system presented here, utilizing U-Net architectures with different options for transfer learning backbones, can achieve very high accuracy when segmenting thin and irregular cracks on intricate concrete surfaces. MobileNetV2 and InceptionV3 consistently outperformed ResNet-50 and EfficientNetB7, offering a balanced trade-off between computational efficiency and generalization (<xref ref-type="table" rid="tab5">Table 5</xref>). MobileNetV2 was the most robust and precise at the pixel level, represented by per-Class IoU values of 96.1 percent for cracks and 97.5 percent for non-cracks (<xref ref-type="fig" rid="fig15">Figure 15</xref>) with an overall F1-score of 98.6 percent. The convergence behavior (<xref ref-type="fig" rid="fig14">Figure 14</xref>) and robustness (<xref ref-type="fig" rid="fig16">Figure 16</xref>) checks confirm the stability of these top models, and confusion matrices (<xref ref-type="fig" rid="fig17">Figure 17</xref>) and output examples (<xref ref-type="fig" rid="fig13">Figure 13</xref>) highlight minimal false positives and good segmentation.</p>
<p>Against other recent studies, our results go beyond many benchmarks. <xref ref-type="bibr" rid="ref9001">Diakogiannis et al. (2020)</xref> report IoU around 95% and Dice scores near 94% using a ResNet-based U-Net; in contrast, our MobileNetV2 backbone achieves promising segmentation quality and generalization. On the other hand, hybrid architectures, such as attention-enhanced U-Nets or Transformers, achieve Dice coefficients that are close to 0.99, but at a much higher computational burden. Classification-focused CNNs achieve high detection accuracy (~97&#x2013;99%), although without fine-grained pixel-level localization, and object detection frameworks perform fast inference with moderate mIoU (~94&#x2013;95%) resolutions but fail to capture detailed morphology of cracks (<xref ref-type="bibr" rid="ref34">Qayyum et al., 2023</xref>; <xref ref-type="bibr" rid="ref7">Anusha and Anbarasi, 2025</xref>).</p>
<p>On the contrary, the proposed system yields dense, pixel-wise segmentation with robust performance under perturbations and practical interpretability via a web-based interface (<xref ref-type="fig" rid="fig12">Figures 12</xref>, <xref ref-type="fig" rid="fig13">13</xref>). Overall, the lightweight transfer learning method either meets or surpasses state-of-the-art performance and represents a truly deployable solution for practical applications in the field of structural health monitoring.</p>
</sec>
<sec id="sec28">
<label>5</label>
<title>Limitations and assumptions</title>
<p>The proposed technique exhibits good performance subject to given assumptions defining the limits of its operational capability. First, the reported metrics (like MobileNetV2&#x2019;s 98.7% accuracy) are based on controlled imaging conditions: input images comparable to the training set in terms of resolution, contrast, and lighting conditions, hence devoid of significant shadows, blur, or distortions. Validation has been confined to concrete surfaces, so further adaptations may be necessary to generalize to other materials like asphalt or masonry. Finally, all quantitative outputs such as crack width or area are dependent on segmentation accuracy, which means that any error, however small, at the pixel level will propagate into these measurements. These considerations are critical for placing the system in its practical context.</p>
</sec>
<sec id="sec29">
<label>6</label>
<title>Conclusion and future work</title>
<p>This research successfully develops and validates a responsive, web-based system for automated crack detection and quantitative structural health assessment. The primary contributions of this work are fourfold: (1) the implementation and rigorous evaluation of a U-Net architecture enhanced with several state-of-the-art transfer learning backbones (MobileNetV2, InceptionV3, ResNet-50, EfficientNetB7) for precise pixel-wise segmentation; (2) the demonstration of exceptional performance, particularly with MobileNetV2 achieving a mean Intersection over Union (mIoU) of 96.8% and an overall accuracy of 98.7%, establishing a robust benchmark for crack delineation in complex concrete backgrounds; (3) the development of a user-accessible Flask web application that integrates model inference, visualization, and feature extraction into a single, deployable platform; and (4) a comprehensive quantitative analysis using both segmentation (IoU, Dice) and classification (Precision, Recall, F1-Score) metrics, complemented by robustness checks, to provide a holistic evaluation of model performance and generalizability.</p>
<p>The practical value of the system lies in its dual capability for high-accuracy detection and immediate, quantifiable analysis, delivering metrics such as crack width and area through an interface optimized for standard computational resources. This makes the tool both technically sophisticated and operationally practical for engineers and inspectors.</p>
<p>Future work will focus on extending the system to 3D defect quantification by integrating photogrammetry and LiDAR data, enhancing robustness under real-world conditions such as varying lighting and occlusion, expanding detection to a wider range of defect types, and optimizing the framework for real-time, edge-based deployment on-site.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec30">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/supplementary material.</p>
</sec>
<sec sec-type="author-contributions" id="sec31">
<title>Author contributions</title>
<p>HMa: Writing &#x2013; review &#x0026; editing, Visualization, Writing &#x2013; original draft, Conceptualization, Formal Analysis, Methodology, Data curation. JA: Writing &#x2013; review &#x0026; editing, Formal analysis, Validation. HMo: Methodology, Supervision, Writing &#x2013; review &#x0026; editing, Project administration, Validation, Writing &#x2013; original draft.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We express our gratitude to the construction safety inspectors who participated in the qualitative evaluation and provided essential feedback. We also acknowledge the contributors to the public datasets used in this research.</p>
</ack>
<sec sec-type="COI-statement" id="sec32">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec33">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec34">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="book"><collab id="coll1">ACI Committee 224</collab> (<year>2001</year>). <source>ACI 224R 01: Control of cracking in concrete structures</source>: <publisher-name>American Concrete Institute</publisher-name>.</mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ahila Priyadharshini</surname><given-names>R.</given-names></name> <name><surname>Arivazhagan</surname><given-names>S.</given-names></name> <name><surname>Arun</surname><given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Crack recognition on concrete structures based on machine crafted and handcrafted features</article-title>. <source>Expert Syst. Appl.</source> <volume>228</volume>:<fpage>120447</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2023.120447</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Al Biajawi</surname><given-names>M. I.</given-names></name> <name><surname>Embong</surname><given-names>R.</given-names></name> <name><surname>Muthusamy</surname><given-names>K.</given-names></name> <name><surname>Ismail</surname><given-names>N.</given-names></name> <name><surname>Johari</surname><given-names>I.</given-names></name></person-group> (<year>2023</year>). <article-title>Assessing the performance of concrete made with recycled latex gloves and silicone catheter using ultrasonic pulse velocity</article-title>. <source>Mater Today Proc</source> <volume>109</volume>:<fpage>S2214785323037173</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.matpr.2023.06.317</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname><given-names>L.</given-names></name> <name><surname>AlJassmi</surname><given-names>H.</given-names></name> <name><surname>Swavaf</surname><given-names>M.</given-names></name> <name><surname>Khan</surname><given-names>W.</given-names></name> <name><surname>Alnajjar</surname><given-names>F.</given-names></name></person-group> (<year>2024</year>). <article-title>RS-net: residual sharp U-net architecture for pavement crack segmentation and severity assessment</article-title>. <source>J. Big Data</source> <volume>11</volume>:<fpage>116</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40537-024-00981-y</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alsharqawi</surname><given-names>M.</given-names></name> <name><surname>Dawood</surname><given-names>T.</given-names></name> <name><surname>Abdelkhalek</surname><given-names>S.</given-names></name> <name><surname>Abouhamad</surname><given-names>M.</given-names></name> <name><surname>Zayed</surname><given-names>T.</given-names></name></person-group> (<year>2022</year>). <article-title>Condition assessment of concrete-made structures using ground penetrating radar</article-title>. <source>Autom. Constr.</source> <volume>144</volume>:<fpage>104627</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.autcon.2022.104627</pub-id></mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amiriebrahimabadi</surname><given-names>M.</given-names></name> <name><surname>Rouhi</surname><given-names>Z.</given-names></name> <name><surname>Mansouri</surname><given-names>N.</given-names></name></person-group> (<year>2024</year>). <article-title>A comprehensive survey of multi-level thresholding segmentation methods for image processing</article-title>. <source>Arch. Comput. Methods Eng.</source> <volume>31</volume>, <fpage>3647</fpage>&#x2013;<lpage>3697</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11831-024-10093-8</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Anusha</surname><given-names>N.</given-names></name> <name><surname>Anbarasi</surname><given-names>L. J.</given-names></name></person-group> (<year>2025</year>). <article-title>Crack detection in structural images using a hybrid Swin transformer and enhanced features representation block</article-title>. <source>Front. Artif. Intell.</source> <volume>8</volume>:<fpage>1655091</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frai.2025.1655091</pub-id>, <pub-id pub-id-type="pmid">41409739</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bishop</surname><given-names>C. M.</given-names></name></person-group> (<year>2006</year>). <source>Pattern recognition and machine learning</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name>.</mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bussa</surname><given-names>S. K.</given-names></name> <name><surname>Boppana</surname><given-names>N. K.</given-names></name></person-group> (<year>2025</year>). <article-title>Enhanced ResNet-50 deep learning algorithm for classification of crack images in RCC structures</article-title>. <source>Asian J. Civ. Eng.</source> <volume>26</volume>, <fpage>3773</fpage>&#x2013;<lpage>3784</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s42107-025-01396-7</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Candelaria</surname><given-names>Ma. D. E.</given-names></name> <name><surname>Kee</surname><given-names>S.-H.</given-names></name></person-group> (<year>2023</year>). <article-title>Evaluation of thermal damages of concrete subjected to high temperatures using recurrent neural networks for ultrasonic pulse waves</article-title>. <source>Constr. Build. Mater.</source> <volume>407</volume>:<fpage>133416</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2023.133416</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname><given-names>C.-C.</given-names></name> <name><surname>Yu</surname><given-names>C.-P.</given-names></name> <name><surname>Lin</surname><given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>Distinction between crack echoes and rebar echoes based on Morlet wavelet transform of impact echo signals</article-title>. <source>NDT E Int.</source> <volume>108</volume>:<fpage>102169</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ndteint.2019.102169</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cha</surname><given-names>Y.-J.</given-names></name> <name><surname>Choi</surname><given-names>W.</given-names></name> <name><surname>B&#x00FC;y&#x00FC;k&#x00F6;zt&#x00FC;rk</surname><given-names>O.</given-names></name></person-group> (<year>2017</year>). <article-title>Deep learning-based crack damage detection using convolutional neural networks</article-title>. <source>Comput. Aided Civ. Inf. Eng.</source> <volume>32</volume>, <fpage>361</fpage>&#x2013;<lpage>378</lpage>. doi: <pub-id pub-id-type="doi">10.1111/mice.12263</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname><given-names>C. C.</given-names></name></person-group> (<year>1997</year>). <article-title>The impact-echo response of concrete containing steel reinforcing bars, cracks around bars, and delaminations</article-title>. <source>NDT E Int.</source> <volume>30</volume>:<fpage>259</fpage>. doi: <pub-id pub-id-type="doi">10.1016/S0963-8695(97)88947-5</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>L.</given-names></name> <name><surname>Chen</surname><given-names>W.</given-names></name> <name><surname>Wang</surname><given-names>L.</given-names></name> <name><surname>Zhai</surname><given-names>C.</given-names></name> <name><surname>Hu</surname><given-names>X.</given-names></name> <name><surname>Sun</surname><given-names>L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Convolutional neural networks (CNNs)-based multi-category damage detection and recognition of high-speed rail (HSR) reinforced concrete (RC) bridges using test images</article-title>. <source>Eng. Struct.</source> <volume>276</volume>:<fpage>115306</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.engstruct.2022.115306</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname><given-names>J.</given-names></name> <name><surname>Tao</surname><given-names>N.</given-names></name> <name><surname>Omer</surname><given-names>A. M.</given-names></name> <name><surname>Zhang</surname><given-names>C.</given-names></name> <name><surname>Zhang</surname><given-names>Q.</given-names></name> <name><surname>Ma</surname><given-names>Y.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Attention-enhanced U-net for automatic crack detection in ancient murals using optical pulsed thermography</article-title>. <source>J. Cult. Herit.</source> <volume>70</volume>, <fpage>111</fpage>&#x2013;<lpage>119</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.culher.2024.08.015</pub-id></mixed-citation></ref>
<ref id="ref9001"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Diakogiannis</surname><given-names>F. I.</given-names></name> <name><surname>Waldner</surname><given-names>F.</given-names></name> <name><surname>Caccetta</surname><given-names>P.</given-names></name> <name><surname>Wu</surname><given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>ResUNet-a: a deep learning framework for semantic segmentation of remotely sensed data</article-title>. <source>ISPRS J. Photogramm. Remote Sens.</source> <volume>162</volume>, <fpage>94</fpage>&#x2013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.01.013</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname><given-names>W.</given-names></name> <name><surname>Yang</surname><given-names>H.</given-names></name> <name><surname>Yu</surname><given-names>K.</given-names></name> <name><surname>Shu</surname><given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Crack detection and quantification for concrete structures using UAV and transformer</article-title>. <source>Autom. Constr.</source> <volume>152</volume>:<fpage>104929</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.autcon.2023.104929</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elhariri</surname><given-names>E.</given-names></name> <name><surname>Meshesha</surname><given-names>K.</given-names></name> <name><surname>Dandache</surname><given-names>A.</given-names></name> <name><surname>Yacoub</surname><given-names>S.</given-names></name> <name><surname>El Gohary</surname><given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Automated pixel-level deep crack segmentation on historical surfaces using U-net models</article-title>. <source>Algorithms</source> <volume>15</volume>:<fpage>281</fpage>. doi: <pub-id pub-id-type="doi">10.3390/a15080281</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname><given-names>J.</given-names></name> <name><surname>Ma</surname><given-names>T.</given-names></name> <name><surname>Zhu</surname><given-names>Y.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Ground penetrating radar detection of buried depth of pavement internal crack in asphalt surface: a study based on multiphase heterogeneous model</article-title>. <source>Measurement</source> <volume>221</volume>:<fpage>113531</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.measurement.2023.113531</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garg</surname><given-names>B.</given-names></name> <name><surname>Lohani</surname><given-names>D. C.</given-names></name> <name><surname>Rana</surname><given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>A comparative study of U-net-based segmentation architectures for crack detection</article-title>. <source>Proc. Comput. Sci</source> <volume>261</volume>, <fpage>588</fpage>&#x2013;<lpage>597</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.procs.2025.0</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>G&#x00E9;ron</surname><given-names>A.</given-names></name></person-group> (<year>2019</year>). <source>Hands-on machine learning with Scikit-learn, Keras, and TensorFlow</source>. <edition>2nd</edition> Edn. <publisher-loc>Sebastopol, CA</publisher-loc>: <publisher-name>O&#x2019;Reilly Media</publisher-name>.</mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gonzalez</surname><given-names>R. C.</given-names></name> <name><surname>Woods</surname><given-names>R. E.</given-names></name></person-group> (<year>2007</year>). <source>Digital Image Processing</source>. <edition>3rd</edition> Edn. <publisher-loc>Upper Saddle River, NJ, USA</publisher-loc>: <publisher-name>Pearson/Prentice Hall</publisher-name>.</mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hac&#x0131;efendio&#x011F;lu</surname><given-names>O.</given-names></name> <name><surname>Sar&#x0131;</surname><given-names>S.</given-names></name> <name><surname>Altun</surname><given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>Deep learning based automated detection of cracks in historical masonry structures</article-title>. <source>Buildings</source> <volume>13</volume>:<fpage>3113</fpage>. doi: <pub-id pub-id-type="doi">10.3390/buildings13123113</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hamidi</surname><given-names>S.</given-names></name> <name><surname>Soleymani</surname><given-names>A.</given-names></name> <name><surname>Rashidi</surname><given-names>M.</given-names></name></person-group> (<year>2025</year>). &#x201C;<article-title>A review of structural health monitoring and damage detection techniques in frames and bridges</article-title>&#x201D; in <source>Damage detection and structural health monitoring of concrete and masonry structures (springer tracts in civil engineering)</source>. eds. <person-group person-group-type="editor"><name><surname>Jahangir</surname><given-names>H.</given-names></name> <name><surname>Arora</surname><given-names>H. C.</given-names></name> <name><surname>dos Santos</surname><given-names>J. V. A.</given-names></name> <name><surname>Kumar</surname><given-names>K.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Kapoor</surname><given-names>N. R.</given-names></name></person-group> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer</publisher-name>).</mixed-citation></ref>
<ref id="ref9002"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hayes-Rich</surname><given-names>E.</given-names></name> <name><surname>Levy</surname><given-names>J.</given-names></name> <name><surname>Hayes-Rich</surname><given-names>N.</given-names></name> <name><surname>Lightfoot</surname><given-names>D.</given-names></name> <name><surname>Gauthier</surname><given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Searching for hidden waters: the effectiveness of remote sensing in assessing the distribution and status of a traditional, earthen irrigation system (khettara) in Morocco</article-title>. <source>J. Archaeol. Sci. Rep.</source> <volume>51</volume>:<fpage>104175</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jasrep.2023.104175</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Heaton</surname><given-names>J.</given-names></name> <name><surname>Goodfellow</surname><given-names>I.</given-names></name> <name><surname>Bengio</surname><given-names>Y.</given-names></name> <name><surname>Courville</surname><given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning</article-title>. <source>Genet. Program Evolvable Mach.</source> <volume>19</volume>, <fpage>305</fpage>&#x2013;<lpage>307</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10710-017-9314-z</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Iraniparast</surname><given-names>M.</given-names></name> <name><surname>Ranjbar</surname><given-names>S.</given-names></name> <name><surname>Rahai</surname><given-names>M.</given-names></name> <name><surname>Moghadas Nejad</surname><given-names>F.</given-names></name></person-group> (<year>2023</year>). <article-title>Surface concrete cracks detection and segmentation using transfer learning and multi-resolution image processing</article-title>. <source>Structures</source> <volume>54</volume>, <fpage>386</fpage>&#x2013;<lpage>398</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.istruc.2023.05.062</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Islam</surname><given-names>M. M. M.</given-names></name> <name><surname>Kim</surname><given-names>J.-M.</given-names></name></person-group> (<year>2019</year>). <article-title>Vision-based autonomous crack detection of concrete structures using a fully convolutional encoder&#x2013;decoder network</article-title>. <source>Sensors</source> <volume>19</volume>:<fpage>4251</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s19194251</pub-id>, <pub-id pub-id-type="pmid">31574963</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kapoor</surname><given-names>N. R.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Arora</surname><given-names>H. C.</given-names></name></person-group> (<year>2024</year>). &#x201C;<article-title>Artificial intelligence in civil engineering: an immersive view</article-title>&#x201D; in <source>Artificial intelligence applications for sustainable construction. Woodhead publishing series in civil and structural engineering</source> eds. <person-group person-group-type="editor"><name><surname>Noor</surname><given-names>Z.</given-names></name> <name><surname>Tamim</surname><given-names>A. K.</given-names></name> <name><surname>Chowdary</surname><given-names>K. P.</given-names></name></person-group> (<publisher-loc>Duxford, Cambridge, United Kingdom</publisher-loc>: <publisher-name>Woodhead Publishing</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>74</lpage>.</mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koch</surname><given-names>C.</given-names></name> <name><surname>Georgieva</surname><given-names>K.</given-names></name> <name><surname>Kasireddy</surname><given-names>V.</given-names></name> <name><surname>Akinci</surname><given-names>B.</given-names></name> <name><surname>Fieguth</surname><given-names>P.</given-names></name></person-group> (<year>2015</year>). <article-title>A review on computer vision based defect detection and condition assessment of concrete and asphalt civil infrastructure</article-title>. <source>Adv. Eng. Inform.</source> <volume>29</volume>, <fpage>196</fpage>&#x2013;<lpage>210</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.aei.2015.01.008</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kontoni</surname><given-names>D.-P. N.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Arora</surname><given-names>H. C.</given-names></name> <name><surname>Kapoor</surname><given-names>N. R.</given-names></name></person-group> (<year>2023</year>). &#x201C;<article-title>Damage detection in reinforced concrete structures using advanced automatic systems</article-title>&#x201D; in <source>Automation in construction toward resilience</source> eds. <person-group person-group-type="editor"><name><surname>Erica</surname><given-names>F.</given-names></name> <name><surname>Madasamy</surname><given-names>A.</given-names></name> <name><surname>Ananth</surname><given-names>R.</given-names></name></person-group> (<publisher-loc>Boca Raton, FL, USA</publisher-loc>: <publisher-name>CRC Press</publisher-name>).</mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Zhang</surname><given-names>H.</given-names></name> <name><surname>Zhu</surname><given-names>H.</given-names></name> <name><surname>Gao</surname><given-names>K.</given-names></name> <name><surname>Liang</surname><given-names>H.</given-names></name> <name><surname>Yang</surname><given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Automatic crack detection on concrete and asphalt surfaces using semantic segmentation network with hierarchical transformer</article-title>. <source>Eng. Struct.</source> <volume>307</volume>:<fpage>117903</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.engstruct.2024.117903</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mazni</surname><given-names>M.</given-names></name> <name><surname>Husain</surname><given-names>A. R.</given-names></name> <name><surname>Shapiai</surname><given-names>M. I.</given-names></name> <name><surname>Ibrahim</surname><given-names>I. S.</given-names></name> <name><surname>Anggara</surname><given-names>D. W.</given-names></name> <name><surname>Zulkifli</surname><given-names>R.</given-names></name></person-group> (<year>2024</year>). <article-title>An investigation into real-time surface crack classification and measurement for structural health monitoring using transfer learning convolutional neural networks and Otsu method</article-title>. <source>Alex. Eng. J.</source> <volume>92</volume>:<fpage>102052</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.aej.2024.02.052</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pervaiz</surname><given-names>S.</given-names></name> <name><surname>Cai</surname><given-names>C.</given-names></name> <name><surname>Javed</surname><given-names>R.</given-names></name> <name><surname>Liu</surname><given-names>S.</given-names></name> <name><surname>Sohel</surname><given-names>F.</given-names></name> <name><surname>Hassan</surname><given-names>S. G.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Enhanced concrete crack segmentation with MSMC-U-net: integrating multiscale features and contextual analysis for infrastructure safety</article-title>. <source>Expert Syst. Appl.</source> <volume>293</volume>:<fpage>128683</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2025.128683</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Qayyum</surname><given-names>W.</given-names></name> <name><surname>Ahmad</surname><given-names>A.</given-names></name> <name><surname>Chairman</surname><given-names>N.</given-names></name> <name><surname>Aljuhni</surname><given-names>A.</given-names></name></person-group>, <article-title>Evaluation of GoogLeNet, MobileNetV2, and InceptionV3 pre-trained convolutional neural networks for detection and classification of concrete crack images</article-title>, <conf-name>1st International Conference on Advances in Civil and Environmental Engineering, University of Engineering and Technology Taxila, Pakistan</conf-name>, (<year>2022</year>), <fpage>2</fpage>&#x2013;<lpage>3</lpage>.</mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qayyum</surname><given-names>W.</given-names></name> <name><surname>Ehtisham</surname><given-names>R.</given-names></name> <name><surname>Bahrami</surname><given-names>A.</given-names></name> <name><surname>Mir</surname><given-names>J.</given-names></name> <name><surname>Khan</surname><given-names>Q. U. Z.</given-names></name> <name><surname>Ahmad</surname><given-names>A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Predicting characteristics of cracks in concrete structure using convolutional neural network and image processing</article-title>. <source>Front. Mater.</source> <volume>10</volume>:<fpage>1210543</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fmats.2023.1210543</pub-id></mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qiu</surname><given-names>Q.</given-names></name> <name><surname>Lau</surname><given-names>D.</given-names></name></person-group> (<year>2023</year>). <article-title>Real-time detection of cracks in tiled sidewalks using YOLO-based method applied to unmanned aerial vehicle (UAV) images</article-title>. <source>Autom. Constr.</source> <volume>147</volume>:<fpage>104745</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.autcon.2023.104745</pub-id></mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ragnoli</surname><given-names>A.</given-names></name> <name><surname>De Blasiis</surname><given-names>M. R.</given-names></name> <name><surname>Benedetto</surname><given-names>A. D.</given-names></name></person-group> (<year>2018</year>). <article-title>Pavement distress detection methods: a review</article-title>. <source>Infrastructures</source> <volume>3</volume>, <fpage>1</fpage>&#x2013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.3390/infrastructures3040058</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rahmati</surname><given-names>M.</given-names></name> <name><surname>Toufigh</surname><given-names>V.</given-names></name> <name><surname>Keyvan</surname><given-names>K.</given-names></name></person-group> (<year>2023</year>). <article-title>Monitoring of crack healing in geopolymer concrete using a nonlinear ultrasound approach in phase-space domain</article-title>. <source>Ultrasonics</source> <volume>134</volume>:<fpage>107095</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ultras.2023.107095</pub-id>, <pub-id pub-id-type="pmid">37385144</pub-id></mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Raj</surname><given-names>N.</given-names></name> <name><surname>Kapoor</surname><given-names>N.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Chandra</surname><given-names>H.</given-names></name> <name><surname>Arora</surname><given-names>A.</given-names></name> <name><surname>Kumar</surname><given-names>A.</given-names></name></person-group> (<year>2024</year>). &#x201C;<article-title>Structural health monitoring of existing building structures for creating green smart cities using deep learning</article-title>&#x201D; in <source>Structural health monitoring of existing building structures for creating green smart cities using deep learning</source> eds. <person-group person-group-type="editor"><name><surname>Tyagi</surname><given-names>A. T.</given-names></name> <name><surname>Abraham</surname><given-names>A.</given-names></name></person-group> (<publisher-loc>Boca Raton, FL</publisher-loc>: <publisher-name>CRC Press/Taylor &#x0026; Francis</publisher-name>), <fpage>315</fpage>&#x2013;<lpage>345</lpage>.</mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rasol</surname><given-names>M. A.</given-names></name> <name><surname>P&#x00E9;rez-Gracia</surname><given-names>V.</given-names></name> <name><surname>Solla</surname><given-names>M.</given-names></name> <name><surname>Pais</surname><given-names>J. C.</given-names></name> <name><surname>Fernandes</surname><given-names>F. M.</given-names></name> <name><surname>Santos</surname><given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>An experimental and numerical approach to combine ground penetrating radar and computational modeling for the identification of early cracking in cement concrete pavements</article-title>. <source>NDT E Int.</source> <volume>115</volume>:<fpage>102293</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ndteint.2020.102293</pub-id></mixed-citation></ref>
<ref id="ref9003"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Ronneberger</surname><given-names>O.</given-names></name> <name><surname>Fischer</surname><given-names>P.</given-names></name> <name><surname>Brox</surname><given-names>T.</given-names></name></person-group> (<year>2015</year>). <article-title>U-Net: Convolutional networks for biomedical image segmentation. arXiv preprint arXiv:1505.04597</article-title>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1505.04597</pub-id></mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Russell</surname><given-names>B. C.</given-names></name> <name><surname>Torralba</surname><given-names>A.</given-names></name> <name><surname>Murphy</surname><given-names>K. P.</given-names></name> <name><surname>Freeman</surname><given-names>W. T.</given-names></name></person-group> (<year>2008</year>). <article-title>Labelme: a database and web-based tool for image annotation</article-title>. <source>Int. J. Comput. Vis.</source> <volume>77</volume>, <fpage>157</fpage>&#x2013;<lpage>173</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11263-007-0090-8</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Safiuddin</surname><given-names>M.</given-names></name> <name><surname>Kaish</surname><given-names>A. A.</given-names></name> <name><surname>Woon</surname><given-names>C. O.</given-names></name> <name><surname>Raman</surname><given-names>S. N.</given-names></name></person-group> (<year>2018</year>). <article-title>Early-age cracking in concrete: causes, consequences, remedial measures, and recommendations</article-title>. <source>Appl. Sci.</source> <volume>8</volume>:<fpage>1730</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app8101730</pub-id></mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shao</surname><given-names>H.</given-names></name> <name><surname>Wang</surname><given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Deep classification with linearity-enhanced logits to Softmax function</article-title>. <source>Entropy</source> <volume>25</volume>:<fpage>727</fpage>. doi: <pub-id pub-id-type="doi">10.3390/e25050727</pub-id>, <pub-id pub-id-type="pmid">37238482</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>S&#x00F6;yleyman</surname><given-names>T.</given-names></name> <name><surname>&#x00C7;elebi</surname><given-names>M. E.</given-names></name> <name><surname>&#x00D6;zyer</surname><given-names>T.</given-names></name></person-group> (<year>2022</year>). <article-title>Bibliometric analysis and review of deep learning based crack detection literature 2010&#x2013;2022</article-title>. <source>Buildings</source> <volume>12</volume>:<fpage>432</fpage>. doi: <pub-id pub-id-type="doi">10.3390/buildings12040432</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tran</surname><given-names>T. V.</given-names></name> <name><surname>Nguyen-Xuan</surname><given-names>H.</given-names></name> <name><surname>Zhuang</surname><given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>Investigation of crack segmentation and fast evaluation of crack propagation, based on deep learning</article-title>. <source>Front. Struct. Civ. Eng.</source> <volume>18</volume>, <fpage>516</fpage>&#x2013;<lpage>535</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11709-024-1040-z</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Visconti</surname><given-names>F.</given-names></name> <name><surname>Capozzi</surname><given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>Water and earth: the Sahrij Labgar basin in Marrakech</article-title>. <source>ESEMPI DI ARCHITETTURA Int. J. Archit. Engin.</source>, <volume>1</volume>, <fpage>15</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.4399/97888548734142</pub-id></mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Wada</surname><given-names>K.</given-names></name></person-group> (<year>2019</year>). LabelMe [Software]. GitHub. Available online at: <ext-link xlink:href="https://github.com/wkentaro/labelme" ext-link-type="uri">https://github.com/wkentaro/labelme</ext-link> (accessed March 15, 2021)</mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yalew</surname><given-names>T. T.</given-names></name> <name><surname>Kim</surname><given-names>K.-S.</given-names></name></person-group> (<year>2023</year>). <article-title>Automatic quantification of concrete cracks via multistage image filtration and trajectory-based local binarization</article-title>. <source>J. Build. Eng.</source> <volume>77</volume>:<fpage>107391</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jobe.2023.107391</pub-id></mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>G.</given-names></name> <name><surname>Liu</surname><given-names>K.</given-names></name> <name><surname>Zhang</surname><given-names>J.</given-names></name> <name><surname>Zhao</surname><given-names>B.</given-names></name> <name><surname>Zhao</surname><given-names>Z.</given-names></name> <name><surname>Chen</surname><given-names>X.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Datasets and processing methods for boosting visual inspection of civil infrastructure: a comprehensive review and algorithm comparison for crack classification, segmentation, and detection</article-title>. <source>Constr. Build. Mater.</source> <volume>356</volume>:<fpage>129226</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.conbuildmat.2022.129226</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname><given-names>Q.</given-names></name> <name><surname>Zhang</surname><given-names>Z.</given-names></name> <name><surname>Li</surname><given-names>Q.</given-names></name> <name><surname>Qi</surname><given-names>X.</given-names></name> <name><surname>Wang</surname><given-names>Q.</given-names></name> <name><surname>Wang</surname><given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>DeepCrack: learning hierarchical convolutional features for crack detection</article-title>. <source>IEEE Trans. Image Process.</source> <volume>28</volume>, <fpage>1498</fpage>&#x2013;<lpage>1512</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIP.2018.2878966</pub-id>, <pub-id pub-id-type="pmid">30387731</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0002">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1274932/overview">Reza Soleimanpour</ext-link>, Australian University - Kuwait, Kuwait</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0003">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2133294/overview">Nishant Raj Kapoor</ext-link>, Academy of Scientific and Innovative Research (AcSIR), India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3344462/overview">Shima Pakniat</ext-link>, University of Memphis, United States</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001"><label>1</label><p>The khettara (also known as qanat) is a traditional, earthen water management system (<xref ref-type="bibr" rid="ref9002">Hayes-Rich et al., 2023</xref>).</p></fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbr1">
<label>Abbreviations:</label>
<p>AI, Artificial Intelligence; CNN, Convolutional Neural Network; U-Net, U-shaped Network for image segmentation; CFD, CrackForest Dataset; GMM, Gaussian Mixture Model; RGB, Red, Green, Blue color channels; SE, Squeeze-and-Excitation; NN, Neural Network; ReLU, Rectified Linear Unit; TanH, Hyperbolic Tangent; SeLU, Scaled Exponential Linear Unit; MAT, Medial Axis Transform.</p>
</fn>
</fn-group>
</back>
</article>