<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mater.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Materials</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mater.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-8016</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1648653</article-id>
<article-id pub-id-type="doi">10.3389/fmats.2025.1648653</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Deep learning-based image classification for microstructural analysis in computational materials science</article-title>
<alt-title alt-title-type="left-running-head">Liu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmats.2025.1648653">10.3389/fmats.2025.1648653</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Haiyan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhu</surname>
<given-names>Penghua</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tan</surname>
<given-names>Chenyu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3103514"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>North China Institute of Aerospace Engineering</institution>, <city>Langfang</city>, <state>Hebei</state>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Cangzhou Normal University</institution>, <city>Cangzhou</city>, <state>Hebei</state>, <country country="CN">China</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Changchun University of Technology</institution>, <city>Changchun</city>, <state>Jilin</state>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Penghua Zhu, <email xlink:href="mailto:hazerarogi0@hotmail.com">hazerarogi0@hotmail.com</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-20">
<day>20</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1648653</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>23</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>27</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Liu, Zhu and Tan.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Zhu and Tan</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Recently, the integration of deep learning techniques and computational materials science has catalyzed significant advances in the microstructural analysis of materials, particularly through the lens of multiscale, high-dimensional imaging data. However, conventional models often fall short in capturing the intricate topology and spatial variability that define realistic microstructural patterns, limiting their ability to inform material property predictions, inverse design, and structural synthesis.</p>
</sec>
<sec>
<title>Methods</title>
<p>To overcome these challenges, we introduce an innovative deep learning framework designed for microstructural image classification and representation learning, incorporating physical, geometric, and topological constraints directly into the training process. Our method, centered on the structured generative model MorphoTensor, introduces hierarchical tensorial embeddings that retain directionality, anisotropy, and spatial locality&#x2014;features crucial for realistic material modeling. We further incorporate a Topology-Aware Latent Refinement strategy, which couples persistent homology with differentiable approximations of Betti numbers to enforce topological consistency and augment microstructural diversity. Unlike existing data-driven pipelines, our framework seamlessly integrates statistical encoding, topologicalization, and latent manifold alignment within a unified architecture, ensuring robustness across diverse datasets including phase-field simulations and real microscopy data.</p>
</sec>
<sec>
<title>Results and Discussion</title>
<p>Empirical evaluations on benchmark and experimental datasets demonstrate that our method significantly outperforms standard convolutional and autoencoding baselines in accuracy, stability, and generalization. Moreover, our approach aligns closely with the ongoing efforts in the broader computational materials and mechanics communities to build interpretable, physically informed, and adaptable deep learning systems. These contributions illustrate the potential of structured deep generative modeling as a foundational tool for advancing intelligent microstructure analysis and design in materials informatics.</p>
</sec>
</abstract>
<kwd-group>
<kwd>microstructural analysis</kwd>
<kwd>deep generative models</kwd>
<kwd>topological learning</kwd>
<kwd>computational materials science</kwd>
<kwd>MorphoTensor</kwd>
</kwd-group>
<funding-group>
<funding-statement>The authors declare that financial support was received for the research and/or publication of this article. Research Projects of the 14th Five-Year Plan for Educational Science under the Hebei Provincial Department of Education: Research on the Construction of Industry-Education Integration Community in the Field of Aerospace Cybersecurity Testing (No. 2503105).</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="8"/>
<equation-count count="25"/>
<ref-count count="49"/>
<page-count count="14"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Materials Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The rapid advancement of computational materials science has made it possible to simulate and analyze with unprecedented accuracy the microstructural features of materials (<xref ref-type="bibr" rid="B7">Chen C.-F. et al., 2021</xref>). However, accurately classifying these microstructures remains a challenging task due to the intricate patterns, varying scales, and diverse morphologies present in materials data (<xref ref-type="bibr" rid="B16">Hong et al., 2021</xref>). Traditional image analysis techniques struggle to generalize across different material systems, leading to inconsistent performance (<xref ref-type="bibr" rid="B27">Maur&#xed;cio et al., 2023</xref>). Therefore, there is a growing necessity for more robust and adaptive methods to interpret microstructural images. Deep learning, especially convolutional neural networks (CNNs), has emerged as a powerful solution for such tasks, not only enhancing classification accuracy but also enabling the discovery of subtle structural patterns that are difficult to identify through manual or conventional computational methods (<xref ref-type="bibr" rid="B41">Touvron et al., 2021</xref>). The integration of deep learning into microstructural analysis holds promise for accelerating materials discovery, optimizing material properties, and improving predictive modeling capabilities (<xref ref-type="bibr" rid="B43">Wang et al., 2022</xref>).</p>
<p>Initial studies approached microstructural image interpretation through classical computer vision techniques that emphasized low-level descriptors and algorithmic rules. These methods typically relied on predefined image processing operations such as edge detection, texture analysis, and morphological transformations (<xref ref-type="bibr" rid="B40">Tian et al., 2020</xref>). The extracted features were then used to construct visual representations that could be manually classified or interpreted by domain experts. Such techniques offered clear interpretability and were relatively straightforward to implement, making them well-suited for early investigations into structured or periodic microstructures (<xref ref-type="bibr" rid="B44">Yang et al., 2021</xref>). However, their effectiveness was largely constrained to idealized or synthetic datasets, where visual patterns exhibited strong regularity and minimal noise. In real-world materials, microstructures often display high variability in scale, orientation, and contrast, compounded by imaging artifacts and inter-sample heterogeneity (<xref ref-type="bibr" rid="B15">Hong et al., 2020</xref>). Classical descriptors, being low level and often linear in nature, lacked the expressiveness to model these complexities. Consequently, their generalization ability across different material systems, imaging modalities, or sample preparation methods was limited. In response to these shortcomings, the field began transitioning toward more adaptive and data-responsive frameworks. Researchers introduced semi-automated pipelines that combined classical feature extraction with rule-based decision trees or clustering algorithms, aiming to reduce the burden of manual annotation while improving consistency (<xref ref-type="bibr" rid="B38">Sun et al., 2022</xref>). 
These hybrid methods offered improved flexibility and some resilience to noise and structural diversity, but they still depended heavily on expert knowledge to define relevant features and threshold values. As the demand for scalable and generalizable microstructural analysis grew&#x2014;particularly in high-throughput materials discovery contexts&#x2014;it became clear that more robust, data-driven modeling approaches were needed to cope with the growing complexity and volume of materials imaging data (<xref ref-type="bibr" rid="B30">Rao et al., 2021</xref>).</p>
<p>Building on this need for adaptability, researchers began integrating statistical modeling and pattern recognition techniques that allowed systems to learn from annotated examples rather than relying solely on fixed rule sets (<xref ref-type="bibr" rid="B19">Kim et al., 2022</xref>). This marked a methodological shift toward supervised learning paradigms, where algorithms were trained to associate input features with known output labels based on curated microstructural datasets. Methods like support vector machines (SVM), random forests, k-nearest neighbors (k-NN), and principal component analysis (PCA) have gained broad usage in tasks involving classification, clustering, and dimensionality reduction (<xref ref-type="bibr" rid="B24">Mai et al., 2021</xref>). These models were typically coupled with engineered feature extraction pipelines involving texture descriptors, histogram statistics, frequency domain transforms, and geometric quantifiers of microstructural morphology (<xref ref-type="bibr" rid="B6">Bostanabad et al., 2018</xref>). The resulting hybrid frameworks improved both prediction accuracy and computational efficiency relative to early rule-based approaches, particularly for moderately sized datasets where manual labeling was feasible. Their utility was demonstrated in tasks such as grain boundary classification, phase segmentation, and defect detection in polycrystalline or composite materials (<xref ref-type="bibr" rid="B3">Azizi et al., 2021</xref>). Despite these advances, the performance of such models remained tightly bound to the quality and representativeness of the input features. Because feature design was largely manual and guided by domain heuristics, important structural cues&#x2014;especially those spanning multiple spatial scales or exhibiting irregular patterns&#x2014;were often missed or poorly encoded (<xref ref-type="bibr" rid="B10">DeCost and Holm, 2015</xref>). 
Furthermore, these models lacked the hierarchical representation learning capacity necessary to capture complex dependencies and interactions within heterogeneous microstructures (<xref ref-type="bibr" rid="B22">Li et al., 2020</xref>). As a result, their generalization capability across diverse material systems, imaging resolutions, or sample preparation techniques was limited (<xref ref-type="bibr" rid="B18">Kalidindi and De Graef, 2015</xref>). These shortcomings highlighted the need for more expressive, automated, and data-adaptive frameworks that could learn robust feature representations directly from raw or minimally processed image data (<xref ref-type="bibr" rid="B5">Bhojanapalli et al., 2021</xref>).</p>
<p>The evolution toward fully trainable, end-to-end systems marked a significant turning point with the adoption of deep learning models (<xref ref-type="bibr" rid="B45">Zhang et al., 2020</xref>). Convolutional neural networks (CNNs) and their extensions enabled direct learning from raw image data, bypassing the need for handcrafted features (<xref ref-type="bibr" rid="B34">Roy et al., 2022</xref>). These architectures proved especially powerful in capturing hierarchical spatial patterns and adapting across different imaging settings (<xref ref-type="bibr" rid="B49">Zhu et al., 2020</xref>). The availability of larger datasets and computational resources further amplified their impact, allowing for deeper and more expressive models (<xref ref-type="bibr" rid="B8">Chen L. et al., 2021</xref>). The application of transfer learning techniques and domain-specific fine-tuning expanded their reach into data-limited areas of materials research. While challenges such as interpretability and generalization across modalities persist, the integration of deep learning with physics-guided modeling and generative frameworks continues to push the boundaries of automated microstructural analysis (<xref ref-type="bibr" rid="B2">Ashtiani et al., 2021</xref>).</p>
<p>Based on the limitations of feature dependence in ML and the data hunger and interpretability concerns of deep learning, we propose a hybrid method that leverages domain-specific inductive biases and lightweight attention-enhanced CNNs to balance performance, efficiency, and generalizability. Our approach integrates a shallow attention module that dynamically focuses on microstructural regions of interest, combined with transfer learning from a domain-specific pretraining phase, allowing the model to effectively classify diverse microstructures with fewer labeled examples. By embedding prior knowledge and enhancing feature saliency, our model addresses both the data efficiency and explainability challenges commonly faced in deep learning-based material image analysis. Furthermore, we evaluate the method across multiple datasets covering various material types and imaging resolutions, demonstrating its robustness and practical value for accelerating microstructural classification in computational materials science.<list list-type="bullet">
<list-item>
<p>We introduce a novel attention-augmented CNN architecture tailored for microstructural image classification, enabling dynamic focus on relevant texture features.</p>
</list-item>
<list-item>
<p>The method exhibits high adaptability across different materials and imaging conditions, ensuring generalizability and efficiency in practical applications.</p>
</list-item>
<list-item>
<p>Experimental results show that our model achieves superior classification accuracy with reduced training data requirements, outperforming conventional CNN baselines.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2-1">
<label>2.1</label>
<title>Convolutional networks for microstructures</title>
<p>The application of convolutional neural networks (CNNs) to microstructural image classification has emerged as a central theme in computational materials science. CNNs are well-suited to this domain due to their capability to capture hierarchical spatial features in image data, which is critical when analyzing complex textures and phase distributions inherent in materials microstructures (<xref ref-type="bibr" rid="B25">Masana et al., 2020</xref>). Early efforts focused on utilizing standard architectures such as AlexNet and VGGNet to distinguish between different grain morphologies, crystal orientations, and defect types. These models demonstrated strong performance on datasets of synthetic micrographs generated through phase-field simulations or molecular dynamics (<xref ref-type="bibr" rid="B33">Rezaei et al., 2025</xref>). Subsequent studies improved upon these methods by incorporating domain-specific augmentations and preprocessing techniques tailored to the nature of materials images. For example, contrast normalization, orientation alignment, and noise filtering were often employed to standardize inputs and enhance feature salience (<xref ref-type="bibr" rid="B36">Sheykhmousa et al., 2020</xref>). Transfer learning from pretrained networks on natural image datasets such as HEDM has also been shown to significantly boost performance, particularly when labeled microstructural datasets are limited in size. Recent work has moved beyond mere classification to integrate CNNs with unsupervised learning and clustering to uncover latent structural patterns (<xref ref-type="bibr" rid="B26">Mascarenhas and Agarwal, 2021</xref>). Hybrid methods that combine CNN-based feature extractors with classical machine learning classifiers have proven effective in improving the generalizability of findings across diverse material systems. 
Such approaches underscore the adaptability of deep convolutional models in handling the heterogeneity and high dimensionality typical of microstructural data in materials informatics (<xref ref-type="bibr" rid="B31">Rezaei et al., 2024a</xref>).</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Data augmentation and synthesis</title>
<p>Data scarcity remains a pressing challenge in the development of robust deep learning models for microstructural classification (<xref ref-type="bibr" rid="B32">Rezaei et al., 2024b</xref>). To address this, a variety of data augmentation and synthesis strategies have been employed. Basic augmentation techniques such as rotation, flipping, scaling, and elastic deformation are widely adopted to enhance model generalization and reduce overfitting. These transformations simulate the physical variability present in microstructural samples without altering their intrinsic material characteristics (<xref ref-type="bibr" rid="B46">Zhang et al., 2022</xref>). More sophisticated approaches leverage generative adversarial networks (GANs) to create realistic synthetic micrographs. GANs can learn the underlying distribution of microstructural images and generate high-fidelity examples that preserve critical statistical and textural properties. These synthetic datasets not only augment training corpora but also support model benchmarking under controlled conditions (<xref ref-type="bibr" rid="B9">Dai and Gao, 2021</xref>). Conditional GANs (cGANs) have further enabled the generation of class-specific microstructures, enhancing the diversity and utility of synthetic samples in supervised learning contexts. Another promising avenue involves the use of physics-informed simulations to generate labeled microstructural data. Phase-field modeling, Monte Carlo methods, and cellular automata simulations are commonly utilized to produce synthetic micrographs with known ground truths (<xref ref-type="bibr" rid="B39">Taori et al., 2020</xref>). These simulated datasets serve as a valuable source of training data, particularly for rare or experimentally inaccessible microstructural features. 
Integrating such data with real experimental micrographs through domain adaptation techniques can bridge the synthetic&#x2013;real gap and improve model transferability to practical applications (<xref ref-type="bibr" rid="B1">Alotaibi et al., 2025</xref>).</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Interpretable and physics-guided models</title>
<p>The integration of interpretability and physical priors into deep learning frameworks represents crucial research (<xref ref-type="bibr" rid="B35">Ru et al., 2025</xref>). Traditional CNNs, while powerful, often function as black boxes, providing little insight into the underlying material phenomena driving classification outcomes (<xref ref-type="bibr" rid="B29">Peng et al., 2022</xref>). To mitigate this, recent work has explored explainable AI (XAI) techniques to visualize salient features and activation maps. Methods such as Grad-CAM, Layer-wise Relevance Propagation, and occlusion sensitivity analysis have been applied to reveal which microstructural regions contribute most significantly to model predictions (<xref ref-type="bibr" rid="B4">Bazi et al., 2021</xref>). Parallel efforts aim to embed physical constraints directly into model architectures or loss functions. Physics-guided neural networks (PGNNs) and theory-informed loss formulations ensure that predictions are not only accurate but also consistent with known physical laws and microstructural mechanics. These approaches improve trustworthiness and facilitate integration with existing computational materials models (<xref ref-type="bibr" rid="B48">Zheng et al., 2022</xref>). For instance, incorporating symmetry operations, crystallographic invariants, and defect energetics into the learning pipeline enables the network to learn more meaningful and generalizable representations. Another stream of research involves the fusion of multimodal data&#x2014;combining image data with scalar features such as composition, processing history, or mechanical properties (<xref ref-type="bibr" rid="B11">Dong H. et al., 2022</xref>). By constructing multi-input models or employing attention mechanisms, these frameworks can model complex structure&#x2013;property&#x2013;process relationships that govern material behavior. 
The emphasis on interpretability and physics consistency ensures that deep learning models serve not just as predictive tools but also as instruments for scientific discovery in materials science (<xref ref-type="bibr" rid="B23">Liu and Huang, 2025</xref>).</p>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<label>3</label>
<title>Methods</title>
<sec id="s3-1">
<label>3.1</label>
<title>Overview</title>
<p>In this paper, we investigate the problem of microstructural analysis from a computational perspective, aiming to uncover latent patterns embedded within the complex topology and morphology of material microstructures. This problem is central to a variety of disciplines, including materials science, computational mechanics, and imaging analysis, where fine-grained structural understanding is indispensable for property prediction, synthesis, and optimization. Our method section unfolds as a comprehensive blueprint of the proposed framework, which is grounded in rigorous mathematical modeling, algorithmic innovation, and domain-specific reasoning. In the following sections, we articulate the methodology across three complementary components, each of which addresses a critical stage in the analytical process. The overall structure is designed to support a seamless transition from theoretical abstraction to practical implementation, thereby enhancing both interpretability and extensibility of the proposed pipeline. <xref ref-type="sec" rid="s3-2">Section 3.2</xref> lays the groundwork by introducing the essential formalism required for modeling microstructural data, and we establish the notation and mathematical foundations required to express microstructure fields, characterize their variability, and formulate the analytical objectives. These preliminaries include the symbolic encoding of spatial domains, the statistical representation of morphological features, and the formal expression of symmetry and invariance conditions. We also outline the high-level problem setting, emphasizing the role of probabilistic descriptors, topological constraints, and the challenges associated with high-dimensional microstructural manifolds. <xref ref-type="sec" rid="s3-3">Section 3.3</xref> introduces our core contribution&#x2014;a novel generative mechanism tailored for microstructural representation learning, which we refer to as MorphoTensor. 
Unlike existing approaches that treat microstructure either as deterministic fields or fixed-resolution images, MorphoTensor incorporates hierarchical tensorial embeddings to preserve directional, scale-sensitive, and spatially localized information. This representation enables fine control over the expressivity and regularity of the model and accommodates domain priors such as anisotropy and periodicity. We also integrate latent Gaussian processes into the architecture to capture the uncertainty and multi-modality, ensuring robustness under incomplete or noisy observations. In <xref ref-type="sec" rid="s3-4">Section 3.4</xref>, we introduce a complementary strategy we term Topology-Aware Augmented Encoding, which governs how microstructures are processed, interpreted, and regularized during learning. This strategy goes beyond conventional supervision or autoencoding schemes by embedding topological invariants&#x2014;such as Betti numbers and persistence diagrams&#x2014;into the optimization loop via differentiable approximations. This coupling between topological reasoning and geometric encoding forms a feedback system wherein local morphological consistency and global topological stability co-evolve during training. We explore a data augmentation and sampling regime inspired by persistent homology, which aids in generating diverse yet structurally coherent microstructures for both training and downstream applications.</p>
<p>Each of the three aforementioned sections is designed to build upon the previous one, progressively refining the microstructural analysis from abstract symbolic encoding to structured representations and then to intelligent processing strategies. The integration of these components enables a unified and extensible analysis framework capable of handling a broad spectrum of microstructural modalities&#x2014;including binary phase fields, grayscale reconstructions, orientation maps, and multiphase composites. Throughout this methodological exposition, we remain anchored to the physical and statistical realities of microstructural data. This includes adherence to periodic boundary conditions, accommodation of multiscale heterogeneities, and respect for the sparsity and redundancy that typify real-world microstructures. Our framework is implemented in a modular fashion, enabling easy extension to supervised learning, inverse design, and uncertainty quantification tasks. Furthermore, the proposed methods are compatible with both synthetic benchmark datasets and empirical datasets derived from electron microscopy, X-ray tomography, and phase-field simulations. The methodology section of this paper lays out a rigorous, principled, and interpretable framework for microstructural analysis, including a formal problem encoding of microstructure variability and spatial characteristics; a generative modeling framework with structural priors and hierarchical embeddings; and a topology-aware processing strategy that couples geometric representation with topological reasoning. These components coalesce to form a holistic analytical toolkit, enabling robust learning and meaningful interpretation of complex material microstructures.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Preliminaries</title>
<p>Let <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>&#x2282;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent a bounded domain in physical space that defines the spatial extent of a microstructure, where <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> corresponds to a planar setting and <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> to a volumetric one. A microstructure is modeled as a measurable function <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the material state space. Depending on the context, <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> may be a discrete label set, a real-valued interval, or a manifold.</p>
<p>We define the space of admissible microstructures <xref ref-type="disp-formula" rid="e1">Equation 1</xref> as<disp-formula id="e1">
<mml:math id="m10">
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">C</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> encodes a set of constraint functionals such as volume fraction, symmetry, or topology preservation.</p>
<p>To characterize microstructure variability, we consider a probability space <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="double-struck">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where each <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> corresponds to a latent descriptor and induces a realization <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. This gives rise to a random field <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x21a6;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Let <inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote a feature extractor mapping a microstructure to a finite-dimensional descriptor space, such as statistical moments, correlation functions, or topological invariants.</p>
<p>We define a metric <inline-formula id="inf16">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> measuring the dissimilarity between microstructures <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.<disp-formula id="e2">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:mi>u</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>v</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Given a dataset <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2282;</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, we aim to learn a compact representation or generative process for the underlying distribution <inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="double-struck">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. We express this as finding a mapping <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.<disp-formula id="e3">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">Z</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>z</mml:mi>
<mml:mo>&#x21a6;</mml:mo>
<mml:mi mathvariant="script">G</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">Z</mml:mi>
<mml:mo>&#x2282;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is drawn from a known prior distribution <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, yielding the generative formulation <inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Structural constraints are enforced through functionals <inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> such that <xref ref-type="disp-formula" rid="e4">Equation 4</xref>
<disp-formula id="e4">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>including invariants like volume fraction or periodicity.</p>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> provides an explicit mapping between the abstract constraint functionals <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> introduced in the preliminaries and the concrete physical or geometric properties enforced during model training. This mapping helps bridge the mathematical formalism with domain-relevant material descriptors commonly used in microstructural analysis.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Mapping of constraint functionals <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to material descriptors used in experiments.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Symbol</th>
<th align="left">Physical constraint</th>
<th align="left">Description/Implementation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Volume fraction</td>
<td align="left">Enforced via global average intensity; target value fixed per class</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf26">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Symmetry</td>
<td align="left">Implicitly enforced through rotational augmentation and generator filters</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Periodicity</td>
<td align="left">Applied via periodic boundary conditions in synthetic datasets</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Topological invariants</td>
<td align="left">Enforced through persistence-based <inline-formula id="inf29">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> loss</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf30">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Grain size distribution</td>
<td align="left">Controlled through latent sampling and phase-field simulation parameters</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>MorphoTensor</title>
<p>To effectively model microstructural variability under geometric, physical, and topological constraints, we propose a novel generative model termed MorphoTensor. This model integrates hierarchical tensor representations with stochastic latent encoding, enabling expressivity over multiscale spatial patterns while respecting the underlying microstructural physics (as shown in <xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic diagram of the MorphoTensor architecture. The framework consists of three modules: (1) a Hierarchical Tensor-Based Generator that synthesizes physically plausible microstructure images using multiscale tensorial convolutions; (2) a Latent Spatial Warping Mechanism that introduces deformation fields to capture heterogeneity and anisotropy in microstructures; (3) a Differentiable Regularization module that enforces structural consistency and aligns latent representations with geometric and statistical priors.</p>
</caption>
<graphic xlink:href="fmats-12-1648653-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating two processes: Hierarchical Tensor Convolutions and Latent Spatial Warping with Spectral Interpolation and Residual Paths. The top section shows tensors undergoing convolution and spatial warping. The bottom section depicts tensor convolution leading to spectral interpolation and combination with residual paths.</alt-text>
</graphic>
</fig>
<sec id="s3-3-1">
<label>3.3.1</label>
<title>Hierarchical Tensor-Based Generator</title>
<p>Let <inline-formula id="inf31">
<mml:math id="m35">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> be a latent vector sampled from a known prior distribution <inline-formula id="inf32">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, typically <inline-formula id="inf33">
<mml:math id="m37">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The generative model <inline-formula id="inf34">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, parameterized by <inline-formula id="inf35">
<mml:math id="m39">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, maps the latent vector to a structured microstructure function <inline-formula id="inf36">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> defined over the spatial domain <inline-formula id="inf37">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>&#x2282;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> (as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Schematic diagram of the Hierarchical Tensor-Based Generator framework. Microstructure inputs are encoded into tensor representations, followed by multiscale convolutions, attention mapping, and latent decoding. Spectral interpolation and residual connections are applied to maintain frequency-domain consistency and enhance feature expressiveness. The diagram highlights the flow of structural variables across different processing stages, supporting fine-grained feature learning and robust microstructure classification.</p>
</caption>
<graphic xlink:href="fmats-12-1648653-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network process. It begins with &#x22;Microstructure Input,&#x22; followed by &#x22;Tensor Encoding,&#x22; then &#x22;Multiscale Convolutions,&#x22; and an &#x22;Attention Map&#x22;. Variables &#x3C8;, V, X, and &#x3BE; are included. Next is &#x22;Latent Decoding,&#x22; &#x22;Spectral Interpolation,&#x22; and ends with &#x22;Residual paths.&#x22; Arrows indicate the process flow.</alt-text>
</graphic>
</fig>
<p>To capture multiscale and anisotropic textures, the generator is constructed using a hierarchy of tensor-valued convolutional layers, spectral interpolation modules, and directional filter banks.</p>
<p>The architecture is defined recursively as a depth-<inline-formula id="inf38">
<mml:math id="m42">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> composition of learned transformations with residual integration <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.<disp-formula id="e5">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x25e6;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x25e6;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x25e6;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x25e6;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf39">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a learnable convolutional operator at scale <inline-formula id="inf40">
<mml:math id="m45">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf41">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a Fourier-domain interpolation that ensures smooth upsampling, and <inline-formula id="inf42">
<mml:math id="m47">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the final nonlinearity adapted to the output range, such as <inline-formula id="inf43">
<mml:math id="m48">
<mml:mrow>
<mml:mtext>sigmoid</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> for grayscale porosity maps or <inline-formula id="inf44">
<mml:math id="m49">
<mml:mrow>
<mml:mtext>softmax</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> for categorical phase fields.</p>
<p>Each convolutional layer <inline-formula id="inf45">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined using structured tensorial filters <inline-formula id="inf46">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf47">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of output channels. These filters are constructed as expansions over directional basis functions <xref ref-type="disp-formula" rid="e6">Equation 6</xref>.<disp-formula id="e6">
<mml:math id="m53">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mtext>with&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf48">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a Hilbert space of steerable wavelets or spherical harmonics, and <inline-formula id="inf49">
<mml:math id="m55">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are learnable coefficients controlling the response in each direction. The filter rank <inline-formula id="inf50">
<mml:math id="m56">
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> determines the expressiveness of each tensor.</p>
<p>To preserve frequency-domain consistency across scales, we use spectral upsampling <inline-formula id="inf51">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> defined by zero-padding and inverse FFT <xref ref-type="disp-formula" rid="e7">Equation 7</xref>.<disp-formula id="e7">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mtext>Pad</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf52">
<mml:math id="m59">
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf53">
<mml:math id="m60">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote the discrete Fourier transform and its inverse, respectively, and <inline-formula id="inf54">
<mml:math id="m61">
<mml:mrow>
<mml:mtext>Pad</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a zero-padding operator that enlarges the spatial resolution while maintaining alignment of dominant frequencies.</p>
<p>To ensure that the convolutional responses reflect localized, structured phenomena such as grains, inclusions, or fibers, we enforce local energy normalization on the output of each filter bank <xref ref-type="disp-formula" rid="e8">Equation 8</xref>.<disp-formula id="e8">
<mml:math id="m62">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf55">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a scale-dependent energy budget that constrains the expressiveness and avoids overfitting to high-frequency noise.</p>
<p>To maintain structural diversity across output samples, we apply instance-wise modulation to the filter responses via learned affine coefficients <inline-formula id="inf56">
<mml:math id="m64">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> conditioned on <inline-formula id="inf57">
<mml:math id="m65">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="disp-formula" rid="e9">Equation 9</xref>.<disp-formula id="e9">
<mml:math id="m66">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>mod</mml:mtext>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>which allows the generator to adjust local contrast and bias according to latent-conditioned semantics.</p>
</sec>
<sec id="s3-3-2">
<label>3.3.2</label>
<title>Latent Spatial Warping Mechanism</title>
<p>We enhance the generative capacity of the model by introducing a latent-driven spatial warping mechanism, where a coordinate deformation field modulates the geometry of the decoded microstructure. This warping function <inline-formula id="inf58">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is constructed to capture spatial heterogeneity and geometric irregularities observed in real-world materials <xref ref-type="disp-formula" rid="e10">Equation 10</xref>.<disp-formula id="e10">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf59">
<mml:math id="m69">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the original coordinate in the spatial domain, <inline-formula id="inf60">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a deformation field generated by a shallow convolutional neural network with trainable parameters <inline-formula id="inf61">
<mml:math id="m71">
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf62">
<mml:math id="m72">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a scalar hyperparameter controlling the amplitude of deformation. The <inline-formula id="inf63">
<mml:math id="m73">
<mml:mrow>
<mml:mi>tanh</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> nonlinearity ensures bounded and smooth deformation behavior, promoting spatial continuity and regularity.</p>
<p>The warped field is computed through a pullback operation, where the decoded unwarped field <inline-formula id="inf64">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is evaluated at the deformed coordinates <xref ref-type="disp-formula" rid="e11">Equation 11</xref>.<disp-formula id="e11">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>This operation re-parameterizes the spatial layout of the field and allows the generator to model nonstationary features such as gradients, interfaces, and geometric anisotropy that cannot be captured by stationary convolutions alone.</p>
<p>To ensure invertibility and smoothness of the transformation, the deformation field is regularized using a Jacobian-based penalty. Let <inline-formula id="inf65">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the Jacobian matrix of <inline-formula id="inf66">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> at point <inline-formula id="inf67">
<mml:math id="m78">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="disp-formula" rid="e12">Equation 12</xref>.<disp-formula id="e12">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>tanh</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf68">
<mml:math id="m80">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the identity matrix and the term involving the derivative of <inline-formula id="inf69">
<mml:math id="m81">
<mml:mrow>
<mml:mi>tanh</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> ensures smooth gradient propagation.</p>
<p>We penalize extreme local distortion by enforcing a regularization on the Frobenius norm of the Jacobian deviation from identity <xref ref-type="disp-formula" rid="e13">Equation 13</xref>.<disp-formula id="e13">
<mml:math id="m82">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>warp</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>which discourages excessive stretching or folding of the coordinate map and ensures physical plausibility of the warped domain.</p>
<p>To preserve volume and avoid folding, we introduce a determinant-based regularizer that promotes diffeomorphic mappings <xref ref-type="disp-formula" rid="e14">Equation 14</xref>.<disp-formula id="e14">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>det</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>det</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>The total output field <inline-formula id="inf70">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> generated via warped coordinates inherits both statistical realism and geometric fidelity. During training, gradients flow through <inline-formula id="inf71">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf72">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, enabling the model to learn deformation patterns from data without supervision, while satisfying invertibility and smoothness constraints encoded via <inline-formula id="inf73">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>warp</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf74">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>det</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-3-3">
<label>3.3.3</label>
<title>Differentiable Topological and Manifold Regularization</title>
<p>To ensure topological characteristics of generated microstructures are preserved, we embed a differentiable approximation of topological invariants into the training objective. In particular, we focus on Betti numbers <inline-formula id="inf75">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, which quantify the number of connected components <inline-formula id="inf76">
<mml:math id="m90">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, loops <inline-formula id="inf77">
<mml:math id="m91">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and voids <inline-formula id="inf78">
<mml:math id="m92">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> in the binarized microstructure <inline-formula id="inf79">
<mml:math id="m93">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The empirical mismatch between the generated and target topology is penalized by the following loss <xref ref-type="disp-formula" rid="e15">Equation 15</xref>.<disp-formula id="e15">
<mml:math id="m94">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>where <inline-formula id="inf80">
<mml:math id="m95">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the dataset-averaged Betti number for dimension <inline-formula id="inf81">
<mml:math id="m96">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf82">
<mml:math id="m97">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the spatial dimension of the microstructure domain <inline-formula id="inf83">
<mml:math id="m98">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>As direct gradients through topological features are not tractable, we adopt a differentiable proxy using persistent homology. Let <inline-formula id="inf84">
<mml:math id="m99">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mtext>persistence</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denote the birth&#x2013;death lifetime of the <inline-formula id="inf85">
<mml:math id="m100">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th feature in the <inline-formula id="inf86">
<mml:math id="m101">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th homology group. The smoothed topological count is then approximated <xref ref-type="disp-formula" rid="e16">Equation 16</xref>.<disp-formula id="e16">
<mml:math id="m102">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mtext>persistence</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>where <inline-formula id="inf87">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> is a soft threshold function that emphasizes persistent (i.e., long-lived) topological features while suppressing noise artifacts. The parameter <inline-formula id="inf88">
<mml:math id="m104">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> controls the sharpness of the approximation.</p>
<p>To further regularize the geometry of the latent space, we induce a Riemannian metric <inline-formula id="inf89">
<mml:math id="m105">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> on the latent manifold <inline-formula id="inf90">
<mml:math id="m106">
<mml:mrow>
<mml:mi mathvariant="script">Z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> via the Jacobian of the generator <inline-formula id="inf91">
<mml:math id="m107">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="disp-formula" rid="e17">Equation 17</xref>.<disp-formula id="e17">
<mml:math id="m108">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x27e8;" close="&#x27e9;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>which captures the sensitivity of the generated microstructure to changes in each latent direction and encodes the intrinsic geometry of the generative map.</p>
<p>To discourage excessive curvature in the latent space, which may lead to poorly generalizable representations, we introduce a curvature regularizer based on the Frobenius norm of the second-order derivatives <xref ref-type="disp-formula" rid="e18">Equation 18</xref>.<disp-formula id="e18">
<mml:math id="m109">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>curv</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>Tr</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mi>g</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mi>g</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>To encourage smooth topological variation across samples, we introduce a pairwise consistency loss over mini-batches. Let <inline-formula id="inf92">
<mml:math id="m110">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf93">
<mml:math id="m111">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be two generated microstructures from nearby latent codes <xref ref-type="disp-formula" rid="e19">Equation 19</xref>.<disp-formula id="e19">
<mml:math id="m112">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>betti</mml:mtext>
<mml:mo>-</mml:mo>
<mml:mtext>smooth</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>which penalizes abrupt topological changes and aligns the learned manifold with the topology-aware data structure.</p>
</sec>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Topology-Aware Latent Refinement</title>
<p>To effectively train the proposed MorphoTensor model for microstructural generation, we develop a novel strategy termed Topology-Aware Latent Refinement (TLR). This strategy leverages both data-driven loss formulation and structure-aware regularization to achieve physically consistent, statistically expressive, and topologically faithful microstructure synthesis. The TLR approach integrates multiscale supervision, adaptive perturbation schemes, and homology-aligned optimization (as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Schematic diagram of the Latent Refinement module. The module integrates three components: (1) Multiscale Variational Learning, which captures both global and local structural patterns; (2) Constraint Projection and Augmentation, which incorporates physical priors and adaptive sampling; (3) Latent Decoding with topological and structural regularization. Together, these components refine latent representations to ensure accurate reconstruction, improved generalization, and physically consistent microstructure synthesis.</p>
</caption>
<graphic xlink:href="fmats-12-1648653-g003.tif">
<alt-text content-type="machine-generated">Diagram of the Topology-Aware Latent Refinement module. Input microstructures are processed by a multiscale variational learning stage that captures global and local structural patterns. The resulting latent representations pass through a constraint projection and augmentation stage incorporating physical priors and adaptive sampling, and are then decoded with topological and structural regularization. Arrows indicate the data flow between blocks labeled Multiscale Variational Learning, Constraint Projection and Augmentation, Latent Decoding, and Loss.</alt-text>
</graphic>
</fig>
<sec id="s3-4-1">
<label>3.4.1</label>
<title>Multiscale Variational Learning</title>
<p>Given a dataset of microstructures <inline-formula id="inf94">
<mml:math id="m113">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, we introduce a stochastic encoder&#x2013;decoder architecture to learn latent representations that preserve both global structure and fine-scale variability (as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Schematic diagram of the Multiscale Variational Learning. The figure contains three interconnected modules including Global Patterns Expert, which leverages Mamba-style stacked linear, convolutional, and state-space modeling (SSM) blocks to capture coarse global trends, followed by a feedforward layer; Long-Short Router with Multi-Scale Patcher, where long- and short-range time series (TS) are routed to low- and high-resolution learning paths with a probability split of <inline-formula id="inf95">
<mml:math id="m114">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.63</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf96">
<mml:math id="m115">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.37</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, before being fed into dedicated patterns and variation branches; and Multiscale Variational Learning, combining local window attention, positional encoding, and feedforward networks to encode high-frequency dynamics within hierarchical temporal representations.</p>
</caption>
<graphic xlink:href="fmats-12-1648653-g004.tif">
<alt-text content-type="machine-generated">Diagram illustrating a complex machine learning architecture. It features three main sections: Global Patterns Expert, Multi-Scale Learning, and Multiscale Variational Learning. The Global Patterns Expert includes components like Mamba, Feed Forward, and Encoding Layer, using elements like SSM and Conv. Multi-Scale Learning processes include Long and Short-Range TS, Learning, and Patcher. The Multiscale Variational Learning section includes LWT, Local Window Attention, and Positional Encoding. The flow of data is indicated with arrows, showcasing paths through various stages, with outputs represented as patterns and variations.</alt-text>
</graphic>
</fig>
<p>Each input <inline-formula id="inf97">
<mml:math id="m116">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is encoded into a latent variable <inline-formula id="inf98">
<mml:math id="m117">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> through a variational posterior <inline-formula id="inf99">
<mml:math id="m118">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> modeled as a multivariate Gaussian <xref ref-type="disp-formula" rid="e20">Equation 20</xref>.<disp-formula id="e20">
<mml:math id="m119">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>where <inline-formula id="inf100">
<mml:math id="m120">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf101">
<mml:math id="m121">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are learned mappings implemented via convolutional neural networks with shared encoder weights <inline-formula id="inf102">
<mml:math id="m122">
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The overall training objective follows the variational autoencoder (VAE) framework and is designed to balance accurate reconstruction with latent regularity. The total VAE loss is written <xref ref-type="disp-formula" rid="e21">Equation 21</xref>.<disp-formula id="e21">
<mml:math id="m123">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>VAE</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="double-struck">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="double-struck">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>rec</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>KL</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>where <inline-formula id="inf103">
<mml:math id="m124">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a standard normal prior and <inline-formula id="inf104">
<mml:math id="m125">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>KL</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the Kullback&#x2013;Leibler divergence that regularizes the posterior.</p>
<p>To measure reconstruction quality, we adopt a scale-adaptive loss defined over a multiresolution decomposition of the microstructure. Let <inline-formula id="inf105">
<mml:math id="m126">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote a Gaussian pyramid downsampling operator at the scale level <inline-formula id="inf106">
<mml:math id="m127">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and define the hierarchical loss <xref ref-type="disp-formula" rid="e22">Equation 22</xref>.<disp-formula id="e22">
<mml:math id="m128">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>rec</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>u</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>where <inline-formula id="inf107">
<mml:math id="m129">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the output of the generator, and <inline-formula id="inf108">
<mml:math id="m130">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are scale-specific weights that emphasize higher-resolution errors more strongly, typically chosen as <inline-formula id="inf109">
<mml:math id="m131">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x221d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>To further improve the expressiveness of the latent space, we inject structured noise into <inline-formula id="inf110">
<mml:math id="m132">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> through a reparameterization trick <xref ref-type="disp-formula" rid="e23">Equation 23</xref>.<disp-formula id="e23">
<mml:math id="m133">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2299;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>which allows gradients to propagate through the stochastic sampling process during training.</p>
<p>Moreover, to prevent posterior collapse and enhance diversity, we regularize the mutual information between <inline-formula id="inf111">
<mml:math id="m134">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf112">
<mml:math id="m135">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using a contrastive lower bound <xref ref-type="disp-formula" rid="e24">Equation 24</xref>.<disp-formula id="e24">
<mml:math id="m136">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>MI</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(24)</label>
</disp-formula>where <inline-formula id="inf113">
<mml:math id="m137">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">&#x27e8;</mml:mo>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">&#x27e9;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a similarity function computed over learned projections, and <inline-formula id="inf114">
<mml:math id="m138">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the mini-batch size.</p>
<p>The total loss incorporates all terms with tunable weights <inline-formula id="inf115">
<mml:math id="m139">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>rec</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>KL</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>MI</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="disp-formula" rid="e25">Equation 25</xref>.<disp-formula id="e25">
<mml:math id="m140">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>total</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>rec</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>rec</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>KL</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>KL</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>MI</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>MI</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(25)</label>
</disp-formula>This objective enables learning of compact, expressive, and scale-aware latent representations tailored for microstructural variability.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental setup</title>
<sec id="s4-1">
<label>4.1</label>
<title>Dataset</title>
<p>For completeness, we also consider results on generic large-scale image datasets (HEDM <xref ref-type="bibr" rid="B28">Muralikrishnan et al., 2023</xref>) and (HTEM <xref ref-type="bibr" rid="B37">Steingrimsson et al., 2023</xref>) as pretraining/transfer baselines; these details are provided in the Appendix. WELD SEAM Dataset (<xref ref-type="bibr" rid="B47">Zhao et al., 2024</xref>) is a fine-grained image classification dataset comprising 8,189 images of weld seams collected from various manufacturing environments. It spans 102 categories, each representing different types or conditions of weld seams. Each category contains between 40 and 258 images characterized by large variations in scale, pose, illumination, and surface finish. The dataset focuses on challenging intra-class similarity and inter-class variation as many weld seams share visual features. The images were obtained through industrial inspection systems and labeled using a combination of automated tools and expert manual verification. It has been widely used for evaluating fine-grained classification algorithms and defect detection models. The high-resolution imagery supports detailed texture and surface feature extraction, crucial for distinguishing subtle differences in weld quality. The MID Dataset (<xref ref-type="bibr" rid="B17">Jackson et al., 2022</xref>) contains 5,640 texture images organized into 47 categories based on human-centric attributes such as striped, dotted, fibrous, and bumpy. It emphasizes perceptual texture properties rather than object identities. Each category includes 120 images collected from diverse natural and artificial sources. The dataset supports research in texture recognition, segmentation, and attribute-based representation learning. All images are annotated according to describable attributes defined by human perception rather than material composition or object context. 
This makes MID suitable for studying mid-level visual attributes and for training models that interpret abstract semantic properties. The dataset challenges models to generalize texture recognition across variations in scale, illumination, and viewpoint.</p>
<p>In this work, EBSD orientation maps and synthetic phase-field simulations are used as the primary datasets for evaluating classification performance and topological consistency.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Experimental details</title>
<p>All experiments were conducted using the PyTorch framework on a server equipped with NVIDIA A100 GPUs (80 GB memory, CUDA 12.1). Mixed-precision training was adopted to accelerate convergence and reduce memory usage. For the HEDM and HTEM datasets, all images were resized to <inline-formula id="inf116">
<mml:math id="m141">
<mml:mrow>
<mml:mn>224</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>224</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> with central cropping for validation; for EBSD and phase-field datasets, we used <inline-formula id="inf117">
<mml:math id="m142">
<mml:mrow>
<mml:mn>256</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>256</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> inputs with random rotation, flipping, and grayscale jittering to preserve structural variability. The backbone was initialized from ResNet-50 pretrained on HEDM, with dataset-specific classifier heads added. Training was performed with the AdamW optimizer (weight decay &#x3d; <inline-formula id="inf118">
<mml:math id="m143">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, initial learning rate &#x3d; <inline-formula id="inf119">
<mml:math id="m144">
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, cosine annealing schedule, 10-epoch warm-up). Batch sizes were 256 for HEDM and 64 for the other datasets. Regularization included label smoothing <inline-formula id="inf120">
<mml:math id="m145">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, dropout (0.5), random erasing, and RandAugment. Evaluation metrics included top-1/top-5 accuracy, mean per-class accuracy, F1 score, and multi-class AUC. For multi-class settings, AUC was computed using a macro-averaged one-vs-rest (OvR) scheme, ensuring balanced treatment of all classes. All reported numbers include 95% confidence intervals (CIs), calculated over three independent runs using the Student <italic>t</italic>-distribution. Statistical significance annotations are standardized across all tables, where <inline-formula id="inf121">
<mml:math id="m146">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf122">
<mml:math id="m147">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf123">
<mml:math id="m148">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> are indicated by <italic>&#x2a;</italic>, <italic>&#x2a;&#x2a;</italic>, and <italic>&#x2a;&#x2a;&#x2a;</italic>, respectively. This ensures consistent reporting and reliable comparison across experiments.</p>
<p>This operator ensures that the generated microstructure maintains a target volume fraction by re-normalizing pixel intensities. The process is differentiable and integrated into the training loop, allowing physics-aware backpropagation without introducing hard constraints, as detailed in <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref>.</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>Constraint Projection Operator <inline-formula id="inf124">
<mml:math id="m149">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for Volume Fraction.<list list-type="simple">
<list-item>
<p> Require: Generated image <inline-formula id="inf125">
<mml:math id="m150">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0,1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, target volume fraction <inline-formula id="inf126">
<mml:math id="m151">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>target</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>  1: Compute current volume fraction: <inline-formula id="inf127">
<mml:math id="m152">
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>mean</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>  2: Compute scaling factor: <inline-formula id="inf128">
<mml:math id="m153">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>target</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>  3: Apply rescaling: <inline-formula id="inf129">
<mml:math id="m154">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>clip</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>0,1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>  4: return Projected image <inline-formula id="inf130">
<mml:math id="m155">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>All reported results are averaged over three independent runs. 95% confidence intervals are computed using the Student&#x2019;s t-distribution with 2 degrees of freedom. Statistical significance tests (two-tailed t-tests) are performed where appropriate.</p>
<p>For multi-class AUC computation, we adopt a macro-averaging strategy using the one-vs-rest approach, which calculates the AUC independently for each class and then takes the unweighted mean. This method is particularly suitable for imbalanced class distributions, such as those in the EBSD and phase-field datasets.</p>
<p>The following training configurations apply specifically to microstructural datasets (EBSD and phase-field). For these datasets, input images are resized to 256<inline-formula id="inf131">
<mml:math id="m156">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>256, and the model is trained from scratch using domain-specific augmentations including rotation, flipping, and grayscale jittering. Evaluation is based on mean per-class accuracy, F1 score, and structural metrics. For completeness, results on generic image classification datasets (HEDM and HTEM) are reported in the Appendix, and their corresponding training details are described therein.</p>
<p>All structural metrics are computed on normalized microstructure fields: 2-point correlation values are measured on images scaled to the range [0,1], while length-based descriptors (e.g., chord length and phase size) are expressed as fractions of the total image width.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Benchmarking against leading methods</title>
<p>Results on general-purpose datasets (HEDM and HTEM) are used only as generic pretraining and transfer baselines. Our primary evaluations focus on EBSD and phase-field datasets, which directly reflect the microstructural nature of the problem.</p>
<p>To address domain-specific validation, we incorporated two microstructure-centered datasets: (i) EBSD orientation maps from Ti-Al alloy specimens in <xref ref-type="table" rid="T2">Table 2</xref> and (ii) synthetic phase-field simulations using the Cahn-Hilliard equation in <xref ref-type="table" rid="T3">Table 3</xref>. The following tables report classification metrics and topology-aware evaluation on these datasets.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Classification performance on EBSD and Phase-field datasets with 95% confidence intervals. Values are reported as mean <inline-formula id="inf132">
<mml:math id="m157">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 95% CI over three runs. Significance is denoted as &#x2a; <inline-formula id="inf133">
<mml:math id="m158">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2a;&#x2a; <inline-formula id="inf134">
<mml:math id="m159">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and &#x2a;&#x2a;&#x2a; <inline-formula id="inf135">
<mml:math id="m160">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model/Dataset</th>
<th align="center">Accuracy</th>
<th align="center">Recall</th>
<th align="center">F1 score</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">EBSD (CNN baseline)</td>
<td align="center">
<inline-formula id="inf136">
<mml:math id="m161">
<mml:mrow>
<mml:mn>82.51</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.85</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf137">
<mml:math id="m162">
<mml:mrow>
<mml:mn>80.12</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>1.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf138">
<mml:math id="m163">
<mml:mrow>
<mml:mn>81.15</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.92</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf139">
<mml:math id="m164">
<mml:mrow>
<mml:mn>85.33</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.76</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">EBSD (ours)</td>
<td align="center">
<inline-formula id="inf140">
<mml:math id="m165">
<mml:mrow>
<mml:mn mathvariant="bold">88.93</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.67</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf141">
<mml:math id="m166">
<mml:mrow>
<mml:mn mathvariant="bold">87.02</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.59</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf142">
<mml:math id="m167">
<mml:mrow>
<mml:mn mathvariant="bold">87.61</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.73</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf143">
<mml:math id="m168">
<mml:mrow>
<mml:mn mathvariant="bold">91.05</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.65</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Phase-field (CNN baseline)</td>
<td align="center">
<inline-formula id="inf144">
<mml:math id="m169">
<mml:mrow>
<mml:mn>84.20</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.88</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf145">
<mml:math id="m170">
<mml:mrow>
<mml:mn>83.05</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.97</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf146">
<mml:math id="m171">
<mml:mrow>
<mml:mn>83.41</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>1.08</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf147">
<mml:math id="m172">
<mml:mrow>
<mml:mn>86.78</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.74</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Phase-field (ours)</td>
<td align="center">
<inline-formula id="inf148">
<mml:math id="m173">
<mml:mrow>
<mml:mn mathvariant="bold">90.37</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.55</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf149">
<mml:math id="m174">
<mml:mrow>
<mml:mn mathvariant="bold">89.12</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.62</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf150">
<mml:math id="m175">
<mml:mrow>
<mml:mn mathvariant="bold">89.55</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.71</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf151">
<mml:math id="m176">
<mml:mrow>
<mml:mn mathvariant="bold">93.20</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.48</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Microstructure-aware metric comparison on generated vs. real samples (95% confidence intervals). Values are reported as mean <inline-formula id="inf152">
<mml:math id="m177">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 95% CI over three runs. Significance is denoted as &#x2a; <inline-formula id="inf153">
<mml:math id="m178">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2a;&#x2a; <inline-formula id="inf154">
<mml:math id="m179">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and &#x2a;&#x2a;&#x2a; <inline-formula id="inf155">
<mml:math id="m180">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. All reported structural metrics are based on normalized images: 2-point correlation is computed on [0,1] scaled fields, and length features are expressed as fractions of image width.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Metric</th>
<th align="center">CNN</th>
<th align="center">VAE</th>
<th align="center">Ours</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">2-Point Corr. Error <inline-formula id="inf156">
<mml:math id="m181">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf157">
<mml:math id="m182">
<mml:mrow>
<mml:mn>0.072</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.006</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf158">
<mml:math id="m183">
<mml:mrow>
<mml:mn>0.058</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.005</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf159">
<mml:math id="m184">
<mml:mrow>
<mml:mn mathvariant="bold">0.031</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.003</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Chord length KL-Div. <inline-formula id="inf160">
<mml:math id="m185">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf161">
<mml:math id="m186">
<mml:mrow>
<mml:mn>0.105</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.007</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf162">
<mml:math id="m187">
<mml:mrow>
<mml:mn>0.088</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.006</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf163">
<mml:math id="m188">
<mml:mrow>
<mml:mn mathvariant="bold">0.043</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.004</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Phase size Var. Error <inline-formula id="inf164">
<mml:math id="m189">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf165">
<mml:math id="m190">
<mml:mrow>
<mml:mn>0.192</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.008</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf166">
<mml:math id="m191">
<mml:mrow>
<mml:mn>0.133</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.007</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf167">
<mml:math id="m192">
<mml:mrow>
<mml:mn mathvariant="bold">0.057</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.005</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Persistence diagram W1 <inline-formula id="inf168">
<mml:math id="m193">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf169">
<mml:math id="m194">
<mml:mrow>
<mml:mn>1.84</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.10</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf170">
<mml:math id="m195">
<mml:mrow>
<mml:mn>1.35</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.09</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf171">
<mml:math id="m196">
<mml:mrow>
<mml:mn mathvariant="bold">0.69</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.06</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Euler number Diff. <inline-formula id="inf172">
<mml:math id="m197">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf173">
<mml:math id="m198">
<mml:mrow>
<mml:mn>8.2</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.6</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf174">
<mml:math id="m199">
<mml:mrow>
<mml:mn>6.1</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf175">
<mml:math id="m200">
<mml:mrow>
<mml:mn mathvariant="bold">2.7</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T4">Table 4</xref> presents the results of our sensitivity analysis. The performance peak occurs at <inline-formula id="inf176">
<mml:math id="m201">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.10</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf177">
<mml:math id="m202">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, suggesting a balanced trade-off between smooth differentiability and structural fidelity. Larger <inline-formula id="inf178">
<mml:math id="m203">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> values reduce the sensitivity to topological nuances, while overly aggressive topology weights (e.g., <inline-formula id="inf179">
<mml:math id="m204">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2.0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) distort the geometric manifold, hurting classification accuracy. These findings guide the robust tuning of topological constraints in generative training for microstructure modeling.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Sensitivity analysis of <inline-formula id="inf180">
<mml:math id="m205">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf181">
<mml:math id="m206">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with 95% confidence intervals. Values are reported as mean <inline-formula id="inf182">
<mml:math id="m207">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 95% CI over three runs. Significance is denoted as &#x2a; <inline-formula id="inf183">
<mml:math id="m208">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2a;&#x2a; <inline-formula id="inf184">
<mml:math id="m209">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and &#x2a;&#x2a;&#x2a; <inline-formula id="inf185">
<mml:math id="m210">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. All reported structural metrics are based on normalized images: 2-point correlation is computed on [0,1] scaled fields, and length features are expressed as fractions of image width.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<inline-formula id="inf186">
<mml:math id="m211">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf187">
<mml:math id="m212">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Top-1 accuracy (%)</th>
<th align="center">F1 score</th>
<th align="center">W1 Dist. <inline-formula id="inf188">
<mml:math id="m213">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Euler Diff. <inline-formula id="inf189">
<mml:math id="m214">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0.05</td>
<td align="center">0.5</td>
<td align="center">
<inline-formula id="inf190">
<mml:math id="m215">
<mml:mrow>
<mml:mn>87.90</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.72</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf191">
<mml:math id="m216">
<mml:mrow>
<mml:mn>87.02</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.63</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf192">
<mml:math id="m217">
<mml:mrow>
<mml:mn>0.65</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf193">
<mml:math id="m218">
<mml:mrow>
<mml:mn>2.8</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.10</td>
<td align="center">0.5</td>
<td align="center">
<inline-formula id="inf194">
<mml:math id="m219">
<mml:mrow>
<mml:mn mathvariant="bold">88.93</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.55</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf195">
<mml:math id="m220">
<mml:mrow>
<mml:mn mathvariant="bold">87.61</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.52</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf196">
<mml:math id="m221">
<mml:mrow>
<mml:mn mathvariant="bold">0.58</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.04</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf197">
<mml:math id="m222">
<mml:mrow>
<mml:mn mathvariant="bold">2.4</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.3</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.15</td>
<td align="center">0.5</td>
<td align="center">
<inline-formula id="inf198">
<mml:math id="m223">
<mml:mrow>
<mml:mn>88.10</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.60</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf199">
<mml:math id="m224">
<mml:mrow>
<mml:mn>86.90</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.59</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf200">
<mml:math id="m225">
<mml:mrow>
<mml:mn>0.63</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf201">
<mml:math id="m226">
<mml:mrow>
<mml:mn>2.7</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.3</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.10</td>
<td align="center">0.1</td>
<td align="center">
<inline-formula id="inf202">
<mml:math id="m227">
<mml:mrow>
<mml:mn>85.02</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.77</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf203">
<mml:math id="m228">
<mml:mrow>
<mml:mn>84.15</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.68</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf204">
<mml:math id="m229">
<mml:mrow>
<mml:mn>0.91</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.06</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf205">
<mml:math id="m230">
<mml:mrow>
<mml:mn>4.1</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.10</td>
<td align="center">1.0</td>
<td align="center">
<inline-formula id="inf206">
<mml:math id="m231">
<mml:mrow>
<mml:mn>87.41</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.62</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf207">
<mml:math id="m232">
<mml:mrow>
<mml:mn>86.21</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.57</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf208">
<mml:math id="m233">
<mml:mrow>
<mml:mn>0.59</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.04</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf209">
<mml:math id="m234">
<mml:mrow>
<mml:mn>2.6</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.3</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.10</td>
<td align="center">2.0</td>
<td align="center">
<inline-formula id="inf210">
<mml:math id="m235">
<mml:mrow>
<mml:mn>85.78</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.65</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf211">
<mml:math id="m236">
<mml:mrow>
<mml:mn>83.91</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.60</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf212">
<mml:math id="m237">
<mml:mrow>
<mml:mn>0.48</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf213">
<mml:math id="m238">
<mml:mrow>
<mml:mn>3.9</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">0.30</td>
<td align="center">0.5</td>
<td align="center">
<inline-formula id="inf214">
<mml:math id="m239">
<mml:mrow>
<mml:mn>84.55</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.81</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf215">
<mml:math id="m240">
<mml:mrow>
<mml:mn>83.60</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.70</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf216">
<mml:math id="m241">
<mml:mrow>
<mml:mn>0.78</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf217">
<mml:math id="m242">
<mml:mrow>
<mml:mn>3.5</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T5">Table 5</xref> shows that our model preserves both semantic accuracy and topological structure under significant shifts in porosity and grain morphology. Unlike CNNs or VAEs, MorphoTensor maintains low persistence diagram distance and Euler number error, confirming its robustness in generalizing to unseen structural regimes.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Robustness evaluation on out-of-distribution (OOD) morphologies with 95% confidence intervals. Values are reported as mean <inline-formula id="inf218">
<mml:math id="m243">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 95% CI over three runs. Significance is denoted as &#x2a; <inline-formula id="inf219">
<mml:math id="m244">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2a;&#x2a; <inline-formula id="inf220">
<mml:math id="m245">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and &#x2a;&#x2a;&#x2a; <inline-formula id="inf221">
<mml:math id="m246">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. All reported structural metrics are based on normalized images: 2-point correlation is computed on [0,1] scaled fields, and length features are expressed as fractions of image width.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="center">Accuracy (%)</th>
<th align="center">2-Point Corr. <inline-formula id="inf222">
<mml:math id="m247">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Chord length KL <inline-formula id="inf223">
<mml:math id="m248">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">W1 (PD) <inline-formula id="inf224">
<mml:math id="m249">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Euler Diff. <inline-formula id="inf225">
<mml:math id="m250">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Porosity range (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CNN baseline</td>
<td align="center">
<inline-formula id="inf226">
<mml:math id="m251">
<mml:mrow>
<mml:mn>78.84</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.83</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf227">
<mml:math id="m252">
<mml:mrow>
<mml:mn>0.097</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.006</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf228">
<mml:math id="m253">
<mml:mrow>
<mml:mn>0.134</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.008</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf229">
<mml:math id="m254">
<mml:mrow>
<mml:mn>1.97</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.11</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf230">
<mml:math id="m255">
<mml:mrow>
<mml:mn>9.4</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">25&#x2013;60</td>
</tr>
<tr>
<td align="left">VAE baseline</td>
<td align="center">
<inline-formula id="inf231">
<mml:math id="m256">
<mml:mrow>
<mml:mn>81.03</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.76</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf232">
<mml:math id="m257">
<mml:mrow>
<mml:mn>0.082</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.005</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf233">
<mml:math id="m258">
<mml:mrow>
<mml:mn>0.102</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.007</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf234">
<mml:math id="m259">
<mml:mrow>
<mml:mn>1.55</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.09</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf235">
<mml:math id="m260">
<mml:mrow>
<mml:mn>7.8</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.6</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">25&#x2013;60</td>
</tr>
<tr>
<td align="left">MorphoTensor (ours)</td>
<td align="center">
<inline-formula id="inf236">
<mml:math id="m261">
<mml:mrow>
<mml:mn mathvariant="bold">86.42</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.60</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf237">
<mml:math id="m262">
<mml:mrow>
<mml:mn mathvariant="bold">0.048</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.004</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf238">
<mml:math id="m263">
<mml:mrow>
<mml:mn mathvariant="bold">0.061</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.005</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf239">
<mml:math id="m264">
<mml:mrow>
<mml:mn mathvariant="bold">0.71</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf240">
<mml:math id="m265">
<mml:mrow>
<mml:mn mathvariant="bold">3.2</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">25&#x2013;60</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>All structural metrics are computed on normalized microstructure fields. 2-point correlation functions are evaluated over images scaled to [0, 1], and length-based descriptors (e.g., chord length and phase size) are expressed as fractions of the total image width. These conventions ensure comparability across datasets of different spatial resolutions.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Feature removal study</title>
<p>Results on general datasets (HEDM and HTEM) provide a better understanding of the model's baseline performance in large-scale environments.</p>
<p>
<xref ref-type="table" rid="T6">Table 6</xref> demonstrates the effects of different components of the MorphoTensor generative pipeline on the classification performance. The inclusion of synthetic microstructures generated under topological and physical constraints consistently improves the accuracy, recall, and AUC. Ablating topology loss or latent warping results in a degradation of performance, indicating that these modules are critical for preserving structural consistency in generated samples and enhancing classifier robustness.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Ablation study on the impact of MorphoTensor pipeline components in classification tasks (EBSD dataset).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Configuration</th>
<th align="center">Accuracy</th>
<th align="center">Recall</th>
<th align="center">F1 score</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Baseline ResNet50 (no augmentation)</td>
<td align="center">82.51</td>
<td align="center">80.12</td>
<td align="center">81.15</td>
<td align="center">85.33</td>
</tr>
<tr>
<td align="left">&#x2b; MorphoTensor synthetic pretraining</td>
<td align="center">
<bold>88.93</bold>
</td>
<td align="center">
<bold>87.02</bold>
</td>
<td align="center">
<bold>87.61</bold>
</td>
<td align="center">
<bold>91.05</bold>
</td>
</tr>
<tr>
<td align="left">&#x2b; MorphoTensor w/o topology loss</td>
<td align="center">86.27</td>
<td align="center">84.15</td>
<td align="center">84.80</td>
<td align="center">88.62</td>
</tr>
<tr>
<td align="left">&#x2b; MorphoTensor w/o constraint projection</td>
<td align="center">85.49</td>
<td align="center">83.11</td>
<td align="center">83.76</td>
<td align="center">87.91</td>
</tr>
<tr>
<td align="left">&#x2b; MorphoTensor w/o latent warping</td>
<td align="center">86.02</td>
<td align="center">84.03</td>
<td align="center">84.52</td>
<td align="center">88.33</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T7">Table 7</xref> presents the computational overhead introduced by the persistent homology (PH) loss during training on the EBSD dataset. Compared to the baseline configuration without PH losses, the inclusion of topology-aware regularization increases the per-epoch training time by approximately 28% and incurs a modest memory overhead of 319 MB. On a per-batch basis, the training time increases from 98.3 ms to 126.0 ms. Despite this increase in cost, the performance gains in topological fidelity and microstructure realism (as discussed below) justify the additional computation, especially in high-stakes applications involving materials informatics or microstructure-sensitive design tasks.
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Runtime and memory overhead from persistent homology losses (EBSD dataset).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Configuration</th>
<th align="center">Per epoch time (s)</th>
<th align="center">Per batch time (ms)</th>
<th align="center">GPU memory (MB)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Without PH loss</td>
<td align="center">101.4</td>
<td align="center">98.3</td>
<td align="center">3968</td>
</tr>
<tr>
<td align="left">With PH loss <inline-formula id="inf241">
<mml:math id="m266">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>topo</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<bold>130.1</bold>
</td>
<td align="center">
<bold>126.0</bold>
</td>
<td align="center">
<bold>4287</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T8">Table 8</xref> compares the performance of three configurations on a publicly available EBSD dataset using both classification metrics and microstructure-aware topology metrics. While the CNN baseline achieves acceptable accuracy, it performs poorly on structural metrics such as two-point correlation error, chord length KL divergence (measured as a fraction of image width), and persistence diagram distance. Incorporating the MorphoTensor model without topology loss improves both accuracy and structure preservation. However, the full model with persistent homology loss achieves the best results across all metrics, indicating that topological regularization significantly enhances the geometric and physical plausibility of the generated or processed microstructures. This validates the core hypothesis of our work&#x2014;that topology-aware generative modeling leads to structurally faithful representations in computational materials science.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Evaluation of MorphoTensor on EBSD dataset (microstructure-aware metrics).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="center">Accuracy</th>
<th align="center">2-Point Corr. <inline-formula id="inf242">
<mml:math id="m267">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Chord length KL <inline-formula id="inf243">
<mml:math id="m268">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">W1(PH) <inline-formula id="inf244">
<mml:math id="m269">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Euler Diff. <inline-formula id="inf245">
<mml:math id="m270">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Training time (h)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CNN baseline</td>
<td align="center">82.51</td>
<td align="center">0.072</td>
<td align="center">0.105</td>
<td align="center">1.84</td>
<td align="center">8.2</td>
<td align="center">3.1</td>
</tr>
<tr>
<td align="left">MorphoTensor (no PH loss)</td>
<td align="center">86.10</td>
<td align="center">0.051</td>
<td align="center">0.068</td>
<td align="center">1.25</td>
<td align="center">4.4</td>
<td align="center">4.3</td>
</tr>
<tr>
<td align="left">MorphoTensor (w/PH loss)</td>
<td align="center">
<bold>88.93</bold>
</td>
<td align="center">
<bold>0.031</bold>
</td>
<td align="center">
<bold>0.043</bold>
</td>
<td align="center">
<bold>0.69</bold>
</td>
<td align="center">
<bold>2.7</bold>
</td>
<td align="center">5.5</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold value indicates the experimental index values obtained using our method in the experiment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>For full reproducibility, we provide an anonymous code repository that includes training scripts, model configurations, and instructions for dataset preparation: <ext-link ext-link-type="uri" xlink:href="https://snippets.cacher.io/snippet/0a9c95f0fa961047e7cd">https://snippets.cacher.io/snippet/0a9c95f0fa961047e7cd</ext-link>. The repository covers dataset-specific preprocessing (e.g., EBSD map cleaning and simulation of phase-field morphologies), hardware-agnostic training pipelines, and logging templates. This enables independent validation of all reported results. Upon final publication, this repository will be made publicly available under an open-source license.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusions and future work</title>
<p>This work aims to overcome key shortcomings of conventional deep learning methods when applied to microstructural image analysis in the field of computational materials science. Existing methods often struggle with preserving the complex topology and spatial heterogeneity that characterize real-world material microstructures. To address this, we developed a novel deep learning framework built around the structured generative model MorphoTensor, which integrates physical, geometric, and topological priors into the learning process. This model introduces hierarchical tensorial embeddings that capture crucial characteristics such as directionality, anisotropy, and spatial locality. We proposed a Topology-Aware Latent Refinement (TALR) strategy, which leverages persistent homology and differentiable Betti numbers to ensure topological fidelity. This comprehensive design allows the model to unify statistical encoding, topological analysis, and latent space alignment. Our experiments, conducted across both synthetic and real microscopy datasets, confirm substantial improvements in classification accuracy, robustness, and generalization compared to conventional convolutional networks and autoencoders.</p>
<p>While the results are promising, there remain some limitations. First, the model&#x2019;s reliance on complex topological tools such as persistent homology introduces computational overhead, which could hinder scalability in real-time or high-throughput applications. Second, although our model generalizes well across datasets, its performance on entirely unseen microstructural morphologies&#x2014;especially those outside the training distribution&#x2014;still warrants further investigation. Looking ahead, future research should aim to enhance the computational efficiency of the TALR module and expand the framework to accommodate 3D volumetric datasets. Integrating active learning and physics-based simulation feedback loops also presents a compelling direction for enhancing the adaptability and physical validity of learned representations.</p>
<p>While our current framework operates on 2D microstructural images, the core components&#x2014;tensorized encoding, spatial warping, topological losses, and physical constraints&#x2014;generalize naturally to 3D volumetric data. Extending MorphoTensor to 3D would involve using 3D convolutional operators in the encoder and decoder, volumetric warping fields regularized via 3D Jacobian determinants, and persistent homology computed over voxelized inputs. Notably, efficient algorithms for computing persistent diagrams in 3D exist and can be integrated into the current training pipeline. This direction is particularly promising for applications involving X-ray tomography, 3D EBSD, or synthetic phase-field volumes and represents a key avenue for future work.</p>
<p>Beyond the architectural and learning-based contributions, our framework offers tangible benefits for downstream materials science workflows. For instance, the accurate preservation of chord-length distributions directly supports permeability estimation in porous materials and fatigue life modeling in polycrystalline alloys, where feature spacing and persistence affect crack initiation. Similarly, the ability to reproduce orientation distributions from EBSD-like inputs enhances the predictive modeling of anisotropic mechanical properties, such as elastic modulus and thermal expansion. By maintaining topological consistency and structural diversity, our method strengthens microstructure&#x2013;property linkages critical to alloy design loops, defect screening, and performance certification in computational materials pipelines. These connections highlight the broader utility of topology-aware generative modeling in practical materials informatics tasks.</p>
<p>While additional experiments on HEDM and HTEM datasets are included in the Appendix as transfer baselines, the main evidence of our method is provided by EBSD and phase-field datasets, which are most representative of microstructural analysis.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. The code supporting the findings of this study is available via Cacher: <ext-link ext-link-type="uri" xlink:href="https://snippets.cacher.io/snippet/0a9c95f0fa961047e7cd">https://snippets.cacher.io/snippet/0a9c95f0fa961047e7cd</ext-link>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>HL: Conceptualization, Methodology, Software, Validation, Writing &#x2013; original draft. PZ: Formal analysis, Investigation, Data curation, Writing &#x2013; original draft. CT: Writing &#x2013; original draft, Writing &#x2013; review and editing, Visualization, Supervision, Funding acquisition.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The authors declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2894871/overview">Shahed Rezaei</ext-link>, Access e.V., Germany</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1899629/overview">Alexandre Viardin</ext-link>, Access e.V., Germany</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3120218/overview">Aleena Baby</ext-link>, Access e.V., Germany</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3241015/overview">Nayan Kumar Sarkar</ext-link>, IILM University, India</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alotaibi</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Eid Alajmi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kadirgama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Samylingam</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Aslfattahi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Kok</surname>
<given-names>C. K.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Enhancing engine oil performance with graphene-cellulose nanoparticles: insights into thermophysical properties and tribological behavior</article-title>. <source>Front. Mater.</source> <volume>12</volume>, <fpage>1549117</fpage>. <pub-id pub-id-type="doi">10.3389/fmats.2025.1549117</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashtiani</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Geers</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Aflatouni</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An on-chip photonic deep neural network for image classification</article-title>. <source>Nature</source> <volume>606</volume>, <fpage>501</fpage>&#x2013;<lpage>506</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-022-04714-0</pub-id>
<pub-id pub-id-type="pmid">35650432</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Azizi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mustafa</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ryan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Beaver</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Freyberg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Deaton</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Big self-supervised models advance medical image classification</article-title>,&#x201d; in <source>IEEE international conference on computer vision</source>.</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bazi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bashmal</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rahhal</surname>
<given-names>M. M. A.</given-names>
</name>
<name>
<surname>Dayil</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Ajlan</surname>
<given-names>N. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Vision transformers for remote sensing image classification</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>516</fpage>. <pub-id pub-id-type="doi">10.3390/rs13030516</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Bhojanapalli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chakrabarti</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Glasner</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Veit</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Understanding robustness of transformers for image classification</article-title>,&#x201d; in <source>IEEE international conference on computer vision</source>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bostanabad</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kearney</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Brinson</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Apley</surname>
<given-names>D. W.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Computational microstructure characterization and reconstruction: review of the state-of-the-art techniques</article-title>. <source>Prog. Mater. Sci.</source> <volume>95</volume>, <fpage>1</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1016/j.pmatsci.2018.01.005</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>C.-F.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Panda</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021a</year>). &#x201c;<article-title>Crossvit: Cross-attention multi-scale vision transformer for image classification</article-title>,&#x201d; in <source>IEEE international conference on computer vision</source>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Miao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Review of image classification algorithms based on convolutional neural networks</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>4712</fpage>. <pub-id pub-id-type="doi">10.3390/rs13224712</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Transmed: transformers advance multi-modal medical image classification</article-title>. <source>Diagnostics</source> <volume>11</volume>, <fpage>1384</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics11081384</pub-id>
<pub-id pub-id-type="pmid">34441318</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>DeCost</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Holm</surname>
<given-names>E. A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A computer vision approach for automated analysis and classification of microstructural image data</article-title>. <source>Comput. Mater. Sci.</source> <volume>110</volume>, <fpage>126</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1016/j.commatsci.2015.08.011</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>Exploring vision transformers for polarimetric sar image classification</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2021.3137383</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Bao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022b</year>). <article-title>Clip itself is a strong fine-tuner: achieving 85.7% and 88.0% top-1 accuracy with vit-b and vit-l on imagenet</article-title>. <source>arXiv Prepr. arXiv:2212.06138</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2212.06138">https://arxiv.org/abs/2212.06138</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elpeltagy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sallam</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Automatic prediction of covid-19 from chest images using modified resnet50</article-title>. <source>Multimedia tools Appl.</source> <volume>80</volume>, <fpage>26451</fpage>&#x2013;<lpage>26463</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-021-10783-6</pub-id>
<pub-id pub-id-type="pmid">33967592</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Conv2next: reconsidering conv next network design for image recognition</article-title>,&#x201d; in <source>2022 international conference on computers and artificial intelligence technologies (CAIT)</source> (<publisher-name>IEEE</publisher-name>), <fpage>53</fpage>&#x2013;<lpage>60</lpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://ieeexplore.ieee.org/abstract/document/10072172">https://ieeexplore.ieee.org/abstract/document/10072172</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Plaza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chanussot</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Graph convolutional networks for hyperspectral image classification</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>59</volume>, <fpage>5966</fpage>&#x2013;<lpage>5978</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2020.3015157</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Plaza</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Spectralformer: rethinking hyperspectral image classification with transformers</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2021.3130716</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jackson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Owsiak</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Goertz</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Diehl</surname>
<given-names>P. F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Getting to the root of the issue(s): expanding the study of issues in mids (the mid-issue dataset, version 1.0)</article-title>. <source>J. Confl. Resolut.</source> <volume>66</volume>, <fpage>1514</fpage>&#x2013;<lpage>1542</lpage>. <pub-id pub-id-type="doi">10.1177/00220027221080967</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalidindi</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>De Graef</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Materials data science: current status and future outlook</article-title>. <source>Annu. Rev. Mater. Res.</source> <volume>45</volume>, <fpage>171</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-matsci-070214-020844</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>H. E.</given-names>
</name>
<name>
<surname>Cosa-Linan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Santhanam</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Jannesari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Maros</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ganslandt</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Transfer learning for medical image classification: a literature review</source>. <publisher-name>BMC Medical Imaging</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://link.springer.com/article/10.1186/s12880-022-00793-7">https://link.springer.com/article/10.1186/s12880-022-00793-7</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Koonce</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021a</year>). &#x201c;<article-title>Efficientnet</article-title>,&#x201d; in <source>Convolutional neural networks with swift for tensorflow: image recognition and dataset categorization</source> (<publisher-name>Springer</publisher-name>), <fpage>109</fpage>&#x2013;<lpage>123</lpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://link.springer.com/book/10.1007/978-1-4842-6168-2">https://link.springer.com/book/10.1007/978-1-4842-6168-2</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Koonce</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021b</year>). &#x201c;<article-title>Mobilenetv3</article-title>,&#x201d; in <source>Convolutional neural networks with swift for tensorflow: image recognition and dataset categorization</source> (<publisher-name>Springer</publisher-name>), <fpage>125</fpage>&#x2013;<lpage>144</lpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://link.springer.com/book/10.1007/978-1-4842-6168-2">https://link.springer.com/book/10.1007/978-1-4842-6168-2</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Eliceiri</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Dual-stream multiple instance learning network for whole slide image classification with self-supervised contrastive learning</article-title>,&#x201d; in <source>Computer vision and pattern recognition</source>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Research progress of amorphous micro-nano structured materials</article-title>. <source>Front. Mater.</source> <volume>12</volume>, <fpage>1589830</fpage>. <pub-id pub-id-type="doi">10.3389/fmats.2025.1589830</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mai</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jeong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Quispe</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Sanner</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Online continual learning in image classification: an empirical survey</article-title>. <source>Neurocomputing</source> <volume>469</volume>, <fpage>28</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2021.10.021</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Masana</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Twardowski</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Menta</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bagdanov</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>van de Weijer</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Class-incremental learning: survey and performance evaluation on image classification</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>45</volume>, <fpage>5513</fpage>&#x2013;<lpage>5533</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2022.3213473</pub-id>
<pub-id pub-id-type="pmid">36215375</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Mascarenhas</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>A comparison between vgg16, vgg19 and resnet50 architecture frameworks for image classification</article-title>,&#x201d; in <source>2021 international conference on disruptive technologies for multi-disciplinary research and applications (CENTCON)</source>.</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maur&#xed;cio</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Domingues</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bernardino</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Comparing vision transformers and convolutional neural networks for image classification: a literature review</article-title>. <source>Appl. Sci</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.mdpi.com/2076-3417/13/9/5521">https://www.mdpi.com/2076-3417/13/9/5521</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Muralikrishnan</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Conry</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Marvel</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Harmer</surname>
<given-names>M. P.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Observations of unexpected grain boundary migration in srtio3</article-title>. <source>Scr. Mater.</source> <volume>222</volume>, <fpage>115055</fpage>. <pub-id pub-id-type="doi">10.1016/j.scriptamat.2022.115055</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Domain adaptation in remote sensing image classification: a survey</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Observations Remote Sens.</source> <volume>15</volume>, <fpage>9842</fpage>&#x2013;<lpage>9859</lpage>. <pub-id pub-id-type="doi">10.1109/jstars.2022.3220875</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Global filter networks for image classification</article-title>. <source>Neural Inf. Process. Syst</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper/2021/hash/07e87c2f4fc7f7c96116d8e2a92790f5-Abstract.html">https://proceedings.neurips.cc/paper/2021/hash/07e87c2f4fc7f7c96116d8e2a92790f5-Abstract.html</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rezaei</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Asl</surname>
<given-names>R. N.</given-names>
</name>
<name>
<surname>Taghikhani</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Moeineddin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kaliske</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Apel</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024a</year>). <article-title>Finite operator learning: bridging neural operators and numerical methods for efficient parametric solution and optimization of pdes</article-title>. <source>arXiv Prepr. arXiv:2407.04157</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2407.04157">https://arxiv.org/abs/2407.04157</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rezaei</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Moeineddin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Harandi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024b</year>). <article-title>Learning solutions of thermodynamics-based nonlinear constitutive material models using physics-informed neural networks</article-title>. <source>Comput. Mech.</source> <volume>74</volume>, <fpage>333</fpage>&#x2013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1007/s00466-023-02435-3</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rezaei</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Asl</surname>
<given-names>R. N.</given-names>
</name>
<name>
<surname>Faroughi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Asgharzadeh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Harandi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Koopas</surname>
<given-names>R. N.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>A finite operator learning technique for mapping the elastic properties of microstructures to their mechanical deformations</article-title>. <source>Int. J. Numer. Methods Eng.</source> <volume>126</volume>, <fpage>e7637</fpage>. <pub-id pub-id-type="doi">10.1002/nme.7637</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roy</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Deria</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rasti</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Plaza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chanussot</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Multimodal fusion transformer for remote sensing image classification</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>61</volume>, <fpage>1</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2023.3286826</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ru</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Rheology, permeability and microstructure of seawater-based slurry for slurry shield tunneling: insights from laboratory tests</article-title>. <source>Front. Mater.</source> <volume>12</volume>, <fpage>1592537</fpage>. <pub-id pub-id-type="doi">10.3389/fmats.2025.1592537</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheykhmousa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mahdianpari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ghanbari</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mohammadimanesh</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ghamisi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Homayouni</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Support vector machine <italic>versus</italic> random forest for remote sensing image classification: a meta-analysis and systematic review</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Observations Remote Sens.</source> <volume>13</volume>, <fpage>6308</fpage>&#x2013;<lpage>6325</lpage>. <pub-id pub-id-type="doi">10.1109/jstars.2020.3026724</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Steingrimsson</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kulkarni</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Thoma</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liaw</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Construction of multi-dimensional functions for optimization of additive-manufacturing process parameters</article-title>. <source>arXiv Prepr. arXiv:2311.06398</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2311.06398">https://arxiv.org/abs/2311.06398</ext-link>
</comment>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Spectral&#x2013;spatial feature tokenization transformer for hyperspectral image classification</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2022.3144158</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taori</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dave</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shankar</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Carlini</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Recht</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Measuring robustness to natural distribution shifts in image classification</article-title>. <source>Neural Inf. Process. Syst</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper/2020/hash/d8330f857a17c53d217014ee776bfd50-Abstract.html">https://proceedings.neurips.cc/paper/2020/hash/d8330f857a17c53d217014ee776bfd50-Abstract.html</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tenenbaum</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Isola</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Rethinking few-shot image classification: a good embedding is all you need?</article-title>,&#x201d; in <source>European conference on computer vision</source>.</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Touvron</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bojanowski</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Caron</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cord</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>El-Nouby</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Grave</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Resmlp: feedforward networks for image classification with data-efficient training</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>45</volume>, <fpage>5314</fpage>&#x2013;<lpage>5321</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2022.3206148</pub-id>
<pub-id pub-id-type="pmid">36094972</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Touvron</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cord</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>J&#xe9;gou</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Deit iii: revenge of the vit</article-title>,&#x201d; in <source>European conference on computer vision</source> (<publisher-name>Springer</publisher-name>), <fpage>516</fpage>&#x2013;<lpage>533</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-20053-3_30</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Transformer-based unsupervised contrastive learning for histopathological image classification</article-title>. <source>Med. Image Anal.</source> <volume>81</volume>, <fpage>102559</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2022.102559</pub-id>
<pub-id pub-id-type="pmid">35952419</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Medmnist v2 - a large-scale lightweight benchmark for 2d and 3d biomedical image classification</article-title>. <source>Sci. Data</source> <volume>10</volume>, <fpage>41</fpage>. <pub-id pub-id-type="doi">10.1038/s41597-022-01721-8</pub-id>
<pub-id pub-id-type="pmid">36658144</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deepemd: Few-shot image classification with differentiable earth mover&#x2019;s distance and structured classifiers</article-title>. <source>Comput. Vis. Pattern Recognit.</source>, <fpage>12200</fpage>&#x2013;<lpage>12210</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr42600.2020.01222</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Single-source domain expansion network for cross-scene hyperspectral image classification</article-title>. <source>IEEE Trans. Image Process.</source> <volume>32</volume>, <fpage>1498</fpage>&#x2013;<lpage>1512</lpage>. <pub-id pub-id-type="doi">10.1109/tip.2023.3243853</pub-id>
<pub-id pub-id-type="pmid">37027628</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Enhancing weld seam recognition in industrial robotics through advanced deep learning techniques</article-title>. <source>17th Int. Sci. Pract. Conf. &#x201c;The latest Technol. Dev. Sci. Bus. Education&#x201d;(April 30&#x2013;May 03, 2024) Lond. Great Britain. Int. Sci. Group</source> <volume>2024</volume>, <fpage>446</fpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://books.google.com.hk/books?hl=zh-CN&#x26;lr=&#x26;id=OTEZEQAAQBAJ&#x26;oi=fnd&#x26;pg=PA390&#x26;dq=Enhancing+weld+seam+recognition++in&#x26;&#x23;+;industrial+robotics+through+advanced+deep+learning+techniques&#x26;ots=Szgf9H1JJW&#x26;sig=DrnIpUqaVZeYFotbqutg9qCHPQI&#x26;redir_esc=y#v=onepage&#x26;q=Enhancing%20weld%20seam%20recognition%20%20in%20industrial%20robotics%20through%20advanced%20deep%20learning%20techniques&#x26;f=false">https://books.google.com.hk/books?hl&#x3d;zh-CN&#x26;lr&#x3d;&#x26;id&#x3d;OTEZEQAAQBAJ&#x26;oi&#x3d;fnd&#x26;pg&#x3d;PA390&#x26;dq&#x3d;Enhancing&#x2b;weld&#x2b;seam&#x2b;recognition&#x2b;&#x2b;in&#x26;&#x23;&#x2b;;industrial&#x2b;robotics&#x2b;through&#x2b;advanced&#x2b;deep&#x2b;learning&#x2b;techniques&#x26;ots&#x3d;Szgf9H1JJW&#x26;sig&#x3d;DrnIpUqaVZeYFotbqutg9qCHPQI&#x26;redir_esc&#x3d;y#v&#x3d;onepage&#x26;q&#x3d;Enhancing%20weld%20seam%20recognition%20%20in%20industrial%20robotics%20through%20advanced%20deep%20learning%20techniques&#x26;f&#x3d;false</ext-link>
</comment>.</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Rotation-invariant attention network for hyperspectral image classification</article-title>. <source>IEEE Trans. Image Process.</source> <volume>31</volume>, <fpage>4251</fpage>&#x2013;<lpage>4265</lpage>. <pub-id pub-id-type="doi">10.1109/tip.2022.3177322</pub-id>
<pub-id pub-id-type="pmid">35635815</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhuang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bian</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Deep subdomain adaptation network for image classification</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>32</volume>, <fpage>1713</fpage>&#x2013;<lpage>1722</lpage>. <pub-id pub-id-type="doi">10.1109/tnnls.2020.2988928</pub-id>
<pub-id pub-id-type="pmid">32365037</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>