<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioeng. Biotechnol.</journal-id>
<journal-title>Frontiers in Bioengineering and Biotechnology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioeng. Biotechnol.</abbrev-journal-title>
<issn pub-type="epub">2296-4185</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1465823</article-id>
<article-id pub-id-type="doi">10.3389/fbioe.2024.1465823</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioengineering and Biotechnology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>VP-net: an end-to-end deep learning network for elastic wave velocity prediction in human skin <italic>in vivo</italic> using optical coherence elastography</article-title>
<alt-title alt-title-type="left-running-head">Zhang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbioe.2024.1465823">10.3389/fbioe.2024.1465823</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zhang</surname>
<given-names>Yilong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/751513/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Liao</surname>
<given-names>Jinpeng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2793439/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Feng</surname>
<given-names>Zhengshuyi</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2586680/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Wenyue</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Perelli</surname>
<given-names>Alessandro</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2837964/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Zhiqiong</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1386947/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Chunhui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1746670/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Zhihong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1239496/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Centre of Medical Engineering and Technology</institution>, <institution>University of Dundee</institution>, <addr-line>Dundee</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of Physics and Engineering Technology</institution>, <institution>University of York</institution>, <addr-line>York</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Medicine and Biological Information Engineering</institution>, <institution>Northeastern University</institution>, <addr-line>Shenyang</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/103266/overview">Yang Liu</ext-link>, Hong Kong Polytechnic University, Hong Kong, SAR China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2809797/overview">Gwanghyun Jo</ext-link>, Hanyang University, Republic of Korea</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1068512/overview">Kristen M. Meiburger</ext-link>, Polytechnic University of Turin, Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Chunhui Li, <email>c.li@dundee.ac.uk</email>
</corresp>
<fn fn-type="equal" id="fn001">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1465823</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>07</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>09</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Zhang, Liao, Feng, Yang, Perelli, Wang, Li and Huang.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Zhang, Liao, Feng, Yang, Perelli, Wang, Li and Huang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Acne vulgaris, one of the most common skin conditions, affects up to 85% of late adolescents, yet there is currently no universally accepted assessment system. The biomechanical properties of skin provide valuable information for the assessment and management of skin conditions. Wave-based optical coherence elastography (OCE) quantitatively assesses these properties of tissues by analyzing induced elastic wave velocities. However, velocity estimation methods require significant expertise and lengthy image processing times, limiting the clinical translation of OCE technology. Recent advances in machine learning offer promising solutions to simplify the velocity estimation process.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, we proposed a novel end-to-end deep-learning model, named velocity prediction network (VP-Net), aiming to accurately predict elastic wave velocity from raw OCE data of <italic>in vivo</italic> healthy and abnormal human skin. A total of 16,424 raw phase slices from 1% to 5% agar-based tissue-mimicking phantoms, 28,270 slices from <italic>in vivo</italic> human skin sites including the palm, forearm, and back of the hand from 16 participants, and 580 slices of facial closed comedones were acquired to train, validate, and test VP-Net.</p>
</sec>
<sec>
<title>Results</title>
<p>VP-Net demonstrated highly accurate velocity prediction performance compared to other deep-learning-based methods, as evidenced by small evaluation metrics. Furthermore, VP-Net exhibited low model complexity and parameter requirements, enabling end-to-end velocity prediction from a single raw phase slice in 1.32&#xa0;ms, enhancing processing speed by a factor of &#x223c;100 compared to a conventional wave velocity estimation method. Additionally, we employed gradient-weighted class activation maps to showcase VP-Net&#x2019;s proficiency in discerning wave propagation patterns from raw phase slices. VP-Net predicted wave velocities that were consistent with the ground truth velocities in agar phantom, two age groups (20s and 30s) of multiple human skin sites and closed comedones datasets.</p>
</sec>
<sec>
<title>Discussion</title>
<p>This study indicates that VP-Net could rapidly and accurately predict elastic wave velocities related to biomechanical properties of <italic>in vivo</italic> healthy and abnormal skin, offering potential clinical applications in characterizing skin aging, as well as assessing and managing the treatment of acne vulgaris.</p>
</sec>
</abstract>
<kwd-group>
<kwd>optical coherence elastography</kwd>
<kwd>deep learning</kwd>
<kwd>convolutional neural network (CNN)</kwd>
<kwd>surface acoustic wave (SAW)</kwd>
<kwd>agar-based tissue-mimicking phantoms</kwd>
<kwd>
<italic>In vivo</italic> human skin</kwd>
<kwd>closed comedones</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Biomechanics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Skin, as the body&#x2019;s largest organ, serves to regulate body fluid and temperature and forms a protective barrier shielding the organism against pathogens and injuries from the environment (<xref ref-type="bibr" rid="B36">Proksch et al., 2008</xref>). Skin disease is one of the most common human illnesses, affecting nearly 900 million people, more than one-third of the global population (<xref ref-type="bibr" rid="B10">Hay et al., 2014</xref>). Among these, acne vulgaris is a prevalent chronic skin inflammatory disease affecting up to 85% of late adolescents (<xref ref-type="bibr" rid="B30">Lynn et al., 2016</xref>), resulting in various consequences, including scarring, dyspigmentation, and psychological impacts (<xref ref-type="bibr" rid="B35">Og&#xe9; et al., 2019</xref>). However, there is currently no universally accepted assessment system for acne vulgaris.</p>
<p>The biomechanical properties of skin are primarily determined by its structural components (<xref ref-type="bibr" rid="B14">Joodaki and Panzer, 2018</xref>). Elastography is a functional modality that provides information on the biomechanical properties of tissues. Among different elastography modalities, optical coherence elastography (OCE), derived from optical coherence tomography (OCT), has an ultra-fast sampling rate, micrometer imaging resolutions and millimeter depth penetration (&#x223c;1&#x2013;2&#xa0;mm) (<xref ref-type="bibr" rid="B23">Larin and Sampson, 2017</xref>). A notable branch of OCE technology is wave-based OCE, an <italic>in situ</italic> non-destructive approach that quantitatively estimates biomechanical properties in soft tissues using elastic waves (<xref ref-type="bibr" rid="B25">Liang and Boppart, 2009</xref>). Biomechanical properties, especially elasticity (<xref ref-type="bibr" rid="B7">Everett and Sommers, 2013</xref>), have been proven to be a potential biomarker for characterizing skin aging (<xref ref-type="bibr" rid="B4">Couturaud et al., 1995</xref>), understanding physiology, pathological cases, and monitoring treatment (<xref ref-type="bibr" rid="B2">Balbir-Gurman et al., 2002</xref>; <xref ref-type="bibr" rid="B34">Neto et al., 2013</xref>; <xref ref-type="bibr" rid="B16">Killaars et al., 2015</xref>). In OCE, wave propagation in tissue occurs when an elastic wave is generated by excitation and then transmits through other regions of the tissue. The velocity of the wave is intrinsically related to the biomechanical properties of the tissues (<xref ref-type="bibr" rid="B18">Kirby et al., 2017</xref>). OCE&#x2019;s millimeter penetration depth confines motion measurements to regions near tissue boundaries, where surface acoustic waves (SAWs) are the dominant wave type (<xref ref-type="bibr" rid="B50">Zvietcovich and Larin, 2022</xref>). SAW velocities can be estimated by analyzing the phase term of the complex OCT signal. 
Typically, the phase difference between successive scans is utilized to detect sub-resolution axial differential displacement within a sample (<xref ref-type="bibr" rid="B41">Song et al., 2013</xref>), followed by the use of a time-of-flight approach to measure SAW velocities. By selecting an appropriate elasticity model, the biomechanical properties of the tissue can then be determined (<xref ref-type="bibr" rid="B50">Zvietcovich and Larin, 2022</xref>). While wave-based OCE has gained increasing interest in recent years, its application to <italic>in vivo</italic> skin conditions remains in its early stages. Two pre-clinical studies have shown the ability of wave-based OCE to characterize mechanical properties in animal models of systemic sclerosis (<xref ref-type="bibr" rid="B6">Du et al., 2016</xref>) and skin burns (<xref ref-type="bibr" rid="B28">Liu et al., 2024</xref>). However, only one wave-based OCE system has been translated to a clinical trial in human subjects for the assessment of systemic sclerosis <italic>in vivo</italic> (<xref ref-type="bibr" rid="B27">Liu et al., 2019</xref>). The major challenges limiting the clinical translation of OCE technology are the high level of expertise required and the inability to produce real-time results (<xref ref-type="bibr" rid="B43">Sun et al., 2011</xref>). In particular, biomechanical property analysis often demands complex image processing for wave feature extraction and velocity estimation (<xref ref-type="bibr" rid="B42">Song et al., 2015</xref>; <xref ref-type="bibr" rid="B19">Kirby et al., 2019</xref>), which could extend processing times to potentially several minutes or longer, limiting its use in real-time clinical settings.</p>
<p>Deep learning holds considerable promise for enhancing the efficiency of the processing of wave-based OCE by discerning and analyzing raw data. Currently, deep learning-assisted OCE analysis is still in the early stages. Schlaefer&#x2019;s group (<xref ref-type="bibr" rid="B31">Neidhardt et al., 2020</xref>; <xref ref-type="bibr" rid="B32">Neidhardt et al., 2021</xref>; <xref ref-type="bibr" rid="B33">Neidhardt et al., 2023</xref>) demonstrated elastic velocity prediction for OCE data by using convolutional neural networks (CNNs) with dense connections. These methods have been validated on homogeneous tissue-mimicking materials (<xref ref-type="bibr" rid="B31">Neidhardt et al., 2020</xref>; <xref ref-type="bibr" rid="B32">Neidhardt et al., 2021</xref>) and <italic>ex vivo</italic> chicken heart (<xref ref-type="bibr" rid="B33">Neidhardt et al., 2023</xref>) studies. However, there are inherent differences in structural (<xref ref-type="bibr" rid="B21">Labroo et al., 2021</xref>) and physical (<xref ref-type="bibr" rid="B9">Godin and Touitou, 2007</xref>) properties between heterogeneous animal and human tissue. Additionally, involuntary movements (<xref ref-type="bibr" rid="B20">Kirkpatrick et al., 2006</xref>) and breathing motion artefacts (<xref ref-type="bibr" rid="B8">Fang et al., 2019</xref>) frequently occur during <italic>in vivo</italic> human OCE acquisitions. Consequently, their CNN models might need to adapt to the intricate textures of wave patterns from <italic>in vivo</italic> human data, instead of focusing on velocity prediction, leading to suboptimal performance in <italic>in vivo</italic> human applications.</p>
<p>In this study, we propose a novel velocity prediction network (VP-Net) that predicts bulk SAW velocities in <italic>in vivo</italic> human healthy and abnormal skin sites from raw OCE data. The network architecture incorporates a squeeze-and-excitation (SE) block (<xref ref-type="bibr" rid="B13">Hu et al., 2018</xref>) and a separable convolution block, enabling efficient feature reuse and integration without significantly increasing model complexity. Compared to existing CNN models, VP-Net could accurately predict elastic wave velocity from each raw phase slice directly, maintaining the lowest model complexity and inference time. VP-Net demonstrated high accuracy in predicting elastic wave velocities in multiple healthy skin sites and distinguishing age-related velocity changes between 20s and 30s age groups. Closed comedones, a type of acne lesion (<xref ref-type="bibr" rid="B24">Lavers, 2014</xref>), were also investigated in this study. VP-Net successfully predicted high velocities in comedones, indicating elevated skin elasticity. To the best of our knowledge, this is the first study to quantify the biomechanical properties of facial acne lesions using OCE technology and to develop an elastic wave velocity prediction model in human <italic>in vivo</italic> using deep learning. VP-Net achieved a processing speed of 1.32&#xa0;ms per slice, approximately 100 times faster than a conventional velocity estimation method. Therefore, VP-Net offers real-time elastic wave velocity prediction in human skin <italic>in vivo</italic>, providing potential clinical applications in characterizing skin aging, as well as assessing and managing the treatment of acne vulgaris.</p>
<p>Our study has five main contributions: 1) Our model demonstrated consistent and repeatable velocity predictions on tissue-mimicking phantoms, which are homogeneous and have consistent biomechanical properties for each concentration. 2) To the best of our knowledge, this is the first study to deploy a deep learning method to directly predict biomechanical property-related velocities for <italic>in vivo</italic> human healthy and abnormal datasets, showcasing its potential for skin condition diagnosis. 3) We conducted a comprehensive comparison with various neural networks and an ablation study on VP-Net to validate the efficacy of our proposed model. 4) Compared to existing models, the proposed VP-Net has the fastest inference time and the lowest model complexity while providing accurate SAW velocity predictions, even when applications shifted from tissue-mimicking materials to <italic>in vivo</italic> human skin. 5) We used gradient-weighted class activation maps (Grad-CAM) to visualize the model&#x2019;s process in predicting velocities.</p>
<p>This paper is structured as follows: The Methods section describes the details of our proposed velocity prediction deep learning model and the OCE data processing strategies to generate raw phase slices and ground truth velocities. The Results section presents the performance metrics, ablation study, and visual explanations of our network&#x2019;s efficacy in predicting velocities. Additionally, the predicted bulk velocities of agar phantoms and healthy skin sites from participants across two age groups, as well as abnormal skin, are shown. Finally, we conclude the paper with a summary of our key contributions, a discussion on the factors affecting model performance, and potential improvements for future research.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Definition of deep learning-based OCE velocity prediction pipeline</title>
<p>To facilitate accurate and fast determination of biomechanical properties, specifically Young&#x2019;s modulus, from OCE imaging, an automated prediction of bulk SAW velocity is essential. <xref ref-type="fig" rid="F1">Figure 1</xref> illustrates a schematic of our proposed OCE velocity prediction pipeline.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic of the deep learning-based optical coherence elastography (OCE) velocity prediction pipeline.</p>
</caption>
<graphic xlink:href="fbioe-12-1465823-g001.tif"/>
</fig>
<p>In this study, we designed our neural model to function as a linear regression model to predict SAW velocity from the input of single raw phase slices, <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, with <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>300</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> where the number of images <italic>L</italic> is specified according to the dataset used in the experimental results and <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>320</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The definition is provided in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m4">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>with&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the model-predicted velocity at a given depth layer <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the neural model employed in our study. During the training stage, the model-predicted velocity was compared with the ground truth velocity <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> calculated by a conventional elastic wave velocity estimation method (<xref ref-type="bibr" rid="B41">Song et al., 2013</xref>). This comparison facilitated the calculation of the training loss, which subsequently guided the updating of the model&#x2019;s trainable parameters.</p>
</sec>
<sec id="s2-2">
<title>2.2 Velocity prediction network (VP-Net) architecture</title>
<p>The architecture of our proposed velocity prediction network (VP-Net) is depicted in <xref ref-type="fig" rid="F2">Figure 2</xref>. VP-Net includes four downsample stages to extract the features and reduce the size of the feature maps from the input 2D raw phase signal in spatial-temporal dimensions, thereby predicting the velocity. VP-Net has fewer parameters and less computational demand than models like VGG16 (<xref ref-type="bibr" rid="B40">Simonyan and Zisserman, 2014</xref>) and ResNet18 (<xref ref-type="bibr" rid="B11">He et al., 2016</xref>), significantly reducing the resources for model inference and training. VP-Net is mainly formed with three blocks: convolution-batch normalization-ReLU (CBR) block, separable convolution block, and SE-Block. The network is described in detail in the following sections.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Architecture of VP-Net model. <bold>(A)</bold> CBR Block. <bold>(B)</bold> Separable Conv Block. <bold>(C)</bold> SE-Block.</p>
</caption>
<graphic xlink:href="fbioe-12-1465823-g002.tif"/>
</fig>
<sec id="s2-2-1">
<title>2.2.1 CBR block</title>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2A</xref>, the CBR Block consists of a 2D convolution layer (Conv2D), a batch normalization layer (BN), and a ReLU activation layer. Taking the input as <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the output as <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the forward process of the CBR block can be written as <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>Conv</mml:mtext>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>In terms of the setting of five CBR blocks in VP-Net, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, the first CBR block has a kernel size of 11, a stride of 4, and a filter size of 16, providing a trainable and overlapped image patch extraction function. Moreover, the large kernel size (i.e., 11) can provide a larger receptive field, which is essential to this study since the input raw phase signals include a time signal. The second CBR block has a kernel size of 3, a stride of 1, and a filter size of 16, further extracting the features from the image patches from the first CBR block. The third and fourth CBR blocks have the same kernel size of 7, a stride of 2, and a filter size of 32 and 64, respectively. The fifth CBR block has a kernel size of 3, a stride of 1, and a filter size of 128.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Separable conv block</title>
<p>To achieve a lower model complexity, we introduced the separable convolution block to VP-Net. Compared to the 2D convolution layer, a separable convolution block can extract features in both a channel-wise and a spatial-wise manner, while reducing the model complexity and computational resource demands. Assume the input feature is <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the output feature is <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the forward process of the separable conv block can be written as <xref ref-type="disp-formula" rid="e3">Equation 3</xref>:<disp-formula id="e3">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mtext>Conv</mml:mtext>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mtext>Conv</mml:mtext>
<mml:mtext>Dw</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>Regarding the setup of the three separable conv blocks in VP-Net, all depth-wise convolution layers and 1 &#xd7; 1 convolution layers have the same filter size as the <inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The kernel size of all depth-wise convolution layers is 3. The stride of all depth-wise convolution layers and 1 &#xd7; 1 convolution layers is 1.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 SE block</title>
<p>To improve the efficiency of the feature reuse, we also introduced the squeeze-and-excitation (SE) block (<xref ref-type="bibr" rid="B13">Hu et al., 2018</xref>) to VP-Net, which can improve model performance by adaptively recalibrating channel-wise feature responses, thereby improving the model&#x2019;s representational power and accuracy of velocity prediction. Taking the input as <inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with a shape of H &#xd7; W &#xd7; C, and the output as <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the forward process of the SE block can be expressed as <xref ref-type="disp-formula" rid="e4">Equation 4</xref>:<disp-formula id="e4">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>out</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>Sigmoid</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mtext>Linear</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mtext>Linear</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>GAP</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>in</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where GAP is global average pooling. After processing by GAP, the shape of the feature is converted from H &#xd7; W &#xd7; C to 1 &#xd7; 1 &#xd7; C. Linear stands for the linear projection operation, and the number of units of <inline-formula id="inf15">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mtext>Linear</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is set as C/4, and the units of the <inline-formula id="inf16">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mtext>Linear</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is set as C.</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Data pre-processing</title>
<p>Each acquired raw OCE volume (512 depth &#xd7; 512 lateral &#xd7; 512 time pixels) was cropped to 320 &#xd7; 320 pixels along the lateral and time axes to remove the head of the piezoelectric actuator and retain the region of interest. The raw phase <inline-formula id="inf17">
<mml:math id="m21">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> of the complex OCT data was linearly normalized to the range of 0&#x2013;1 by <xref ref-type="disp-formula" rid="e5">Equation 5</xref>:<disp-formula id="e5">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>The temporal-spatial normalized raw phase slices served as the input of deep learning models.</p>
</sec>
<sec id="s2-4">
<title>2.4 Ground truth elastic wave velocity estimation</title>
<p>In order to provide accurate bulk SAW velocities as ground truth for model development, a conventional wave velocity estimation method was employed, including phase change measurement, noise filter applications for wave extraction and a time-of-flight approach for velocity estimation. First, the phase difference (&#x394;<italic>&#x3c6;</italic>(<italic>x, z, t</italic>)) between two consecutive A-lines (along the temporal axis) at each spatial position was calculated to compute deformation. The axial displacement at each lateral location was then measured from the phase difference (<xref ref-type="bibr" rid="B45">Wang et al., 2007</xref>). Next, the following noise filters were applied to the spatial-temporal displacement data. A directional filter was applied to minimize the distortion effect by reflected/refracted elastic waves on the original forwarding waves (<xref ref-type="bibr" rid="B19">Kirby et al., 2019</xref>). A low pass filter with a cutoff frequency of 2&#xa0;kHz was applied to further eliminate high-frequency noise (<xref ref-type="bibr" rid="B19">Kirby et al., 2019</xref>). The remaining noise was reduced by using a 3D median filter of the kernel size of 11 &#xd7; 5 in all directions (<xref ref-type="bibr" rid="B31">Neidhardt et al., 2020</xref>). Finally, the displacement was normalized by dividing it by the maximum value of each particle along the time axis. For velocity estimation, a time-of-flight approach (<xref ref-type="bibr" rid="B41">Song et al., 2013</xref>) was used, which involved tracking the main peak of the waveform along the propagation direction. In this work, the main peak of the wavefront is defined as the maximum of the normalized displacement along lateral locations. For a given depth layer (<inline-formula id="inf18">
<mml:math id="m23">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>), the ground truth bulk velocity (<inline-formula id="inf19">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) was estimated by calculating the slope of the space-time main wavefront peak curve along lateral locations, expressed as <xref ref-type="disp-formula" rid="e6">Equation 6</xref> (<xref ref-type="bibr" rid="B41">Song et al., 2013</xref>):<disp-formula id="e6">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf20">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>with&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf21">
<mml:math id="m27">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the distance traveled by the main peak of the SAW wavefront along lateral locations during time shift <inline-formula id="inf22">
<mml:math id="m28">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. SAW velocity over the depth layer (<inline-formula id="inf23">
<mml:math id="m29">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) was obtained using linear least squares regression fitting the time shifts to the corresponding propagation distances (<xref ref-type="bibr" rid="B22">Lan et al., 2021</xref>), continuing until <inline-formula id="inf24">
<mml:math id="m30">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>300</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> or until reaching the maximum iteration limit when the relative difference of two continuous coefficient estimates exceeded 1 &#xd7; 10<sup>-6</sup> <inline-formula id="inf25">
<mml:math id="m31">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>300</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> For the abnormal skin dataset, only the lesion region was selected and fitted. In this study, the above procedures for SAW velocity estimation were designed to provide accurate ground truth for generating the labels needed during supervised training and did not influence the model&#x2019;s performance in velocity prediction once trained.</p>
</sec>
<sec id="s2-5">
<title>2.5 Experimental data acquisition and dataset</title>
<sec id="s2-5-1">
<title>2.5.1 Agar-based tissue-mimicking phantom</title>
<p>Nine concentrations of agar-based tissue-mimicking phantoms ranging from 1% to 5% with an interval of 0.5% were fabricated. The general protocol for producing the agar phantom has been described in detail in our previous study (<xref ref-type="bibr" rid="B26">Li et al., 2015</xref>). Each phantom underwent scanning at three locations with three repetitions. For algorithm development, 16,424 normalized raw phase slices of agar phantoms (sourced from 7 OCE scans for each concentration) were used for model training. A random selection of 1,147 slices was used for model validation, and 4,854 slices (sourced from 2 OCE scans for each concentration) were used for model testing.</p>
</sec>
<sec id="s2-5-2">
<title>2.5.2 <italic>In vivo</italic> human healthy skin</title>
<p>Sixteen healthy adults, including nine males and seven females from the 20s and 30s age groups, with no history of skin or medical conditions, were enrolled in this study. Each participant underwent scanning at three sites (palm, forearm, and back of hand) with three acquisitions at each site. The study was approved by the School of Science and Engineering Research Ethics Committee (SSEREC) of the University of Dundee, which also conformed to the tenets of the Declaration of Helsinki. Informed consent was obtained from each subject prior to the OCE imaging.</p>
<p>For algorithm development, overall, 28,270 normalized raw phase slices were produced from 16 participants&#x2019; OCE data. Of them, 17,671 slices (sourced from 10 participants, with an equal split of 5 each from the 20s and 30s age groups) were used for model training, 4,340 slices from 2 participants (one from each age group) were set aside for validation, and 6,259 slices from 4 independent participants (two from each age group) were used for model testing, thereby preventing data leakage.</p>
</sec>
<sec id="s2-5-3">
<title>2.5.3 <italic>In vivo</italic> human abnormal skin</title>
<p>Seven facial closed comedones from two enrolled adults were scanned using OCE imaging, with three acquisitions taken for each comedo. For model training, we utilized 580 raw phase slices sourced from 3 OCE scans. An additional 129 slices from 1 OCE scan were used for validation, and 641 slices from 3 OCE scans were used for testing.</p>
<p>The velocities of agar phantoms have been well studied (<xref ref-type="bibr" rid="B47">Yang et al., 2022</xref>; <xref ref-type="bibr" rid="B3">Brewin et al., 2015</xref>), and the wave patterns of homogeneous agar phantoms tend to be straightforward and clear (<xref ref-type="bibr" rid="B46">Wang and Larin, 2015</xref>). Thus, the existing agar phantom datasets served as a validation of our VP-Net&#x2019;s accuracy. Importantly, the wide range of agar phantom velocities covered both healthy and abnormal human skin velocities, thereby enhancing the model&#x2019;s performance through convergence of predictions. The imbalance between the smaller number of agar phantom slices and the larger number of human skin slices ensured that the model placed more weight on learning from <italic>in vivo</italic> data, characterized by multiple wave patterns, high noise and artifacts.</p>
</sec>
</sec>
<sec id="s2-6">
<title>2.6 Experimental setup and data acquisition</title>
<p>A lab-built OCE system consisting of a phase-sensitive OCT (PhS-OCT) system and an external SAW generation system was used in this study. <xref ref-type="fig" rid="F3">Figure 3</xref> presents the schematic of the experimental setup of the OCE system, along with photographs capturing the agar-based tissue-mimicking phantom (<xref ref-type="fig" rid="F3">Figure 3A</xref>) and <italic>in vivo</italic> human skin (<xref ref-type="fig" rid="F3">Figure 3B</xref>) during data acquisition. The PhS-OCT, with a central wavelength of 1,310 &#xb1; 110&#xa0;nm and sampling frequency of 92&#xa0;kHz, detected mechanically induced SAWs in the skin. The axial sampling distance and lateral sampling distance were measured as 4.7&#xa0;&#x3bc;m/pixel and 21.7&#xa0;&#x3bc;m/pixel, respectively.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Schematic of the experimental setup for the generation and detection of SAW on sample using a piezoelectric actuator and the PhS-OCT system, and photographs of <bold>(A)</bold> agar-based tissue-mimicking phantom and <bold>(B)</bold> <italic>in vivo</italic> human skin data acquisition. DAQ, data acquisition; NI, National Instruments; PC, polarization controller; PhS-OCT, phase-sensitive optical coherence tomography.</p>
</caption>
<graphic xlink:href="fbioe-12-1465823-g003.tif"/>
</fig>
<p>A piezoelectric actuator (PC4QR, Thorlabs Inc., Newton, NJ, United States) was placed in contact with the skin at an angle of 45&#xb0; to generate SAWs. The piezoelectric actuator was triggered by a waveform generator, which produced a square wave with a frequency of 2&#xa0;kHz, a peak-to-peak voltage of 10&#xa0;mV, and a duty cycle of 60%.</p>
<p>An M-B scanning protocol was employed to acquire the propagation of the SAWs. One complete acquisition was completed within 3.9&#xa0;s. The size of the effective imaging plane was &#x223c;2&#xa0;mm &#xd7; 11&#xa0;mm (depth &#xd7; lateral distance). All data were acquired through a customized LabVIEW interface (LabVIEW 2020; National Instruments, Austin, TX, United States) and stored in the computer for processing.</p>
</sec>
<sec id="s2-7">
<title>2.7 Model training details</title>
<p>All neural networks used in the study were built and trained with the TensorFlow 2.9.0 backend (<xref ref-type="bibr" rid="B1">Abadi et al., 2016</xref>). The training took place on an Nvidia RTX 4090 with 24&#xa0;GB memory. The number of training epochs of VP-Net was set to 1,000, with a batch size of 32. An Adam optimizer (<xref ref-type="bibr" rid="B17">Kingma and Ba, 2014</xref>) with a learning rate of 0.001 was used to update trainable weights in the models. The mean-absolute-error (MAE) was utilized as the loss function since we found that the mean-square-error (MSE) function led to unstable training of all neural networks in this study. An early stopping strategy was used to prevent overfitting during model training: training was stopped and the best-performing model&#x2019;s weights were saved when the validation MAE loss had not decreased for 30 training epochs. Data augmentation, such as rotation and flipping, was not used since those methods would affect the patterns and properties of perturbations, leading to unstable training.</p>
</sec>
<sec id="s2-8">
<title>2.8 Evaluation metrics</title>
<p>To evaluate the performance of the proposed deep learning-based velocity prediction for OCE, MSE and MAE were used to calculate the difference between the model-predicted velocity (<inline-formula id="inf26">
<mml:math id="m32">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>) and the ground truth velocity (<inline-formula id="inf27">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) obtained by the linear fitting of the wavefront curve. The MSE and MAE are given by <xref ref-type="disp-formula" rid="e7">Equations 7</xref>, <xref ref-type="disp-formula" rid="e8">8</xref>, respectively.<disp-formula id="e7">
<mml:math id="m34">
<mml:mrow>
<mml:mtext>MSE</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m35">
<mml:mrow>
<mml:mtext>MAE</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Comparison with neural networks on velocity prediction</title>
<p>The performance of our VP-Net on bulk SAW velocities of agar and human skin datasets was compared with that of various published deep-learning networks, including the VGG16/19 (<xref ref-type="bibr" rid="B40">Simonyan and Zisserman, 2014</xref>), ResNet18/34/50/101 (<xref ref-type="bibr" rid="B11">He et al., 2016</xref>), DenseNet121/169 (<xref ref-type="bibr" rid="B12">Huang et al., 2017</xref>), and MobileNetV2 (<xref ref-type="bibr" rid="B38">Sandler et al., 2018</xref>). The training details and training strategy of the compared models were consistent with those of VP-Net. The evaluation was based on the test set to avoid data leakage. The evaluation metrics were MAE and MSE, and a lower value indicated a more accurate velocity prediction.</p>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> and <xref ref-type="table" rid="T2">Table 2</xref> demonstrate the comparison results of MSE and MAE among various networks based on the nine concentrations of the agar-based tissue-mimicking phantoms from the test set. VP-Net had the best MSE and MAE performance in the 1.5%, 3.0% and 4.0% agar phantoms. Furthermore, VP-Net had similar MSE (0.225) and MAE (0.393) values to the MobileNetV2 (MSE: 0.183; MAE: 0.325) in the 2.5% agar phantom. However, VP-Net had a relatively low performance in 1.0%, 3.5%, and 5.0% agar phantoms from the test set.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>MSE of neural networks for SAW velocity prediction on agar phantoms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">1.0% agar</th>
<th align="left">1.5% agar</th>
<th align="left">2.0% agar</th>
<th align="left">2.5% agar</th>
<th align="left">3.0% agar</th>
<th align="left">3.5% agar</th>
<th align="left">4.0% agar</th>
<th align="left">4.5% agar</th>
<th align="left">5.0% agar</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VGG16</td>
<td align="left">1.517 &#xb1; 0.309</td>
<td align="left">0.223 &#xb1; 0.236</td>
<td align="left">0.200 &#xb1; 0.237</td>
<td align="left">0.241 &#xb1; 0.255</td>
<td align="left">0.069 &#xb1; 0.099</td>
<td align="left">0.760 &#xb1; 0.963</td>
<td align="left">0.181 &#xb1; 0.226</td>
<td align="left">0.468 &#xb1; 0.595</td>
<td align="left">0.797 &#xb1; 1.007</td>
</tr>
<tr>
<td align="left">VGG19</td>
<td align="left">1.519 &#xb1; 0.366</td>
<td align="left">0.213 &#xb1; 0.226</td>
<td align="left">0.167 &#xb1; 0.197</td>
<td align="left">0.246 &#xb1; 0.258</td>
<td align="left">0.077 &#xb1; 0.108</td>
<td align="left">0.725 &#xb1; 1.020</td>
<td align="left">0.188 &#xb1; 0.286</td>
<td align="left">0.280 &#xb1; 0.377</td>
<td align="left">
<bold>0.513 &#xb1; 0.648</bold>
</td>
</tr>
<tr>
<td align="left">ResNet18</td>
<td align="left">4.799 &#xb1; 3.310</td>
<td align="left">0.290 &#xb1; 0.365</td>
<td align="left">0.330 &#xb1; 0.784</td>
<td align="left">0.367 &#xb1; 0.432</td>
<td align="left">0.849 &#xb1; 0.474</td>
<td align="left">0.634 &#xb1; 0.776</td>
<td align="left">2.354 &#xb1; 1.342</td>
<td align="left">3.406 &#xb1; 2.068</td>
<td align="left">5.169 &#xb1; 3.287</td>
</tr>
<tr>
<td align="left">ResNet34</td>
<td align="left">0.830 &#xb1; 0.440</td>
<td align="left">0.205 &#xb1; 0.225</td>
<td align="left">0.208 &#xb1; 0.330</td>
<td align="left">0.203 &#xb1; 0.293</td>
<td align="left">0.491 &#xb1; 0.287</td>
<td align="left">0.565 &#xb1; 0.750</td>
<td align="left">0.261 &#xb1; 0.305</td>
<td align="left">0.525 &#xb1; 0.659</td>
<td align="left">0.858 &#xb1; 1.137</td>
</tr>
<tr>
<td align="left">ResNet50</td>
<td align="left">
<bold>0.217 &#xb1; 2.547</bold>
</td>
<td align="left">0.290 &#xb1; 0.345</td>
<td align="left">1.500 &#xb1; 6.456</td>
<td align="left">5.641 &#xb1; 4.618</td>
<td align="left">1.746 &#xb1; 0.609</td>
<td align="left">0.896 &#xb1; 1.154</td>
<td align="left">0.267 &#xb1; 0.364</td>
<td align="left">
<bold>0.184 &#xb1; 0.246</bold>
</td>
<td align="left">2.334 &#xb1; 1.466</td>
</tr>
<tr>
<td align="left">ResNet101</td>
<td align="left">1.885 &#xb1; 0.329</td>
<td align="left">0.305 &#xb1; 0.306</td>
<td align="left">
<bold>0.152 &#xb1; 0.199</bold>
</td>
<td align="left">0.294 &#xb1; 0.274</td>
<td align="left">1.107 &#xb1; 0.383</td>
<td align="left">1.571 &#xb1; 1.802</td>
<td align="left">2.683 &#xb1; 1.255</td>
<td align="left">3.081 &#xb1; 1.712</td>
<td align="left">5.067 &#xb1; 2.817</td>
</tr>
<tr>
<td align="left">DenseNet121</td>
<td align="left">5.253 &#xb1; 2.301</td>
<td align="left">1.094 &#xb1; 0.677</td>
<td align="left">0.796 &#xb1; 0.925</td>
<td align="left">0.388 &#xb1; 0.687</td>
<td align="left">0.089 &#xb1; 0.103</td>
<td align="left">
<bold>0.503 &#xb1; 0.625</bold>
</td>
<td align="left">0.643 &#xb1; 0.577</td>
<td align="left">1.264 &#xb1; 1.134</td>
<td align="left">2.286 &#xb1; 1.883</td>
</tr>
<tr>
<td align="left">DenseNet169</td>
<td align="left">4.298 &#xb1; 2.212</td>
<td align="left">0.559 &#xb1; 0.488</td>
<td align="left">0.927 &#xb1; 1.126</td>
<td align="left">0.247 &#xb1; 0.467</td>
<td align="left">0.291 &#xb1; 0.226</td>
<td align="left">0.558 &#xb1; 0.674</td>
<td align="left">1.972 &#xb1; 1.071</td>
<td align="left">2.682 &#xb1; 1.594</td>
<td align="left">4.932 &#xb1; 2.783</td>
</tr>
<tr>
<td align="left">MobileNetV2</td>
<td align="left">5.024 &#xb1; 1.334</td>
<td align="left">1.280 &#xb1; 0.786</td>
<td align="left">0.495 &#xb1; 0.509</td>
<td align="left">
<bold>0.183 &#xb1; 0.309</bold>
</td>
<td align="left">0.162 &#xb1; 0.153</td>
<td align="left">0.590 &#xb1; 0.788</td>
<td align="left">0.266 &#xb1; 0.315</td>
<td align="left">0.490 &#xb1; 0.606</td>
<td align="left">1.058 &#xb1; 1.292</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">1.158 &#xb1; 1.294</td>
<td align="left">
<bold>0.121 &#xb1; 0.128</bold>
</td>
<td align="left">0.369 &#xb1; 0.420</td>
<td align="left">0.225 &#xb1; 0.256</td>
<td align="left">
<bold>0.057 &#xb1; 0.090</bold>
</td>
<td align="left">0.742 &#xb1; 1.350</td>
<td align="left">
<bold>0.149 &#xb1; 0.192</bold>
</td>
<td align="left">0.636 &#xb1; 0.696</td>
<td align="left">0.995 &#xb1; 1.074</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The results are shown as mean &#xb1; standard deviation; the best MSE value for each agar phantom is highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>MAE of neural networks for SAW velocity prediction on agar phantoms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">1.0% agar</th>
<th align="left">1.5% agar</th>
<th align="left">2.0% agar</th>
<th align="left">2.5% agar</th>
<th align="left">3.0% agar</th>
<th align="left">3.5% agar</th>
<th align="left">4.0% agar</th>
<th align="left">4.5% agar</th>
<th align="left">5.0% agar</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VGG16</td>
<td align="left">1.225 &#xb1; 0.125</td>
<td align="left">0.394 &#xb1; 0.260</td>
<td align="left">0.364 &#xb1; 0.260</td>
<td align="left">0.415 &#xb1; 0.262</td>
<td align="left">0.209 &#xb1; 0.159</td>
<td align="left">0.709 &#xb1; 0.507</td>
<td align="left">0.348 &#xb1; 0.246</td>
<td align="left">0.557 &#xb1; 0.397</td>
<td align="left">0.727 &#xb1; 0.518</td>
</tr>
<tr>
<td align="left">VGG19</td>
<td align="left">1.224 &#xb1; 0.143</td>
<td align="left">0.385 &#xb1; 0.254</td>
<td align="left">0.332 &#xb1; 0.238</td>
<td align="left">0.421 &#xb1; 0.263</td>
<td align="left">0.223 &#xb1; 0.165</td>
<td align="left">0.681 &#xb1; 0.511</td>
<td align="left">0.336 &#xb1; 0.274</td>
<td align="left">0.426 &#xb1; 0.315</td>
<td align="left">
<bold>0.583 &#xb1; 0.417</bold>
</td>
</tr>
<tr>
<td align="left">ResNet18</td>
<td align="left">2.079 &#xb1; 0.689</td>
<td align="left">0.436 &#xb1; 0.316</td>
<td align="left">0.417 &#xb1; 0.395</td>
<td align="left">0.507 &#xb1; 0.332</td>
<td align="left">0.879 &#xb1; 0.274</td>
<td align="left">0.652 &#xb1; 0.458</td>
<td align="left">1.462 &#xb1; 0.464</td>
<td align="left">1.750 &#xb1; 0.586</td>
<td align="left">2.151 &#xb1; 0.735</td>
</tr>
<tr>
<td align="left">ResNet34</td>
<td align="left">0.881 &#xb1; 0.231</td>
<td align="left">0.377 &#xb1; 0.250</td>
<td align="left">0.361 &#xb1; 0.279</td>
<td align="left">0.362 &#xb1; 0.267</td>
<td align="left">0.666 &#xb1; 0.219</td>
<td align="left">0.607 &#xb1; 0.444</td>
<td align="left">0.424 &#xb1; 0.285</td>
<td align="left">0.592 &#xb1; 0.418</td>
<td align="left">0.747 &#xb1; 0.548</td>
</tr>
<tr>
<td align="left">ResNet50</td>
<td align="left">
<bold>0.258 &#xb1; 0.387</bold>
</td>
<td align="left">0.444 &#xb1; 0.305</td>
<td align="left">0.602 &#xb1; 1.067</td>
<td align="left">2.248 &#xb1; 0.767</td>
<td align="left">1.301 &#xb1; 0.230</td>
<td align="left">0.766 &#xb1; 0.557</td>
<td align="left">0.413 &#xb1; 0.311</td>
<td align="left">
<bold>0.344 &#xb1; 0.256</bold>
</td>
<td align="left">1.440 &#xb1; 0.510</td>
</tr>
<tr>
<td align="left">ResNet101</td>
<td align="left">1.368 &#xb1; 0.121</td>
<td align="left">0.466 &#xb1; 0.298</td>
<td align="left">
<bold>0.322 &#xb1; 0.219</bold>
</td>
<td align="left">0.470 &#xb1; 0.270</td>
<td align="left">1.035 &#xb1; 0.190</td>
<td align="left">1.040 &#xb1; 0.699</td>
<td align="left">1.588 &#xb1; 0.402</td>
<td align="left">1.681 &#xb1; 0.507</td>
<td align="left">2.158 &#xb1; 0.641</td>
</tr>
<tr>
<td align="left">DenseNet121</td>
<td align="left">2.243 &#xb1; 0.472</td>
<td align="left">0.988 &#xb1; 0.344</td>
<td align="left">0.759 &#xb1; 0.470</td>
<td align="left">0.464 &#xb1; 0.415</td>
<td align="left">0.250 &#xb1; 0.165</td>
<td align="left">
<bold>0.579 &#xb1; 0.410</bold>
</td>
<td align="left">0.709 &#xb1; 0.375</td>
<td align="left">0.998 &#xb1; 0.518</td>
<td align="left">1.365 &#xb1; 0.651</td>
</tr>
<tr>
<td align="left">DenseNet169</td>
<td align="left">2.016 &#xb1; 0.485</td>
<td align="left">0.659 &#xb1; 0.354</td>
<td align="left">0.809 &#xb1; 0.522</td>
<td align="left">0.367 &#xb1; 0.335</td>
<td align="left">0.490 &#xb1; 0.224</td>
<td align="left">0.613 &#xb1; 0.428</td>
<td align="left">1.344 &#xb1; 0.408</td>
<td align="left">1.557 &#xb1; 0.508</td>
<td align="left">2.124 &#xb1; 0.649</td>
</tr>
<tr>
<td align="left">MobileNetV2</td>
<td align="left">2.223 &#xb1; 0.290</td>
<td align="left">1.071 &#xb1; 0.365</td>
<td align="left">0.599 &#xb1; 0.370</td>
<td align="left">
<bold>0.325 &#xb1; 0.278</bold>
</td>
<td align="left">0.353 &#xb1; 0.195</td>
<td align="left">0.619 &#xb1; 0.455</td>
<td align="left">0.427 &#xb1; 0.289</td>
<td align="left">0.573 &#xb1; 0.402</td>
<td align="left">0.843 &#xb1; 0.589</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">0.913 &#xb1; 0.569</td>
<td align="left">
<bold>0.295 &#xb1; 0.185</bold>
</td>
<td align="left">0.505 &#xb1; 0.338</td>
<td align="left">0.393 &#xb1; 0.266</td>
<td align="left">
<bold>0.184 &#xb1; 0.150</bold>
</td>
<td align="left">0.672 &#xb1; 0.539</td>
<td align="left">
<bold>0.313 &#xb1; 0.227</bold>
</td>
<td align="left">0.673 &#xb1; 0.428</td>
<td align="left">0.837 &#xb1; 0.542</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The results are shown as mean &#xb1; standard deviation; the best MAE value for each agar phantom is highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T3">Table 3</xref> shows the comparison of VP-Net with various networks based on <italic>in vivo</italic> human healthy and abnormal skin datasets. The proposed VP-Net performed the best for the back of hand (MSE: 1.585; MAE: 0.992) and forearm (MAE: 0.997). The ResNet101 demonstrated the lowest MSE (1.844) and MAE (1.133) for the palm. For the closed comedones dataset, VP-Net showed the second-best performance, with MSE of 1.051 and MAE of 0.863.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>MSE and MAE of neural networks for SAW velocity prediction on <italic>in vivo</italic> human skin.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Model</th>
<th colspan="2" align="left">Back of hand</th>
<th colspan="2" align="left">Palm</th>
<th colspan="2" align="left">Forearm</th>
<th colspan="2" align="left">Closed comedones</th>
</tr>
<tr>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VGG16</td>
<td align="left">1.805 &#xb1; 2.016</td>
<td align="left">1.130 &#xb1; 0.727</td>
<td align="left">2.264 &#xb1; 2.664</td>
<td align="left">1.228 &#xb1; 0.870</td>
<td align="left">2.054 &#xb1; 2.574</td>
<td align="left">1.027 &#xb1; 0.834</td>
<td align="left">
<bold>0.643 &#xb1; 0.665</bold>
</td>
<td align="left">
<bold>0.702 &#xb1; 0.388</bold>
</td>
</tr>
<tr>
<td align="left">VGG19</td>
<td align="left">2.373 &#xb1; 2.998</td>
<td align="left">1.287 &#xb1; 0.847</td>
<td align="left">2.428 &#xb1; 2.850</td>
<td align="left">1.269 &#xb1; 0.904</td>
<td align="left">
<bold>1.725 &#xb1; 2.592</bold>
</td>
<td align="left">1.012 &#xb1; 0.837</td>
<td align="left">1.118 &#xb1; 1.048</td>
<td align="left">0.929 &#xb1; 0.506</td>
</tr>
<tr>
<td align="left">ResNet18</td>
<td align="left">21.424 &#xb1; 13.997</td>
<td align="left">4.274 &#xb1; 1.777</td>
<td align="left">4.461 &#xb1; 4.919</td>
<td align="left">1.759 &#xb1; 1.169</td>
<td align="left">10.404 &#xb1; 8.477</td>
<td align="left">2.890 &#xb1; 1.432</td>
<td align="left">4.045 &#xb1; 2.673</td>
<td align="left">1.856 &#xb1; 0.774</td>
</tr>
<tr>
<td align="left">ResNet34</td>
<td align="left">10.797 &#xb1; 10.183</td>
<td align="left">2.893 &#xb1; 1.558</td>
<td align="left">4.739 &#xb1; 5.169</td>
<td align="left">1.820 &#xb1; 1.195</td>
<td align="left">5.080 &#xb1; 6.163</td>
<td align="left">1.866 &#xb1; 1.264</td>
<td align="left">5.481 &#xb1; 3.232</td>
<td align="left">2.206 &#xb1; 0.783</td>
</tr>
<tr>
<td align="left">ResNet50</td>
<td align="left">3.340 &#xb1; 7.083</td>
<td align="left">1.378 &#xb1; 1.200</td>
<td align="left">7.981 &#xb1; 8.224</td>
<td align="left">2.405 &#xb1; 1.482</td>
<td align="left">4.507 &#xb1; 9.203</td>
<td align="left">1.478 &#xb1; 1.524</td>
<td align="left">12.435 &#xb1; 7.287</td>
<td align="left">3.363 &#xb1; 1.059</td>
</tr>
<tr>
<td align="left">ResNet101</td>
<td align="left">1.941 &#xb1; 2.484</td>
<td align="left">1.143 &#xb1; 0.796</td>
<td align="left">
<bold>1.844 &#xb1; 2.105</bold>
</td>
<td align="left">
<bold>1.133 &#xb1; 0.748</bold>
</td>
<td align="left">2.052 &#xb1; 3.080</td>
<td align="left">1.103 &#xb1; 0.914</td>
<td align="left">1.474 &#xb1; 1.505</td>
<td align="left">1.050 &#xb1; 0.609</td>
</tr>
<tr>
<td align="left">DenseNet121</td>
<td align="left">11.885 &#xb1; 8.404</td>
<td align="left">3.170 &#xb1; 1.355</td>
<td align="left">3.342 &#xb1; 3.857</td>
<td align="left">1.502 &#xb1; 1.042</td>
<td align="left">7.578 &#xb1; 6.893</td>
<td align="left">2.448 &#xb1; 1.260</td>
<td align="left">3.352 &#xb1; 2.467</td>
<td align="left">1.661 &#xb1; 0.771</td>
</tr>
<tr>
<td align="left">DenseNet169</td>
<td align="left">7.913 &#xb1; 6.914</td>
<td align="left">2.506 &#xb1; 1.279</td>
<td align="left">3.281 &#xb1; 3.914</td>
<td align="left">1.489 &#xb1; 1.032</td>
<td align="left">5.470 &#xb1; 5.626</td>
<td align="left">2.033 &#xb1; 1.157</td>
<td align="left">2.714 &#xb1; 2.436</td>
<td align="left">1.451 &#xb1; 0.781</td>
</tr>
<tr>
<td align="left">MobileNetV2</td>
<td align="left">10.498 &#xb1; 7.765</td>
<td align="left">2.988 &#xb1; 1.254</td>
<td align="left">5.198 &#xb1; 5.636</td>
<td align="left">1.912 &#xb1; 1.241</td>
<td align="left">7.235 &#xb1; 6.279</td>
<td align="left">2.431 &#xb1; 1.152</td>
<td align="left">9.856 &#xb1; 4.769</td>
<td align="left">3.031 &#xb1; 0.817</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">
<bold>1.585 &#xb1; 2.283</bold>
</td>
<td align="left">
<bold>0.992 &#xb1; 0.775</bold>
</td>
<td align="left">2.450 &#xb1; 2.903</td>
<td align="left">1.274 &#xb1; 0.910</td>
<td align="left">2.007 &#xb1; 3.502</td>
<td align="left">
<bold>0.997 &#xb1; 1.007</bold>
</td>
<td align="left">1.051 &#xb1; 1.681</td>
<td align="left">0.863 &#xb1; 0.554</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The results are shown as mean &#xb1; standard deviation. The best values of MSE and MAE for each human skin site and closed comedones are highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Influence of VP-Net size</title>
<p>To investigate the influence of VP-Net size on prediction performance and model efficiency, we varied the filter sizes utilized in VP-Net. Our proposed VP-Net architecture included five CBR blocks and three separable convolution blocks (<xref ref-type="fig" rid="F2">Figure 2</xref>). The baseline VP-Net (VP-Net-B) was defined with initial filter sizes for the five CBR blocks set to <inline-formula id="inf28">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>CBR</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {16, 16, 32, 64, 128}, and for the three separable convolution blocks, <inline-formula id="inf29">
<mml:math id="m37">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>SCB</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {32, 64, 128}. We also proposed two additional VP-Net sizes, called VP-Net-S (<inline-formula id="inf30">
<mml:math id="m38">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>CBR</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {16, 16, 16, 32, 64}; <inline-formula id="inf31">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>SCB</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {16, 32, 64}) and VP-Net-L (<inline-formula id="inf32">
<mml:math id="m40">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>CBR</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {32, 32, 64, 128, 256}; <inline-formula id="inf33">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mtext>SCB</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {64, 128, 256}).</p>
<p>
<xref ref-type="table" rid="T4">Table 4</xref> and <xref ref-type="table" rid="T5">Table 5</xref> compare the evaluation metrics among the three VP-Net sizes on agar phantoms and <italic>in vivo</italic> human skin datasets. VP-Net-L demonstrated relatively high performance in the 1%, 3%, 4.5% and 5% agar phantoms but did not achieve the best metrics for human skin. VP-Net-B achieved the lowest MSE (1.585) and MAE (0.992) on the back of hand, the lowest MAE (0.997) on the forearm, and similar MSE and MAE values to VP-Net-S on the palm. In the closed comedones, VP-Net-S had the best performance with MSE of 0.659 and MAE of 0.661.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Comparison of VP-Net sizes for SAW velocity prediction on agar phantoms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left"/>
<th align="left">Agar 1.0%</th>
<th align="left">Agar 1.5%</th>
<th align="left">Agar 2.0%</th>
<th align="left">Agar 2.5%</th>
<th align="left">Agar 3.0%</th>
<th align="left">Agar 3.5%</th>
<th align="left">Agar 4.0%</th>
<th align="left">Agar 4.5%</th>
<th align="left">Agar 5.0%</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VP-Net-S</td>
<td rowspan="3" align="left">MSE</td>
<td align="left">0.188 &#xb1; 0.085</td>
<td align="left">0.121 &#xb1; 0.137</td>
<td align="left">
<bold>0.197 &#xb1; 0.238</bold>
</td>
<td align="left">
<bold>0.126 &#xb1; 0.182</bold>
</td>
<td align="left">0.088 &#xb1; 0.096</td>
<td align="left">2.770 &#xb1; 2.899</td>
<td align="left">0.179 &#xb1; 0.223</td>
<td align="left">0.717 &#xb1; 0.778</td>
<td align="left">1.087 &#xb1; 1.079</td>
</tr>
<tr>
<td align="left">VP-Net-B</td>
<td align="left">1.158 &#xb1; 1.294</td>
<td align="left">
<bold>0.121 &#xb1; 0.128</bold>
</td>
<td align="left">0.369 &#xb1; 0.420</td>
<td align="left">0.225 &#xb1; 0.256</td>
<td align="left">0.057 &#xb1; 0.090</td>
<td align="left">
<bold>0.742 &#xb1; 1.350</bold>
</td>
<td align="left">
<bold>0.149 &#xb1; 0.192</bold>
</td>
<td align="left">0.636 &#xb1; 0.696</td>
<td align="left">0.995 &#xb1; 1.074</td>
</tr>
<tr>
<td align="left">VP-Net-L</td>
<td align="left">
<bold>0.178 &#xb1; 0.120</bold>
</td>
<td align="left">0.130 &#xb1; 0.153</td>
<td align="left">0.377 &#xb1; 0.416</td>
<td align="left">0.204 &#xb1; 0.255</td>
<td align="left">
<bold>0.042 &#xb1; 0.070</bold>
</td>
<td align="left">4.268 &#xb1; 3.137</td>
<td align="left">0.159 &#xb1; 0.188</td>
<td align="left">
<bold>0.608 &#xb1; 0.684</bold>
</td>
<td align="left">
<bold>0.884 &#xb1; 0.968</bold>
</td>
</tr>
<tr>
<td align="left">VP-Net-S</td>
<td rowspan="3" align="left">MAE</td>
<td align="left">0.420 &#xb1; 0.106</td>
<td align="left">
<bold>0.291 &#xb1; 0.190</bold>
</td>
<td align="left">
<bold>0.359 &#xb1; 0.261</bold>
</td>
<td align="left">
<bold>0.275 &#xb1; 0.225</bold>
</td>
<td align="left">0.250 &#xb1; 0.159</td>
<td align="left">1.404 &#xb1; 0.894</td>
<td align="left">0.346 &#xb1; 0.244</td>
<td align="left">0.716 &#xb1; 0.452</td>
<td align="left">0.892 &#xb1; 0.540</td>
</tr>
<tr>
<td align="left">VP-Net-B</td>
<td align="left">0.913 &#xb1; 0.569</td>
<td align="left">0.295 &#xb1; 0.185</td>
<td align="left">0.505 &#xb1; 0.338</td>
<td align="left">0.393 &#xb1; 0.266</td>
<td align="left">0.184 &#xb1; 0.150</td>
<td align="left">
<bold>0.672 &#xb1; 0.539</bold>
</td>
<td align="left">
<bold>0.313 &#xb1; 0.227</bold>
</td>
<td align="left">0.673 &#xb1; 0.428</td>
<td align="left">0.837 &#xb1; 0.542</td>
</tr>
<tr>
<td align="left">VP-Net-L</td>
<td align="left">
<bold>0.396 &#xb1; 0.144</bold>
</td>
<td align="left">0.296 &#xb1; 0.205</td>
<td align="left">0.512 &#xb1; 0.339</td>
<td align="left">0.367 &#xb1; 0.264</td>
<td align="left">0.157 &#xb1; 0.131</td>
<td align="left">1.889 &#xb1; 0.835</td>
<td align="left">0.329 &#xb1; 0.225</td>
<td align="left">
<bold>0.653 &#xb1; 0.425</bold>
</td>
<td align="left">
<bold>0.785 &#xb1; 0.517</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The results are shown as mean &#xb1; standard deviation. The best values of MSE and MAE for each agar phantom are highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Comparison of VP-Net sizes for SAW velocity prediction on <italic>in vivo</italic> human skin.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Model</th>
<th colspan="2" align="left">Back of hand</th>
<th colspan="2" align="left">Palm</th>
<th colspan="2" align="left">Forearm</th>
<th colspan="2" align="left">Closed comedones</th>
</tr>
<tr>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
<th align="left">MSE</th>
<th align="left">MAE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VP-Net-S</td>
<td align="left">1.689 &#xb1; 2.093</td>
<td align="left">1.065 &#xb1; 0.744</td>
<td align="left">
<bold>2.329 &#xb1; 2.867</bold>
</td>
<td align="left">
<bold>1.245 &#xb1; 0.883</bold>
</td>
<td align="left">
<bold>2.002 &#xb1; 3.356</bold>
</td>
<td align="left">1.035 &#xb1; 0.965</td>
<td align="left">
<bold>0.659 &#xb1; 0.891</bold>
</td>
<td align="left">
<bold>0.661 &#xb1; 0.471</bold>
</td>
</tr>
<tr>
<td align="left">VP-Net-B</td>
<td align="left">
<bold>1.585 &#xb1; 2.283</bold>
</td>
<td align="left">
<bold>0.992 &#xb1; 0.775</bold>
</td>
<td align="left">2.450 &#xb1; 2.903</td>
<td align="left">1.274 &#xb1; 0.910</td>
<td align="left">2.007 &#xb1; 3.502</td>
<td align="left">
<bold>0.997 &#xb1; 1.007</bold>
</td>
<td align="left">1.051 &#xb1; 1.681</td>
<td align="left">0.863 &#xb1; 0.554</td>
</tr>
<tr>
<td align="left">VP-Net-L</td>
<td align="left">1.749 &#xb1; 2.391</td>
<td align="left">1.058 &#xb1; 0.793</td>
<td align="left">2.358 &#xb1; 2.721</td>
<td align="left">1.252 &#xb1; 0.889</td>
<td align="left">2.130 &#xb1; 3.478</td>
<td align="left">1.059 &#xb1; 1.004</td>
<td align="left">0.660 &#xb1; 0.599</td>
<td align="left">0.722 &#xb1; 0.372</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The results are shown as mean &#xb1; standard deviation. The best values of MSE and MAE for each human skin site and closed comedones are highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-3">
<title>3.3 Model complexity analysis</title>
<p>The model&#x2019;s inference efficiency among various batch sizes for input data was evaluated (<xref ref-type="fig" rid="F4">Figure 4</xref>). We utilized the same computation platform to compare the processing time between the conventional velocity estimation method and the neural network-based methods, as shown in <xref ref-type="fig" rid="F4">Figure 4A</xref>. <xref ref-type="fig" rid="F4">Figure 4B</xref> demonstrates the inference time comparison between the neural networks. VP-Net outperformed the other neural networks on both CPU and GPU. Moreover, when the batch size increased, VP-Net achieved a higher throughput than the other networks. Model complexity was compared based on the floating-point operations (FLOPs) and the number of network parameters, as shown in <xref ref-type="fig" rid="F4">Figure 4C</xref>. The VP-Net family had the lowest FLOPs among the compared neural networks, and VP-Net-S and VP-Net-B had the lowest and second-lowest numbers of parameters, respectively.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Model complexity comparison results. <bold>(A)</bold> Latency time of the various methods based on CPU (Intel i9-12900K). <bold>(B)</bold> Latency time of the various methods based on GPU (Nvidia RTX 4090). <bold>(C)</bold> Model parameters and floating-point operations (FLOPs) comparison.</p>
</caption>
<graphic xlink:href="fbioe-12-1465823-g004.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 Interpretation of proposed deep learning network</title>
<p>Gradient-weighted class activation maps (Grad-CAM) (<xref ref-type="bibr" rid="B39">Selvaraju et al., 2017</xref>) were employed to interpret the decision-making process of VP-Net when predicting wave velocity from a single raw phase slice. Distinct from the original Grad-CAM, which generated activation maps based on the model&#x2019;s output class label, this experiment used the model-predicted SAW velocity to produce the Grad-CAMs. Based on the model architecture (<xref ref-type="fig" rid="F2">Figure 2</xref>), we generated Grad-CAMs from the first convolution layer of each CBR block. These maps emphasize areas crucial for the model&#x2019;s prediction, providing in-depth information on its internal operations.</p>
<p>
<xref ref-type="fig" rid="F5">Figure 5</xref> shows examples of the raw phase slices from the agar phantom (<xref ref-type="fig" rid="F5">Figure 5</xref> (1A)), human healthy skin (<xref ref-type="fig" rid="F5">Figure 5</xref> (2A&#x2013;4A)), and abnormal skin (<xref ref-type="fig" rid="F5">Figure 5</xref> (5A)), accompanied by their respective Grad-CAMs (<xref ref-type="fig" rid="F5">Figures 5C&#x2013;F</xref>) produced from our proposed VP-Net. The raw phase slices were selected from the test set, which was not presented in the model training and validation stages. Corresponding axial displacement slices (<xref ref-type="fig" rid="F5">Figure 5B</xref>) were used to estimate the ground truth SAW velocity. Our VP-Net demonstrated high accuracy in velocity prediction for tissue-mimicking phantoms, three healthy skin sites, and abnormal skin, with differences between predicted and ground truth velocities being less than 0.3&#xa0;m/s.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Representative normalized raw phase slices, axial displacement slices, and Gradient-weighted Class Activation Maps (Grad-CAMs) from the 2D convolution layers in VP-Net for tissue-mimicking phantom, <italic>in vivo</italic> human healthy skin sites, and <italic>in vivo</italic> human abnormal skin. (1) 2% agar-based tissue-mimicking phantom, at a depth of 1,175&#xa0;&#xb5;m; (2) palm from a male in the 20s age group, at a depth of 131.6&#xa0;&#xb5;m; (3) back of hand from a male in the 30s age group, at a depth of 197.4&#xa0;&#xb5;m; (4) forearm from a female in the 20s age group, at a depth of 470&#xa0;&#xb5;m; (5) a closed comedo from a male in the 20s age group, at a depth of 225.6&#xa0;&#xb5;m. <bold>(A)</bold> Raw phase slice, <bold>(B)</bold> axial displacement slice, <bold>(C</bold>&#x2013;<bold>F)</bold> Grad-CAMs from the 1st, 3rd, 5th, and 7th 2D convolution layers, respectively. The velocity predicted by VP-Net and the ground truth velocity for each sample are displayed beneath the raw phase and displacement slices, respectively.</p>
</caption>
<graphic xlink:href="fbioe-12-1465823-g005.tif"/>
</fig>
<p>The perturbations caused by SAW propagation, surrounded by massive noise, were noticeable in all the raw phase slices (<xref ref-type="fig" rid="F5">Figure 5A</xref>). Due to the homogeneous properties of the agar phantom, the processed displacement slice (<xref ref-type="fig" rid="F5">Figure 5</xref> (1B)) displayed clear wave propagation and intense signals with less noise and fewer artifacts, even at a significant depth of 1,175&#xa0;&#xb5;m. The Grad-CAMs revealed a clear shape of the main wave propagation at the first and third convolution layers.</p>
<p>For the palm, which displayed a clear and distinct pattern of wave propagation on the displacement slice (<xref ref-type="fig" rid="F5">Figure 5</xref> (2B)), the Grad-CAMs appeared to identify the main wave&#x2019;s contour and texture, as reflected in the outputs of the first to third convolution layers (<xref ref-type="fig" rid="F5">Figure 5</xref> (2C, D)). In the back of the hand, some distortion was observed, possibly caused by movement (<xref ref-type="fig" rid="F5">Figure 5</xref> (3B)). Interestingly, the model seemed to recognize the main wave&#x2019;s textures and shape, focusing less on the distorted region (<xref ref-type="fig" rid="F5">Figure 5</xref> (3C, D)). The forearm slice, taken from a greater depth (470&#xa0;&#xb5;m), exhibited more noise in its reconstructed displacement slice (<xref ref-type="fig" rid="F5">Figure 5</xref> (4B)). Still, the model&#x2019;s first to third convolution layers (<xref ref-type="fig" rid="F5">Figure 5</xref> (4C, D)) appeared to capture the main wave&#x2019;s texture.</p>
<p>Regarding the abnormal skin dataset, the wave pattern changed due to the boundary between closed comedones and the surrounding healthy skin at a lateral distance of 4.5&#xa0;mm on the displacement slice (<xref ref-type="fig" rid="F5">Figure 5</xref> (5B)). In the first convolution layer (<xref ref-type="fig" rid="F5">Figure 5</xref> (5C)), only the SAW propagation across the closed comedo region was shown as a high-intensity pattern. From the deeper convolution layers of the agar phantom and human skins (<xref ref-type="fig" rid="F5">Figures 5E, F</xref>), which likely indicated the high-level features extracted, intensity changes around the wave propagation region could be noticed.</p>
</sec>
<sec id="s3-5">
<title>3.5 Prediction of SAW velocities using VP-Net</title>
<p>The bulk SAW velocities of agar-based tissue-mimicking phantoms, <italic>in vivo</italic> healthy human skin, and abnormal skin were predicted using our trained VP-Net on the test set. The input raw phase slices from the test set were not included in the training and validation datasets. <xref ref-type="table" rid="T6">Table 6</xref> summarizes the SAW velocities predicted by VP-Net, compared with the ground truth velocities estimated using the time-of-flight approach.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>SAW velocities of agar-based tissue-mimicking phantoms, healthy skin at three sites in the 20s and 30s age groups, and abnormal skin, estimated using the time-of-flight approach and the proposed VP-Net.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Sample</th>
<th align="left">Number of OCE scans</th>
<th align="left">Number of raw phase slices</th>
<th align="left">Age group</th>
<th align="left">Approach</th>
<th align="left">SAW velocity (m/s)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">1% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">400</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">2.66 &#xb1; 0.09</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">3.09 &#xb1; 0.05</td>
</tr>
<tr>
<td rowspan="2" align="left">1.5% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">421</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">4.39 &#xb1; 0.32</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">4.36 &#xb1; 0.13</td>
</tr>
<tr>
<td rowspan="2" align="left">2% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">293</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">6.26 &#xb1; 0.32</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">6.39 &#xb1; 0.28</td>
</tr>
<tr>
<td rowspan="2" align="left">2.5% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">533</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">8.88 &#xb1; 0.25</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">8.99 &#xb1; 0.22</td>
</tr>
<tr>
<td rowspan="2" align="left">3% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">411</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">9.53 &#xb1; 0.18</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">9.33 &#xb1; 0.12</td>
</tr>
<tr>
<td rowspan="2" align="left">3.5% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">418</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">11.557 &#xb1; 0.49</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">11.708 &#xb1; 0.52</td>
</tr>
<tr>
<td rowspan="2" align="left">4% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">350</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">14.00 &#xb1; 0.34</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">13.87 &#xb1; 0.22</td>
</tr>
<tr>
<td rowspan="2" align="left">4.5% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">452</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">14.53 &#xb1; 0.42</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">14.14 &#xb1; 0.41</td>
</tr>
<tr>
<td rowspan="2" align="left">5% agar</td>
<td rowspan="2" align="left">2</td>
<td rowspan="2" align="left">446</td>
<td rowspan="2" align="left">-</td>
<td align="left">Time-of-flight</td>
<td align="left">15.96 &#xb1; 0.51</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">15.10 &#xb1; 0.29</td>
</tr>
<tr>
<td rowspan="4" align="left">Palm</td>
<td rowspan="2" align="left">8</td>
<td rowspan="2" align="left">1524</td>
<td rowspan="2" align="left">20s</td>
<td align="left">Time-of-flight</td>
<td align="left">6.72 &#xb1; 0.79</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">6.78 &#xb1; 0.82</td>
</tr>
<tr>
<td rowspan="2" align="left">9</td>
<td rowspan="2" align="left">1311</td>
<td rowspan="2" align="left">30s</td>
<td align="left">Time-of-flight</td>
<td align="left">8.32 &#xb1; 0.79</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">8.10 &#xb1; 0.74</td>
</tr>
<tr>
<td rowspan="4" align="left">Forearm</td>
<td rowspan="2" align="left">6</td>
<td rowspan="2" align="left">1145</td>
<td rowspan="2" align="left">20s</td>
<td align="left">Time-of-flight</td>
<td align="left">4.35 &#xb1; 0.75</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">4.29 &#xb1; 0.33</td>
</tr>
<tr>
<td rowspan="2" align="left">7</td>
<td rowspan="2" align="left">865</td>
<td rowspan="2" align="left">30s</td>
<td align="left">Time-of-flight</td>
<td align="left">5.44 &#xb1; 1.24</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">5.33 &#xb1; 1.06</td>
</tr>
<tr>
<td rowspan="4" align="left">Back of hand</td>
<td rowspan="2" align="left">6</td>
<td rowspan="2" align="left">616</td>
<td rowspan="2" align="left">20s</td>
<td align="left">Time-of-flight</td>
<td align="left">3.75 &#xb1; 0.34</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">4.40 &#xb1; 0.29</td>
</tr>
<tr>
<td rowspan="2" align="left">7</td>
<td rowspan="2" align="left">619</td>
<td rowspan="2" align="left">30s</td>
<td align="left">Time-of-flight</td>
<td align="left">4.31 &#xb1; 0.38</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">4.55 &#xb1; 0.22</td>
</tr>
<tr>
<td rowspan="2" align="left">Closed comedones</td>
<td rowspan="2" align="left">3</td>
<td rowspan="2" align="left">642</td>
<td rowspan="2" align="left">20s</td>
<td align="left">Time-of-flight</td>
<td align="left">9.08 &#xb1; 0.69</td>
</tr>
<tr>
<td align="left">VP-Net</td>
<td align="left">9.19 &#xb1; 0.41</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The actual and predicted SAW velocities of the agar phantoms increased with concentration. The phantom results were stable and consistent: the mean predicted velocities were close to the actual velocities, with standard deviations of approximately 0.5&#xa0;m/s or less. For healthy human skin, the network-predicted bulk SAW velocities for both age groups (20s and 30s) across the three skin sites closely aligned with the actual velocities obtained from the conventional method. The palm exhibited the highest SAW velocities, approximately 8&#xa0;m/s in the 30s group and 6&#xa0;m/s in the 20s group, followed by the forearm, with approximately 4&#xa0;m/s in the 20s group and 5&#xa0;m/s in the 30s group. For the back of the hand, VP-Net predicted velocities were higher than those obtained by the conventional method by 0.6&#xa0;m/s in the 20s group, and 0.2&#xa0;m/s in the 30s group. For closed comedones, VP-Net predicted velocity was close to the conventional method, with a high velocity of approximately 9&#xa0;m/s, indicating higher biomechanical properties.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>Wave-based OCE has been one of the most studied OCE branches, producing a fundamental impact in the quantitative and nondestructive biomechanical characterization of tissues. However, the long processing time limits its real-time and clinical applications (<xref ref-type="bibr" rid="B43">Sun et al., 2011</xref>). In this study, we proposed a rapid, high-efficiency, and high-accuracy deep-learning-based velocity prediction network (VP-Net) to predict biomechanical property-related velocity. We comprehensively evaluated the network with homogeneous agar-based tissue-mimicking phantoms, <italic>in vivo</italic> human healthy and abnormal skin. Compared to the conventional OCE velocity estimation method (<xref ref-type="bibr" rid="B50">Zvietcovich and Larin, 2022</xref>), VP-Net could directly predict velocity from a single raw OCE slice, which provided end-to-end processing and eliminated the requirement for complex processing. Therefore, the proposed VP-Net has great potential to be translated into clinical practice for characterizing skin aging, as well as assessing and managing the treatment of acne vulgaris.</p>
<p>In the discussion, the results will be analyzed and compared with the findings from other studies. First, we conducted a comprehensive comparison with a series of existing deep-learning models, including VGG16/19, ResNet18/34/50/101, DenseNet121/169, and MobileNetV2. The evaluation results in <xref ref-type="table" rid="T1">Table 1</xref> and <xref ref-type="table" rid="T2">Table 2</xref> show that the mean MSE and MAE errors were approximately below 0.5 in agar phantoms, with concentrations ranging from 1.5% to 4%, indicating high accuracy in predicting the velocities in these agar phantoms. However, for the agar phantoms with low (1%) and higher concentrations (4.5% and 5%), the mean errors from VP-Net were relatively higher than 0.5. We hypothesize this is due to the unbalanced data distribution in the training datasets, as the velocity distributions for these concentrations had fewer slices (5,127 slices). Regarding the <italic>in vivo</italic> human skin (<xref ref-type="table" rid="T3">Table 3</xref>), VP-Net achieved the best performance in the back of hand (MSE: 1.585; MAE: 0.992) and had the lowest MAE of 0.997 in the forearm. In the palm, VP-Net performed similarly to ResNet101 in terms of MSE and MAE. For closed comedones, VP-Net had the second-lowest MSE and MAE. Thus, VP-Net demonstrated high accuracy in predicting biomechanical property-related velocities, indicating its potential for early diagnosis of skin conditions.</p>
<p>An ablation study was conducted to investigate the influence of VP-Net sizes on performance. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, increasing the size of VP-Net did not improve accuracy for agar phantoms with 1.5%&#x2013;4.0% concentrations. However, for agar phantoms with 1.5%&#x2013;2.5% concentrations, decreasing the size of VP-Net improved performance. In the human skin dataset (<xref ref-type="table" rid="T5">Table 5</xref>), VP-Net-B provided the lowest MAE (0.992) and MSE (1.585) errors in the back of hand, and the lowest MAE (0.997) and second-lowest MSE (2.007) in the forearm. In the palm and closed comedones, reducing the size of VP-Net again provided the best performance in terms of MSE and MAE errors. Compared to VP-Net-S and VP-Net-L, VP-Net-B offered the best trade-off between prediction performance and model complexity.</p>
<p>Additionally, we evaluated the computational demand of VP-Net in both GPU and CPU environments, comparing inference time and model complexity among various methods, as presented in <xref ref-type="fig" rid="F4">Figure 4</xref>. <xref ref-type="fig" rid="F4">Figures 4A, B</xref> illustrate that VP-Net had the lowest inference time in both environments. Specifically, <xref ref-type="fig" rid="F4">Figure 4A</xref> shows that VP-Net accelerated the velocity prediction procedure by a factor of 100 compared to the conventional method. <xref ref-type="fig" rid="F4">Figure 4C</xref> further indicates that VP-Net-S and VP-Net-B had the lowest model complexity and network parameters, respectively.</p>
<p>Grad-CAM (<xref ref-type="fig" rid="F5">Figure 5</xref>) was employed to interpret VP-Net&#x2019;s velocity prediction processes. When the wave propagation pattern was clear and had single wave mode details (<xref ref-type="fig" rid="F5">Figure 5</xref> (1B and 2B)), the full wave propagation path (<xref ref-type="fig" rid="F5">Figure 5</xref> (1C,D and 2C,D)) was seen in the shallow convolution layers (first to third). In contrast, when artifacts induced by motion, far-end noise, or low intensities at deeper depths were present (<xref ref-type="fig" rid="F5">Figure 5</xref> (3B and 4B)), only the high-quality portions of the wave patterns were emphasized in these layers (<xref ref-type="fig" rid="F5">Figure 5</xref> (3C,D)). This may indicate that the model effectively filtered significant noise from the raw phase slices to extract useful and accurate wave information. For abnormal skin, only the high-velocity wave propagating through the comedo region was displayed as the highest intensity curve in the first convolution layer (<xref ref-type="fig" rid="F5">Figure 5C</xref>). We believe that the comprehensive training dataset, which included high-quality slices at surface depths, low-intensity wave images at deeper depths, motion artifacts, and boundaries between abnormal and healthy regions, enhanced the model&#x2019;s ability to analyze difficult situations and accurately predict the velocity of abnormal regions (<xref ref-type="bibr" rid="B29">Li et al., 2022</xref>).</p>
<p>Biomechanical properties, specifically elasticity (Young&#x2019;s modulus), can be estimated directly from velocity measurements. The bulk Young&#x2019;s modulus (<inline-formula id="inf34">
<mml:math id="m42">
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) can be calculated from the predicted SAW velocity (<inline-formula id="inf35">
<mml:math id="m43">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>) using the formula <inline-formula id="inf36">
<mml:math id="m44">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3.35</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mover accent="true">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, assuming skin mass density (&#x3c1;) of 1.02&#xa0;g/cm<sup>3</sup> and a Poisson&#x2019;s ratio (&#x3c5;) of 0.5 (<xref ref-type="bibr" rid="B25">Liang and Boppart, 2009</xref>). By converting the velocities in <xref ref-type="table" rid="T6">Table 6</xref> to elasticity, the bulk Young&#x2019;s moduli for 1%&#x2013;2% agar phantoms predicted from raw phase slices were 44 &#xb1; 14&#xa0;kPa, 66 &#xb1; 4&#xa0;kPa, and 149 &#xb1; 14&#xa0;kPa, respectively. These results are in good agreement with the values obtained by Yang et al. (<xref ref-type="bibr" rid="B47">Yang et al., 2022</xref>). The Young&#x2019;s modulus for the 2% agar phantom predicted by our model was 286 &#xb1; 12&#xa0;kPa, comparable to the 254&#xa0;kPa reported by Brewin et al. (<xref ref-type="bibr" rid="B3">Brewin et al., 2015</xref>). In our study, the average predicted Young&#x2019;s moduli for the three skin sites were 188 &#xb1; 58&#xa0;kPa for the palm, 79 &#xb1; 42&#xa0;kPa for the forearm, and 64 &#xb1; 35&#xa0;kPa for the back of hand, which was consistent with values documented in our previous study (<xref ref-type="bibr" rid="B49">Zhou et al., 2020</xref>). Also, the values aligned with other reference values. For instance, the bulk Young&#x2019;s modulus for the palm was reported at 108 &#xb1; 48&#xa0;kPa (<xref ref-type="bibr" rid="B48">Zhang et al., 2011</xref>), for the forearm at 42 &#xb1; 32&#xa0;kPa (<xref ref-type="bibr" rid="B48">Zhang et al., 2011</xref>) and 129 &#xb1; 88&#xa0;kPa (<xref ref-type="bibr" rid="B5">Diridollou et al., 2000</xref>), and for the back of hand at 11&#x2013;23&#xa0;kPa (<xref ref-type="bibr" rid="B44">Wakhlu et al., 2017</xref>). 
Notably, a difference in bulk Young&#x2019;s modulus between age groups (20s and 30s) was observed: for the palm, 154 &#xb1; 37&#xa0;kPa in the 20s group vs 220 &#xb1; 40&#xa0;kPa in the 30s group; for the forearm, 62 &#xb1; 9&#xa0;kPa in the 20s group vs 95 &#xb1; 38&#xa0;kPa in the 30s group; and for the back of the hand, 65 &#xb1; 9&#xa0;kPa in the 20s group vs 69 &#xb1; 7&#xa0;kPa in the 30s group. Regarding abnormal skin, no previous studies have reported the Young&#x2019;s modulus of facial skin diseases. For comparison, the mean stiffness of malignant neck tumors was 226.4&#xa0;kPa as measured by ultrasound elastography (<xref ref-type="bibr" rid="B37">Rold&#xe1;n, 2016</xref>), similar to the predicted Young&#x2019;s modulus of closed comedones at 308 &#xb1; 35&#xa0;kPa. Thus, our proposed VP-Net demonstrated its efficacy by accurately obtaining bulk velocity from a single image with noisy raw phase information.</p>
<p>
<xref ref-type="bibr" rid="B32">Neidhardt et al. (2021)</xref> reported a densely connected network for predicting concentrations of gelatin phantoms by analyzing shear wave OCE data. They later expanded this approach to aid in force estimation on gelatin phantoms and <italic>ex vivo</italic> chicken hearts (<xref ref-type="bibr" rid="B33">Neidhardt et al., 2023</xref>). Their model could process both 3D (depth &#xd7; lateral distance &#xd7; time) and 4D (depth &#xd7; lateral distance &#xd7; vertical distance &#xd7; time) volumes, with each dimension of 32 pixels, and was capable of performing classification in real-time. While their methods made valuable contributions, particularly for real-time and 4D analysis, there may be challenges when applying this approach to <italic>in vivo</italic> studies and clinical translation. First, the input depth for each volume in their model required 32 pixels, approximately 235&#xa0;&#xb5;m. In contrast, our proposed deep learning network could predict velocity from each single slice, with a single depth layer of approximately 4.7&#xa0;&#xb5;m. In addition, their low spatial sampling points limited the spatial resolution of the raw volume, resulting in reduced elastography resolution (<xref ref-type="bibr" rid="B19">Kirby et al., 2019</xref>). This limits the ability of their approach to address motion artifacts and complicated wave patterns, which frequently occur in <italic>in vivo</italic> OCE acquisitions. Next, their field of view was restricted to 3&#xa0;mm, which could be insufficient for measuring abnormal skin conditions, typically around 6&#xa0;mm in diameter (<xref ref-type="bibr" rid="B15">Kasmi and Mokrani, 2016</xref>). In our study, the scanning range was up to 11&#xa0;mm, and we successfully predicted the bulk velocities of closed comedones with diameters greater than 4.2&#xa0;mm. 
Thus, VP-Net may offer an advantage in predicting biomechanical property-related velocity from a single image, handling high noise and artifacts, and is particularly suitable for both healthy and abnormal <italic>in vivo</italic> scans.</p>
<p>While our work represents a significant advancement, further research is needed to refine the deep learning model, particularly its translation to clinical settings. By including a more diverse range of participants, we intend to enhance the robustness of our model, ensuring accurate wave velocity predictions across all biological genders. Additionally, we plan to substantially enlarge our dataset to explore the potential of vision transformers for predicting the biomechanical properties of both healthy and abnormal human skin.</p>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>In conclusion, we developed an end-to-end deep learning-based velocity prediction network (VP-Net) for predicting elastic wave velocities associated with biomechanical properties using OCE. VP-Net demonstrated the ability to provide real-time elastic wave velocity predictions without the need for expertise and complex image processing. In <italic>in vivo</italic> applications on both healthy and abnormal human skin, VP-Net accurately differentiated age-related changes in elastic velocities across multiple skin sites and detected high velocities in closed comedones. Therefore, VP-Net holds significant potential for clinical applications in characterizing skin aging, as well as assessing and managing the treatment of acne vulgaris.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7">
<title>Ethics statement</title>
<p>The studies involving humans were approved by University Research Ethics Committee (UREC), University of Dundee. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>YZ: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Software, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. JL: Conceptualization, Formal Analysis, Investigation, Methodology, Software, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. ZF: Data curation, Investigation, Writing&#x2013;review and editing. WY: Data curation, Writing&#x2013;review and editing. AP: Methodology, Validation, Writing&#x2013;review and editing. ZW: Validation, Writing&#x2013;review and editing. CL: Conceptualization, Methodology, Project administration, Supervision, Writing&#x2013;review and editing. ZH: Project administration, Resources, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<ack>
<p>Alessandro Perelli is supported by the Royal Academy of Engineering under the RAEng/Leverhulme Trust Research Fellowships programme (LTRF-2324&#x2013;20&#x2013;160).</p>
</ack>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Barham</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brevdo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Citro</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Tensorflow: large-scale machine learning on heterogeneous distributed systems</article-title>. <source>arXiv Prepr. arXiv:1603.04467</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1603.04467</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balbir-Gurman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Denton</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nichols</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Knight</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nahir</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). <article-title>Non-invasive measurement of biomechanical skin properties in systemic sclerosis</article-title>. <source>Ann. rheumatic Dis.</source> <volume>61</volume>, <fpage>237</fpage>&#x2013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1136/ard.61.3.237</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brewin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Birch</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mehta</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Reeves</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shaw</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kruse</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Characterisation of elastic and acoustic properties of an agar-based tissue mimicking material</article-title>. <source>Ann. Biomed. Eng.</source> <volume>43</volume>, <fpage>2587</fpage>&#x2013;<lpage>2596</lpage>. <pub-id pub-id-type="doi">10.1007/s10439-015-1294-7</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Couturaud</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Coutable</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Khaiat</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Skin biomechanical properties: <italic>in vivo</italic> evaluation of influence of age and body site by a non&#x2010;invasive method</article-title>. <source>Skin Res. Technol.</source> <volume>1</volume>, <fpage>68</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1111/j.1600-0846.1995.tb00020.x</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Diridollou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Patat</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gens</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Vaillant</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Black</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lagarde</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2000</year>). <article-title>
<italic>In vivo</italic> model of the mechanical properties of the human skin under suction</article-title>. <source>Skin Res. Technol.</source> <volume>6</volume>, <fpage>214</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1034/j.1600-0846.2000.006004214.x</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.-H.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hicks</surname>
<given-names>M. J.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Rapid, noninvasive quantitation of skin disease in systemic sclerosis using optical coherence elastography</article-title>. <source>J. Biomed. Opt.</source> <volume>21</volume>, <fpage>046002</fpage>. <pub-id pub-id-type="doi">10.1117/1.jbo.21.4.046002</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Everett</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Sommers</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Skin viscoelasticity: physiologic mechanisms, measurement issues, and application to nursing science</article-title>. <source>Biol. Res. Nurs.</source> <volume>15</volume>, <fpage>338</fpage>&#x2013;<lpage>346</lpage>. <pub-id pub-id-type="doi">10.1177/1099800411434151</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Krajancich</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chin</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zilkens</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Curatolo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Frewer</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Handheld probe for quantitative micro-elastography</article-title>. <source>Biomed. Opt. Express</source> <volume>10</volume>, <fpage>4034</fpage>&#x2013;<lpage>4049</lpage>. <pub-id pub-id-type="doi">10.1364/boe.10.004034</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Godin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Touitou</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Transdermal skin delivery: predictions for humans from <italic>in vivo</italic>, <italic>ex vivo</italic> and animal models</article-title>. <source>Adv. drug Deliv. Rev.</source> <volume>59</volume>, <fpage>1152</fpage>&#x2013;<lpage>1161</lpage>. <pub-id pub-id-type="doi">10.1016/j.addr.2007.07.004</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hay</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Johns</surname>
<given-names>N. E.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Bolliger</surname>
<given-names>I. W.</given-names>
</name>
<name>
<surname>Dellavalle</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Margolis</surname>
<given-names>D. J.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>The global burden of skin disease in 2010: an analysis of the prevalence and impact of skin conditions</article-title>. <source>J. Investigative Dermatology</source> <volume>134</volume>, <fpage>1527</fpage>&#x2013;<lpage>1534</lpage>. <pub-id pub-id-type="doi">10.1038/jid.2013.446</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>770</fpage>&#x2013;<lpage>778</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Van Der Maaten</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Weinberger</surname>
<given-names>K. Q.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Densely connected convolutional networks</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>4700</fpage>&#x2013;<lpage>4708</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>7132</fpage>&#x2013;<lpage>7141</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joodaki</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Panzer</surname>
<given-names>M. B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Skin mechanical properties and modeling: a review</article-title>. <source>Proc. Institution Mech. Eng. Part H J. Eng. Med.</source> <volume>232</volume>, <fpage>323</fpage>&#x2013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1177/0954411918759801</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kasmi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mokrani</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Classification of malignant melanoma and benign skin lesions: implementation of automatic ABCD rule</article-title>. <source>IET Image Process.</source> <volume>10</volume>, <fpage>448</fpage>&#x2013;<lpage>455</lpage>. <pub-id pub-id-type="doi">10.1049/iet-ipr.2015.0385</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Killaars</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Penha</surname>
<given-names>T. L.</given-names>
</name>
<name>
<surname>Heuts</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Van Der Hulst</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Piatkowski</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Biomechanical properties of the skin in patients with breast cancer-related lymphedema compared to healthy individuals</article-title>. <source>Lymphatic Res. Biol.</source> <volume>13</volume>, <fpage>215</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1089/lrb.2014.0049</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Ba</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv Prepr. arXiv:1412.6980</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kirby</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Pelivanov</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ambrozinski</surname>
<given-names>&#x141;.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Optical coherence elastography in ophthalmology</article-title>. <source>J. Biomed. Opt.</source> <volume>22</volume>, <fpage>121720</fpage>. <pub-id pub-id-type="doi">10.1117/1.jbo.22.12.121720</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kirby</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Pitre</surname>
<given-names>J. J.</given-names>
<suffix>JR</suffix>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Pelivanov</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Spatial resolution in dynamic optical coherence elastography</article-title>. <source>J. Biomed. Opt.</source> <volume>24</volume>, <fpage>096006</fpage>. <pub-id pub-id-type="doi">10.1117/1.jbo.24.9.096006</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kirkpatrick</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Duncan</surname>
<given-names>D. D.</given-names>
</name>
<name>
<surname>Kulesz-Martin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Imaging the mechanical stiffness of skin lesions by <italic>in vivo</italic> acousto-optical elastography</article-title>. <source>Opt. express</source> <volume>14</volume>, <fpage>9770</fpage>&#x2013;<lpage>9779</lpage>. <pub-id pub-id-type="doi">10.1364/oe.14.009770</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Labroo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Irvin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sieverts</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Miess</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Robinson</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Physical characterization of swine and human skin: correlations between Raman spectroscopy, Tensile testing, Atomic force microscopy (AFM), Scanning electron microscopy (SEM), and Multiphoton microscopy (MPM)</article-title>. <source>Skin Res. Technol.</source> <volume>27</volume>, <fpage>501</fpage>&#x2013;<lpage>510</lpage>. <pub-id pub-id-type="doi">10.1111/srt.12976</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Aglyamov</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Larin</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Twa</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>
<italic>In vivo</italic> human corneal shear-wave optical coherence elastography</article-title>. <source>Optometry Vis. Sci.</source> <volume>98</volume>, <fpage>58</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1097/opx.0000000000001633</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larin</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Sampson</surname>
<given-names>D. D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Optical coherence elastography &#x2013; OCT at work in tissue biomechanics [Invited]</article-title>. <source>Biomed. Opt. express</source> <volume>8</volume>, <fpage>1172</fpage>&#x2013;<lpage>1202</lpage>. <pub-id pub-id-type="doi">10.1364/boe.8.001172</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lavers</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Diagnosis and management of acne vulgaris</article-title>. <source>Nurse Prescr.</source> <volume>12</volume>, <fpage>330</fpage>&#x2013;<lpage>336</lpage>. <pub-id pub-id-type="doi">10.12968/npre.2014.12.7.330</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Boppart</surname>
<given-names>S. A.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Biomechanical properties of <italic>in vivo</italic> human skin from dynamic optical coherence elastography</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>57</volume>, <fpage>953</fpage>&#x2013;<lpage>959</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2009.2033464</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Depth evaluation of soft tissue mimicking phantoms using surface acoustic waves</article-title>. <source>Phys. Procedia</source> <volume>63</volume>, <fpage>177</fpage>&#x2013;<lpage>181</lpage>. <pub-id pub-id-type="doi">10.1016/j.phpro.2015.03.029</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Assassi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Theodore</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Schill</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Translational optical coherence elastography for assessment of systemic sclerosis</article-title>. <source>J. Biophot.</source> <volume>12</volume>, <fpage>e201900236</fpage>. <pub-id pub-id-type="doi">10.1002/jbio.201900236</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Dynamic optical coherence elastography for skin burn assessment: a preliminary study on mice model</article-title>. <source>J. Biophot.</source> <volume>17</volume>, <fpage>e202400028</fpage>. <pub-id pub-id-type="doi">10.1002/jbio.202400028</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Interpretable deep learning: interpretation, interpretability, trustworthiness, and beyond</article-title>. <source>Knowl. Inf. Syst.</source> <volume>64</volume>, <fpage>3197</fpage>&#x2013;<lpage>3234</lpage>. <pub-id pub-id-type="doi">10.1007/s10115-022-01756-8</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lynn</surname>
<given-names>D. D.</given-names>
</name>
<name>
<surname>Umari</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dunnick</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Dellavalle</surname>
<given-names>R. P.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The epidemiology of acne vulgaris in late adolescence</article-title>. <source>Adolesc. Health, Med. Ther.</source> <volume>7</volume>, <fpage>13</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.2147/ahmt.s55832</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Neidhardt</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bengs</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Latus</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schl&#xfc;ter</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Saathoff</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Schlaefer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Deep learning for high speed optical coherence elastography</article-title>,&#x201d; in <source>2020 IEEE 17th international symposium on biomedical imaging (ISBI)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1583</fpage>&#x2013;<lpage>1586</lpage>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Neidhardt</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bengs</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Latus</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schl&#xfc;ter</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Saathoff</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Schlaefer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>4D deep learning for real-time volumetric optical coherence elastography</article-title>. <source>Int. J. Comput. Assist. Radiol. Surg.</source> <volume>16</volume>, <fpage>23</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1007/s11548-020-02261-5</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Neidhardt</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mieling</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bengs</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Schlaefer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Optical force estimation for interactions between tool and soft tissues</article-title>. <source>Sci. Rep.</source> <volume>13</volume>, <fpage>506</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-27036-7</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Neto</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bahia</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Costa</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Improvement of the methods for skin mechanical properties evaluation through correlation between different techniques and factor analysis</article-title>. <source>Skin Res. Technol.</source> <volume>19</volume>, <fpage>405</fpage>&#x2013;<lpage>416</lpage>. <pub-id pub-id-type="doi">10.1111/srt.12060</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Og&#xe9;</surname>
<given-names>L. K.</given-names>
</name>
<name>
<surname>Broussard</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Marshall</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Acne vulgaris: diagnosis and treatment</article-title>. <source>Am. Fam. Physician</source> <volume>100</volume>, <fpage>475</fpage>&#x2013;<lpage>484</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Proksch</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Brandner</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Jensen</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The skin: an indispensable barrier</article-title>. <source>Exp. Dermatol.</source> <volume>17</volume>, <fpage>1063</fpage>&#x2013;<lpage>1072</lpage>. <pub-id pub-id-type="doi">10.1111/j.1600-0625.2008.00786.x</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rold&#xe1;n</surname>
<given-names>F. A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Elastography in dermatology</article-title>. <source>Actas Dermo-Sifiliogr&#xe1;ficas</source> <volume>107</volume>, <fpage>652</fpage>&#x2013;<lpage>660</lpage>. <pub-id pub-id-type="doi">10.1016/j.ad.2016.05.004</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sandler</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Howard</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhmoginov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.-C.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>MobileNetV2: inverted residuals and linear bottlenecks</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>4510</fpage>&#x2013;<lpage>4520</lpage>.</citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Selvaraju</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Cogswell</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vedantam</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Parikh</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Batra</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Grad-CAM: visual explanations from deep networks via gradient-based localization</article-title>,&#x201d; in <source>Proceedings of the IEEE international conference on computer vision</source>, <fpage>618</fpage>&#x2013;<lpage>626</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Simonyan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zisserman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Very deep convolutional networks for large-scale image recognition</article-title>. <source>arXiv Prepr. arXiv:1409.1556</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1409.1556</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>T.-M.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>E. Y.</given-names>
</name>
<name>
<surname>Arnal</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>O&#x2019;Donnell</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Shear modulus imaging by direct visualization of propagating shear waves with phase-sensitive optical coherence tomography</article-title>. <source>J. Biomed. Opt.</source> <volume>18</volume>, <fpage>121509</fpage>. <pub-id pub-id-type="doi">10.1117/1.jbo.18.12.121509</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R. K.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Quantitative shear-wave optical coherence elastography with a programmable phased array ultrasound as the wave source</article-title>. <source>Opt. Lett.</source> <volume>40</volume>, <fpage>5007</fpage>&#x2013;<lpage>5010</lpage>. <pub-id pub-id-type="doi">10.1364/ol.40.005007</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Standish</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>V. X.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Optical coherence elastography: current status and future applications</article-title>. <source>J. Biomed. Opt.</source> <volume>16</volume>, <fpage>043001</fpage>. <pub-id pub-id-type="doi">10.1117/1.3560294</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wakhlu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chowdhury</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Mohindra</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tripathy</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Misra</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Assessment of extent of skin involvement in scleroderma using shear wave elastography</article-title>. <source>Indian J. Rheumatology</source> <volume>12</volume>, <fpage>194</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.4103/injr.injr_41_17</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Kirkpatrick</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hinds</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Phase-sensitive optical coherence elastography for mapping tissue microstrains in real time</article-title>. <source>Appl. Phys. Lett.</source> <volume>90</volume>, <fpage>164105</fpage>. <pub-id pub-id-type="doi">10.1063/1.2724920</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Larin</surname>
<given-names>K. V.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Optical coherence elastography for tissue characterization: a review</article-title>. <source>J. Biophot.</source> <volume>8</volume>, <fpage>279</fpage>&#x2013;<lpage>302</lpage>. <pub-id pub-id-type="doi">10.1002/jbio.201400108</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Nan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Optical coherence elastography to evaluate depth-resolved elasticity of tissue</article-title>. <source>Opt. Express</source> <volume>30</volume>, <fpage>8709</fpage>&#x2013;<lpage>8722</lpage>. <pub-id pub-id-type="doi">10.1364/oe.451704</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Osborn</surname>
<given-names>T. G.</given-names>
</name>
<name>
<surname>Pittelkow</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Qiang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kinnick</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Greenleaf</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Quantitative assessment of scleroderma by surface wave technique</article-title>. <source>Med. Eng. Phys.</source> <volume>33</volume>, <fpage>31</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/j.medengphy.2010.08.016</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A weighted average phase velocity inversion model for depth-resolved elasticity evaluation in human skin in-vivo</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>68</volume>, <fpage>1969</fpage>&#x2013;<lpage>1977</lpage>. <pub-id pub-id-type="doi">10.1109/tbme.2020.3045133</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zvietcovich</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Larin</surname>
<given-names>K. V.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Wave-based optical coherence elastography: the 10-year perspective</article-title>. <source>Prog. Biomed. Eng.</source> <volume>4</volume>, <fpage>012007</fpage>. <pub-id pub-id-type="doi">10.1088/2516-1091/ac4512</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>