<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2025.1632376</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Convolutional automatic identification of B-lines and interstitial syndrome in lung ultrasound images using pre-trained neural networks with feature fusion</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Moafa</surname><given-names>Khalid</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3067967/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" 
vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Antico</surname><given-names>Maria</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Vukovic</surname><given-names>Damjan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Edwards</surname><given-names>Christopher</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2987451/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Canty</surname><given-names>David</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Cid Serra</surname><given-names>Ximena</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Royse</surname><given-names>Alistair</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Royse</surname><given-names>Colin</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Haji</surname><given-names>Kavi</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff8"><sup>8</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Dowling</surname><given-names>Jason</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/748112/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Steffens</surname><given-names>Marian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Fontanarosa</surname><given-names>Davide</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/958591/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Clinical Sciences, Queensland University of Technology</institution>, <city>Brisbane</city>, <state>QLD</state>, <country country="AU">Australia</country></aff>
<aff id="aff2"><label>2</label><institution>College of Applied Medical Sciences, Jazan University</institution>, <city>Jazan</city>, <country country="SA">Saudi Arabia</country></aff>
<aff id="aff3"><label>3</label><institution>Centre for Biomedical Technologies (CBT), Queensland University of Technology</institution>, <city>Brisbane</city>, <state>QLD</state>, <country country="AU">Australia</country></aff>
<aff id="aff4"><label>4</label><institution>Australian e-Health Research Centre, The Commonwealth Scientific and Industrial Research Organisation (CSIRO)</institution>, <city>Brisbane</city>, <state>QLD</state>, <country country="AU">Australia</country></aff>
<aff id="aff5"><label>5</label><institution>Department of Surgery (Royal Melbourne Hospital), University of Melbourne</institution>, <city>Parkville</city>, <state>VIC</state>, <country country="AU">Australia</country></aff>
<aff id="aff6"><label>6</label><institution>Department of Cardiothoracic Surgery, Royal Melbourne Hospital</institution>, <city>Melbourne</city>, <state>VIC</state>, <country country="AU">Australia</country></aff>
<aff id="aff7"><label>7</label><institution>Department of Anaesthesia and Pain Management, Royal Melbourne Hospital</institution>, <city>Melbourne</city>, <state>VIC</state>, <country country="AU">Australia</country></aff>
<aff id="aff8"><label>8</label><institution>Department of Intensive Care, Peninsula Health</institution>, <city>Frankston</city>, <state>VIC</state>, <country country="AU">Australia</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Khalid Moafa <email xlink:href="mailto:khalidmohammeda.moafa@hdr.qut.edu.au">khalidmohammeda.moafa@hdr.qut.edu.au</email>; <email xlink:href="mailto:kmoafa@jazanu.edu.sa">kmoafa@jazanu.edu.sa</email>; Davide Fontanarosa <email xlink:href="mailto:d3.fontanarosa@qut.edu.au">d3.fontanarosa@qut.edu.au</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-19"><day>19</day><month>01</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1632376</elocation-id>
<history>
<date date-type="received"><day>21</day><month>05</month><year>2025</year></date>
<date date-type="rev-recd"><day>30</day><month>10</month><year>2025</year></date>
<date date-type="accepted"><day>22</day><month>12</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Moafa, Antico, Vukovic, Edwards, Canty, Cid Serra, Royse, Royse, Haji, Dowling, Steffens and Fontanarosa.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Moafa, Antico, Vukovic, Edwards, Canty, Cid Serra, Royse, Royse, Haji, Dowling, Steffens and Fontanarosa</copyright-holder><license><ali:license_ref start_date="2026-01-16">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Interstitial/alveolar syndrome (IS) is a condition detectable on lung ultrasound (LUS) that indicates underlying pulmonary or cardiac diseases associated with significant morbidity and increased mortality rates. However, diagnosing IS using LUS can be challenging and time-consuming, and it requires clinical expertise.</p>
</sec><sec><title>Methods</title>
<p>In this study, multiple convolutional neural network (CNN) models were trained as binary classifiers to accurately screen for IS in LUS frames by distinguishing between IS-present and healthy cases. The CNN models were initially pre-trained using a generic image dataset to learn general visual features (ImageNet) and then fine-tuned on our specific dataset of 108 LUS clips from 54 patients (27 healthy and 27 with IS, two clips per patient) to perform a binary classification task. Each clip in the dataset was assessed by a clinical sonographer to determine the presence of IS features or confirm healthy lung status. The dataset was split into training (70&#x0025;), validation (15&#x0025;), and testing (15&#x0025;) sets.</p>
</sec><sec><title>Results</title>
<p>Following the process of fine-tuning, we successfully extracted features from pre-trained DL models. These extracted features were then utilised to train multiple machine learning (ML) classifiers, resulting in significantly improved accuracy in IS classification compared with the individual CNN models. Advanced visual interpretation techniques such as heatmaps based on gradient-weighted class activation mapping (Grad-CAM) and local interpretable model-agnostic explanations (LIME) were implemented to further analyse the outcomes. The best-trained ML model achieved a test accuracy rate of 98.2&#x0025;, with specificity, recall, precision, and F1 score values above 97.9&#x0025;.</p>
</sec><sec><title>Conclusion</title>
<p>Our study demonstrates the feasibility of using a pre-trained CNN as a diagnostic tool for IS screening on LUS frames, integrating targeted data filtering, feature extraction, and fusion techniques. The data-filtering technique refines the training dataset by excluding LUS frames that lack IS-related features (e.g., absence of B-lines). Feature fusion combines features learnt from different models or &#x201C;fused&#x201D; to enhance overall predictive performance. This study confirms the practicality of using pre-trained CNN models with feature extraction and fusion techniques for screening IS using LUS frames. This represents a noteworthy advancement in improving the efficiency of diagnosis. In the next steps, validation on larger datasets will assess the applicability and robustness of these CNN models in more complex clinical settings.</p>
</sec>
</abstract>
<kwd-group>
<kwd>interstitial syndrome</kwd>
<kwd>lung ultrasound</kwd>
<kwd>deep learning</kwd>
<kwd>transfer learning</kwd>
<kwd>features fusion</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement></funding-group><counts>
<fig-count count="20"/>
<table-count count="4"/><equation-count count="0"/><ref-count count="37"/><page-count count="23"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Connected Health</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Lung ultrasound (LUS) has gained clinical acceptance for diagnosing and managing lung diseases because of its advantages over conventional tests such as computed tomography (CT) and benefits such as accessibility, absence of radiation risk, and portability (<xref ref-type="bibr" rid="B1">1</xref>). These benefits make it ideal for emergency and intensive care settings (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>). However, LUS is operator-dependent, and LUS training can be costly and time-consuming, often restricted to clinicians who have access to such training (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>Deep learning (DL) algorithms have been developed to enable computer-automated diagnosis of pleural effusion and consolidation (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B6">6</xref>). Recent advances in DL and convolutional neural networks (CNNs) have been achieved by using the expertise of LUS-trained clinicians as a reference for DL algorithms in the analysis and recognition of LUS patterns (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>). This technological advancement assists in reducing risks of operator-related overlooks or misdiagnoses and potentially provides untrained clinicians with a diagnostic ultrasound (US) tool that is reasonably accurate.</p>
<p>High-resolution computed tomography (HRCT) remains the gold-standard diagnostic tool for interstitial/alveolar syndrome (IS) (<xref ref-type="bibr" rid="B9">9</xref>). However, limited access and exposure to risks related to transportation and exposure to ionizing radiation make HRCT less desirable in critical care. LUS has been demonstrated to be superior to chest X-ray in assessing lung pathologies such as pulmonary oedema, pleural effusion, pneumonia, and interstitial lung disease (ILD) (<xref ref-type="bibr" rid="B10">10</xref>). It is particularly valuable for expediting diagnosis and enabling timely treatment initiation (<xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>The interpretation of LUS images largely relies on artefact analysis, which has been shown to correlate with CT findings (<xref ref-type="bibr" rid="B9">9</xref>). B-lines are reverberation artefacts in the form of vertical, laser-like, mobile lines that indicate interferences resulting from interstitial fluid, inflammation, or fibrosis (<xref ref-type="bibr" rid="B12">12</xref>). The diagnosis of IS is appropriate when 3 or more B-lines are present within a single intercostal space and in non-dependent parts of the lungs; however, the significance varies based on the clinical context of the presentation. Bilateral IS can be caused by cardiogenic pulmonary oedema, interstitial lung diseases such as pulmonary fibrosis, or viral pneumonitis, including COVID-19 (<xref ref-type="bibr" rid="B13">13</xref>). Conversely, localised IS may indicate an early stage of pneumonia. Evidence shows that identifying and quantifying B-lines not only aids in diagnosing cardiogenic pulmonary oedema but also guides treatment and its response through repeated scanning and may provide prognostic information (<xref ref-type="bibr" rid="B14">14</xref>).</p>
<p>This study demonstrates the implementation and training of DL models, specifically CNNs, to automate B-line detection on US images in patients with IS. Currently, DL approaches, particularly involving the use of CNNs, have been demonstrated to be effective for a wide range of pathologies in LUS (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>). CNNs are able to automatically and robustly learn specific characteristics of the images, allowing them to reliably detect (<xref ref-type="bibr" rid="B17">17</xref>), segment (<xref ref-type="bibr" rid="B5">5</xref>), and classify (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B6">6</xref>) multiple LUS pathologies. It is well known that DL models require large amounts of labelled data for training (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>). Transfer learning (TL) is a possible approach proposed to deal with &#x201C;data starvation&#x201D; problems, as it can compensate for a lack of data in a target domain by inheriting or maintaining the knowledge learnt in a data-rich source domain (<xref ref-type="bibr" rid="B20">20</xref>). According to the literature, using pre-trained CNNs, such as ImageNet models, as feature extractors or fine-tuning pre-trained CNNs can improve performance for various medical image analysis tasks compared with a DL model that is built without pre-existing features (<xref ref-type="bibr" rid="B21">21</xref>, <xref ref-type="bibr" rid="B22">22</xref>).</p>
<p>Addressing the pressing need for automated LUS analysis tools that detect IS accurately and in a timely manner, thereby significantly reducing diagnostic subjectivity, facilitating early disease identification, and potentially leading to improved patient outcomes, forms the core motivation for this work. The novelty of this work lies in applying DL pre-trained models, namely Xception and InceptionResnetV2, which were initially trained on the ImageNet dataset, to a unique IS dataset and training these models on different data-filtering techniques. In addition, we implemented a feature fusion technique to further improve the performance of the DL models by combining features derived from these models. The combined features were further utilised to train multiple classifiers to achieve high diagnostic accuracy (<xref ref-type="bibr" rid="B23">23</xref>). We also interpreted the complexity of the &#x201C;black box&#x201D; of the DL models used by utilising visualisation and interpretation techniques such as gradient-weighted class activation mapping (Grad-CAM) and local interpretable model-agnostic explanations (LIME) (<xref ref-type="bibr" rid="B24">24</xref>&#x2013;<xref ref-type="bibr" rid="B26">26</xref>).</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<sec id="s2a"><label>2.1</label><title>Dataset</title>
<p>The LUS datasets used were fully anonymised and were collected at the Royal Melbourne Hospital. The study was approved by the Melbourne Health Human Research Ethics Committee (HREC/18/MH/269) (<xref ref-type="bibr" rid="B27">27</xref>). The US dataset comprised 125 patients for a total of 1,034 LUS clips. At least six unique lung scanning zones were evaluated and labelled (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>) by our clinical experts (DC and XC) following the protocol shown in <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>. The initial dataset included clips from 54 unidentified patients, 27 healthy, and 27 with IS labelled as &#x201C;non-healthy.&#x201D; The total number of LUS clips included were 108, comprising 16,962 LUS frames (8,481 frames each for healthy and IS patients) (<xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>). Two LUS examples of IS and healthy frames are demonstrated in <xref ref-type="fig" rid="F4">Figures&#x00A0;4A, B</xref>, respectively.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>The six lung scanning zones: <bold>(A)</bold> displaying right anterior (RANT) and left anterior (LANT) views; <bold>(B)</bold> encompassing left posterior upper (LPU), left posterior lower (LPL), right posterior upper (RPU), and right posterior lower (RPL) views; <bold>(C)</bold> providing a posterior view for LPU, RPU, RPL, and LPL.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g001.tif"><alt-text content-type="machine-generated">Diagram of a human torso divided into three labeled sections, A, B, and C. Section A shows the front view with areas marked as RANT and the costal margin outlined. Section B displays side and back views indicating the mid-axillary lines, fissures, and diaphragm positions, with labels LANT, LPU, LPL, RPU, and RPL. Section C presents the back view with scapulae highlighted and labels for LPU, RPU, LPL, RPL, fissure, and diaphragm location.</alt-text>
</graphic>
</fig>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>An example of data in the medical report, indicating six LUS scanning regions corresponding to each LUS pathology. In this example, regions are marked with IS and APO (acute pulmonary oedema). The right side of the image corresponds to the left side of the patient (LANT and LPL), while the left side of the image corresponds to the right side of the patient (RANT and RPL).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g002.tif"><alt-text content-type="machine-generated">Lung scan graphic with checkboxes for conditions: Collapse, Consolidation, APO/Int.Syndr, Pneumothorax, and Effusion. Two checkboxes for APO/Int.Syndr under "ANT" and "PL" columns are marked with green checkmarks.</alt-text>
</graphic>
</fig>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>Distribution of patient numbers, number of clips, and number of frames, along with the process of reviewing, excluding, and finalising training, validation, and testing sets for both Dataset 1 and Dataset 2.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g003.tif"><alt-text content-type="machine-generated">Flowchart showing the dataset preparation for a study involving 125 patients receiving ultrasounds. Patients are categorized as Healthy (n=29), Non-Healthy with IS (n=33), and Non-Healthy with other pathologies (n=63). After exclusions, two datasets are formed: Dataset 1 with 16,962 frames, and Dataset 2 with 12,516 frames. Each dataset includes healthy and IS groups, with separate training, validation, and testing divisions.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float"><label>Figure&#x00A0;4</label>
<caption><p>The displayed images are LUS examples of healthy <bold>(A)</bold> and IS <bold>(B)</bold> frames. On the left is a healthy frame showing A-lines (horizontal lines). On the right is a non-healthy frame with B-lines indicating non-healthy lungs (vertical lines).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g004.tif"><alt-text content-type="machine-generated">Ultrasound image comparison showing a healthy lung (A) and a non-healthy lung with interstitial syndrome (B). The healthy lung has distinct bands, while the non-healthy lung displays blurred patterns, both outlined in red boxes.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2b"><label>2.2</label><title>Filtering techniques</title>
<p>The filtering techniques applied in this study are as follows: Scenario 1 involves the thorough inclusion of all LUS frames from all clips in the training datasets (Dataset 1). In contrast, Scenarios 2 and 3 utilise a selective filtering technique to refine the training dataset (Dataset 2) by excluding LUS frames that do not exhibit the main IS features (i.e., absence of B-lines), thereby prioritising clinically relevant features. The two datasets are illustrated in <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>. Based on predefined clinical criteria, all LUS clips were labelled as healthy or non-healthy (IS cases) (<xref ref-type="fig" rid="F5">Figure&#x00A0;5</xref>). No other lung pathologies (e.g., pleural effusion, consolidation, or atelectasis) were included in the dataset. These criteria were adapted from international evidence-based recommendations for point-of-care ultrasound (POCUS) (<xref ref-type="bibr" rid="B28">28</xref>). It is crucial to note that, in the clip-based labelling method used in this study, a clip classified as IS may include individual frames that do not exhibit IS features and could be deemed healthy. This observation highlights the natural variation and complexity of LUS, emphasising that not all frames from IS clips will consistently show the exact features associated with IS.</p>
<fig id="F5" position="float"><label>Figure&#x00A0;5</label>
<caption><p>A summary of recommendations for diagnosing IS and healthy LUS clips. <bold>(A)</bold>, A healthy condition (green), is characterised by the presence of A-lines, with a clear linear pleural line and lung sliding during inspiration. Conversely, <bold>(B)</bold>, IS (yellow) is characterised by the presence of three B-lines between two ribs, which are associated with four main features: the B-lines exhibit a laser-like appearance, arise from the parietal line, reach the bottom of the screen without fading, and move with lung sliding and lung pulse.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g005.tif"><alt-text content-type="machine-generated">Diagram comparing healthy lung ultrasound patterns with interstitial syndrome. On the left, Panel A shows healthy lungs with A-lines and lung sliding, indicating an A-pattern. On the right, Panel B displays interstitial syndrome with B-lines extending from the pleural line, demonstrating a B-pattern characteristic. The flowchart indicates the clinical interpretation of ultrasound findings.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2c"><label>2.3</label><title>Implementation of the models</title>
<p>The LUS dataset was divided into three subsets for effective model development and performance assessment: training (70&#x0025; &#x2248; 76 LUS clips), validation (15&#x0025; &#x2248; 16 LUS clips), and test sets (15&#x0025; &#x2248; 16 LUS clips). Three different approaches were followed, referred to as Scenarios 1, 2, and 3, with distinct pre-processing techniques and pre-trained models. In Scenario 1, the Xception pre-trained model was employed (<xref ref-type="bibr" rid="B29">29</xref>). All LUS frames were included in the training and validation process, incorporating the total number of healthy and non-healthy frames from all clips (14,902 frames, referred to as Dataset 1). Scenario 2 entailed the utilisation of Xception and Inceptionresnetv2 pre-trained models. A more selective strategy was adopted in Scenario 2 (10,546 frames; referred to as Dataset 2), where non-healthy frames were re-evaluated, and any frames exhibiting characteristics not conducive to the main features characterising IS (i.e., absence of B-lines) were excluded. This selection criterion was applied to ensure that only relevant features were included in the training dataset. In Scenario 3, an Xception model without pre-existing features was trained (non-pre-trained model). It learnt features and weights exclusively from the LUS data without any TL from general image knowledge. Furthermore, similar to Scenario 2, data filtering was applied to the non-pre-trained model, with an evaluation of only non-healthy frames and the exclusion of any frames exhibiting characteristics not conducive to the main features characterising IS (absence of B-lines) within the training dataset. 
The pre-trained models, Xception and InceptionResNetV2, were selected for their proficiency in medical image classification gained through training on the ImageNet dataset with over 14 million natural images across more than 20,000 classes (<xref ref-type="bibr" rid="B29">29</xref>, <xref ref-type="bibr" rid="B30">30</xref>).</p>
<p>To adapt the DL models to our IS detection task, with two classes, IS and healthy class, the architectures of all models (Xception, Inceptionresnetv2, and non-pre-trained model) were customised. The top layer of the models, known as the classifier, which was originally designed to classify 1,000 different classes (such as animals or household items), was replaced with a two-class classifier. The LUS images were also downsized from 720&#x2009;&#x00D7;&#x2009;920 pixels to 299&#x2009;&#x00D7;&#x2009;299 pixels to align with the input dimensions specified by the models&#x0027; architectures. The resizing was performed using bilinear interpolation in MATLAB software (Version R2023b). The aspect ratio of the LUS images was preserved during resizing to avoid distortion of anatomical features.</p>
<p>The Xception model has about 170 layers and 22.9 million trainable parameters. It uses depth-wise separable convolutions across 14 modules to improve feature extraction. The InceptionResnetV2 model, with a more complex structure, includes 843 layers and 55.8 million trainable parameters, combining the Inception and ResNet architectures. As the non-pre-trained model, we employed a modified version of the Xception model that was devoid of its pre-trained weights. The MATLAB software was used to run the models and monitor the training and testing process. The models were trained on a graphics processing unit with an NVIDIA TITAN RTX and 25 GB RAM, running Ubuntu 20.04.6 LTS. The Adam optimiser was used during training. The configuration of training code and model customisation steps was guided by the available code from the GitHub repository by Alzubaidi et al. (<xref ref-type="bibr" rid="B31">31</xref>). After training, the models were tested on the test subset, and their performance was evaluated using multiple performance metrics. <xref ref-type="fig" rid="F6">Figure&#x00A0;6</xref> illustrates the workflow followed to train and test the models, which includes data processing, model customisation, and model performance evaluation tools. In addition, <xref ref-type="table" rid="T1">Table&#x00A0;1</xref> outlines the model hyperparameters.</p>
<fig id="F6" position="float"><label>Figure&#x00A0;6</label>
<caption><p>A flowchart demonstrating the process of model customisation, training, and evaluation used in the study.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g006.tif"><alt-text content-type="machine-generated">Flowchart illustrating a machine learning pipeline for LUS data. It begins with preprocessing and data splitting into validation, training, and testing. Image resizing and model loading follow. Model customization replaces one thousand classes with two: healthy and IS. After model training, the trained model undergoes testing, including performance metrics and visualization with Grad-CAM heatmap and LIME. The process concludes.</alt-text>
</graphic>
</fig>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>The DL parameters used in the training and validation process of the models.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Parameter</th>
<th valign="top" align="center">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Batch size</td>
<td valign="top" align="center">10</td>
</tr>
<tr>
<td valign="top" align="left">Epochs</td>
<td valign="top" align="center">10</td>
</tr>
<tr>
<td valign="top" align="left">Shuffle</td>
<td valign="top" align="center">Every epoch</td>
</tr>
<tr>
<td valign="top" align="left">Learning rate</td>
<td valign="top" align="center">10<sup>&#x2212;4</sup></td>
</tr>
<tr>
<td valign="top" align="left">Optimizer</td>
<td valign="top" align="center">Adam</td>
</tr>
<tr>
<td valign="top" align="left">Image size</td>
<td valign="top" align="center">299&#x2009;&#x00D7;&#x2009;299</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2d"><label>2.4</label><title>Explainability and interpretability of DL models</title>
<p>The Grad-CAM visualisation technique was used in our model evaluation to enhance the explainability of model predictions (<xref ref-type="bibr" rid="B26">26</xref>). Grad-CAM provides a visual explanation in the form of a heatmap overlay on the image, highlighting the region of interest (ROI) in the output image (<xref ref-type="bibr" rid="B26">26</xref>), which refers to a specific area within an LUS image used to identify particular pathologies or diseases. For instance, in cases of non-healthy frames (IS) (<xref ref-type="fig" rid="F7">Figure&#x00A0;7</xref>, Grad-CAM), the ROI may be defined as an area containing B-lines. This technique generates heatmaps, overlaying the original image to highlight areas influencing the model&#x0027;s decision and aiding in identifying related features. In addition, LIME was used to explain predictions by estimating the decision boundary in a specific input image, focusing on the intended ROI in the LUS image, and generating a heatmap scale (<xref ref-type="fig" rid="F7">Figure&#x00A0;7</xref>, LIME).</p>
<fig id="F7" position="float"><label>Figure&#x00A0;7</label>
<caption><p>Examples of Grad-CAM and LIME plots. These plots show clearly how the model focuses on the ROI and what features and regions are important for the model&#x0027;s prediction. In the case of Grad-CAM, the heatmap pinpoints the regions that significantly impacted the prediction, where the red zones represent the elements the model focused on most. For LIME, features with higher scores, and thus more intense colours, are the ones that the model considered more important in making its decision. For example, the green and red colour areas (ranging from 3 to 10<sup>&#x2212;5</sup> on the heatmap scale on the LIME image) indicate the most important areas that support the decision made by the model. Conversely, the blue areas (0&#x2013;3) outline the less important features of the model&#x0027;s decision. In addition, at the top of the visualisation plots, the prediction comes with a confidence score (100&#x0025;), which indicates the model&#x0027;s certainty about its prediction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g007.tif"><alt-text content-type="machine-generated">Ultrasound images with overlay analysis for Grad-Cam and Lime methods. The left panels show grayscale ultrasound images with ROIs outlined in red. The right panels display the corresponding heatmaps indicating interstitial syndrome, marked by vibrant colors with red denoting high intensity. Both methods achieve 100% accuracy as indicated. Color bars on the right range from blue to red for intensity reference.</alt-text>
</graphic>
</fig>
<p>LIME highlights influential regions that contribute to a specific prediction, aiding understanding of the model&#x0027;s decision-making process (<xref ref-type="bibr" rid="B24">24</xref>). It approximates the boundary that defines the ROI by creating a new scale for a heatmap. This scale highlights the regions of the input image that have the most impact on the model&#x0027;s prediction. LIME divides the image into identifiable portions and evaluates the impact of each part of the LUS image. In <xref ref-type="fig" rid="F7">Figure&#x00A0;7</xref>, the LIME visualisations show the most important features, which are represented by scores and colours. Higher scores, indicated by more intense colours (from blue to red), correspond to features that contributed more significantly to the model&#x0027;s decision.</p>
<p>A smaller, representative subset of frames was randomly selected from the test set (2,060 LUS frames) for evaluation. Specifically, 20&#x0025; of the total dataset (412 out of 2,060) was sampled by selecting one frame every five frames using a simple MATLAB script. Grad-CAM and LIME were generated to analyse which areas of the images the model focused on, producing corresponding cropped images for each frame. Each sampled frame was manually reviewed to determine whether the Grad-CAM or the LIME visualisations correctly highlighted the intended ROI. For IS frames, correct localisation was defined as &#x201C;accurate emphasis on B-lines or pleural irregularities&#x201D;; for healthy frames, correct localisation was defined as &#x201C;emphasis on normal pleural and A-line features.&#x201D; Grad-CAM and LIME accuracies were then computed as the proportion of frames correctly localised with respect to the expert-defined ROI.</p>
<p>Along with Grad-CAM and LIME plots, the confidence score was used. The confidence value, or the probability score, quantifies the model&#x0027;s level of confidence in its predictions (<xref ref-type="bibr" rid="B25">25</xref>). Higher probabilities or confidence values generally indicate higher confidence in the model (i.e., a confidence score of 100&#x0025; indicates that the model has absolute certainty in its prediction). In comparison, lower probabilities suggest lower confidence in the model (<xref ref-type="bibr" rid="B25">25</xref>). A confidence score of 50&#x0025; means that the model is equally likely to be correct or incorrect. All developed models were tested on the unseen test dataset to generate Grad-CAM and LIME plots, along with the corresponding confidence scores (<xref ref-type="fig" rid="F7">Figure&#x00A0;7</xref>).</p>
<p>Nevertheless, a high level of confidence does not guarantee the accuracy of the prognosis. The model&#x0027;s indication solely reflects its confidence level derived from the knowledge acquired during training. The model&#x0027;s confidence may be significantly high. Yet, it can produce an inaccurate clinical diagnosis, particularly if it has been trained on biased data or encounters data that significantly deviate from its training set (<xref ref-type="bibr" rid="B32">32</xref>).</p>
</sec>
<sec id="s2e"><label>2.5</label><title>Feature fusion technique</title>
<p>The feature fusion process in artificial intelligence (AI) combines information from multiple AI models trained on the same dataset using different ML classifiers (<xref ref-type="bibr" rid="B33">33</xref>). This strategy is a powerful technique employed to enhance overall performance by incorporating features from different DL models. Its objective is to acquire and merge additional knowledge from multiple models in order to improve the representation of the features extracted from them (<xref ref-type="bibr" rid="B34">34</xref>). During the learning stage, the initial layers of each DL model acquire low-level features such as colours, edges, and forms, while the last layers acquire the high-level features of an object. Consequently, the model&#x0027;s final output features result from this hierarchical learning process, in which complex, high-level features are built upon more fundamental ones. Features are extracted from the bottleneck layers, which are the layers prior to the output layer. These layers are rich in complex features that have been analysed through the network and are considered highly informative for the classification task (<xref ref-type="bibr" rid="B35">35</xref>). Feature fusion is then utilised, where features from the bottleneck layers learnt from different models are combined or &#x201C;fused.&#x201D;</p>
<p>After the feature extraction phase, the extracted features undergo a process of normalisation to ensure that they are on a comparable scale, followed by concatenation to form a unified feature vector for each image. This combination offers an improved depiction of the features and enables a more thorough representation of the underlying patterns and features in the data. These fused features are then used as inputs to train a machine learning (ML) classifier, which adjusts its parameters by comparing predictions to ground truth (GT) labels to minimise prediction errors. This method enables ML classifiers to leverage the capabilities and distinctive attributes of each DL model, thereby improving understanding of the target tasks (<xref ref-type="bibr" rid="B36">36</xref>). The integration of features from different models provides numerous benefits for ML classifiers. In the present study, the built-in Classification Learner in MATLAB 2023b was utilised to develop ML classifiers, which include linear discriminant analysis, neural networks, coarse KNN, cubic SVM, the boosted tree, and the coarse tree, to determine the most efficient classifier for this detection task.</p>
<p>The study utilised multiple models for this task, beginning with a comprehensive fusion (F1) involving all models mentioned in Scenarios 2 and 3, namely Xception, InceptionResnetV2, and the non-pre-trained model. A feature fusion (F2) was also performed with the two best models identified in Scenario 2. Furthermore, two separate feature fusion processes were performed: one between the non-pre-trained model and Xception (F3) and another between the non-pre-trained model and InceptionResnetV2 (F4), each done individually. Each feature fusion process generated fused features, which were then used as input to the ML classifiers for training and performance evaluation, as shown in <xref ref-type="fig" rid="F8">Figure&#x00A0;8</xref>. The resulting classifiers are named C1, C2, C3, and C4.</p>
<fig id="F8" position="float"><label>Figure&#x00A0;8</label>
<caption><p>Four fusion processes (F1, F2, F3, and F4) that combine the features of different models from Scenario 2 and Scenario 3 are shown: F1 (feature fusion of Xception, InceptionResnetV2, and non-pre-trained Xception model), F2 (feature fusion of Xception and InceptionResnetV2), F3 (fusion of non-pre-trained Xception model and pre-trained Xception), and F4 (fusion of non-pre-trained Xception model and InceptionResnetV2). Features extracted from these models (F1, F2, F3, and F4) are pooled together and fed into various machine learning (ML) classifiers such as linear discriminant analysis, neural networks, KNN, cubic SVM, the boosted tree, and the coarse tree. These classifiers then make final predictions (C1, C2, C3, and C4).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g008.tif"><alt-text content-type="machine-generated">Diagram of a machine learning pipeline featuring three models: Xception, Inceptionresnetv2, and Baseline. Each model processes input features from different scenarios and classifies them into \"Healthy\" or \"IS\". Their outputs, labeled F1 to F4, feed into a feature pool. This pool is then used by an ML Classifier that includes various methods like linear discriminant analysis and neural networks. The final classifications are labeled C1 to C4, again categorized as \"Healthy\" or \"IS\".</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2f"><label>2.6</label><title>Comparison of AI models with medical experts</title>
<p>To assess the robustness of our developed models for IS detection, a comparative analysis was conducted against clinical diagnoses made on individual clips. A randomly selected test set of 16 video clips representing our dataset was assessed. The GT labels were established during data acquisition by two senior clinicians (DC and XC) and ensured expert consensus. Each LUS clip was reviewed and labelled as healthy or interstitial syndrome (IS) according to predefined diagnostic criteria described earlier (Section 2.3, <xref ref-type="fig" rid="F5">Figure&#x00A0;5</xref>). These LUS labels were documented and served as the GT for model training and evaluation. A blinded review was further conducted by two additional experts (MS and CE), each with 15 years of experience and holding senior positions at the Queensland University of Technology. They independently labelled the clips as 0 (healthy) or 1 (IS). Their diagnoses served as a reference point for evaluating the performance of our pre-trained models across three scenarios, allowing a direct comparison with human expert evaluations. Each clip within our test set was assigned a distinct numerical identifier. These identifiers and the corresponding ground-truth (GT) labels were documented in an Excel spreadsheet for recording outputs and analysis. To ensure the integrity of our diagnostic assessment, each expert conducted their evaluations independently.</p>
<p>The algorithm performance was assessed on each LUS clip to simulate clinical evaluation practices, similar to how clinicians assess entire videos during diagnosis. For algorithmic assessment, the conversion from frames to video clip analysis adopts a Simple Majority Voting scheme (SVE) to aggregate individual frame predictions into a singular diagnosis for each video clip (<xref ref-type="bibr" rid="B37">37</xref>). This transition to video clip analysis compiles predictions from individual frames into a single diagnosis for each video clip. The class with the highest number of predictions is taken as the output prediction for the entire clip. To qualify as a single-video diagnosis, the model must identify healthy or IS frames that comprise more than 50&#x0025; of the video&#x0027;s total frames, ensuring that they represent a significant portion of the video frames. Upon completion of these assessments, the diagnostic results from each expert were cross-referenced with the GT labels. This comparative analysis enabled us to determine the accuracy of each expert&#x0027;s predictions by identifying correct and false predictions. All developed DL models were evaluated and compared with our clinical experts, using accuracy, sensitivity, and specificity as performance metrics. In addition, a receiver operating characteristic (ROC) curve, which is created by plotting the true positive rate (TPR) against the false positive rate (FPR), was used in the comparative analysis to discern the strengths and limitations of DL models in IS detection in comparison with our medical experts.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Model performance metrics</title>
<p>To assess training stability of the DL models, learning curves for training and validation accuracies were analysed across all models and scenarios (<xref ref-type="fig" rid="F9">Figure&#x00A0;9</xref>). The results showed a consistent upward trend in training accuracy with increasing epochs, confirming effective learning progression. Models in Scenario 2 (Xception and InceptionResNetV2) converged rapidly within the first few epochs and achieved stable performance by the final epoch, reaching approximately 99&#x0025; and 95&#x0025; rates for training and validation accuracies, respectively. In comparison, the models in Scenarios 1 and 3 exhibited a slower convergence pattern and slightly lower final accuracy rates, plateauing near 90&#x0025;. The minimal gap between the training and the validation curves demonstrates stable learning behaviour and suggests limited overfitting.</p>
<fig id="F9" position="float"><label>Figure&#x00A0;9</label>
<caption><p>Learning curves showing <bold>(a)</bold> training and <bold>(b)</bold> validation accuracies across all models among scenarios. The close alignment between the training and the validation curves indicates smooth convergence and minimal overfitting.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g009.tif"><alt-text content-type="machine-generated">Two line graphs compare training and validation accuracy across different models and scenarios over ten epochs. Both graphs show accuracy improvements with epochs. Scenario two using InceptionResNetV2 achieves the highest accuracy, while Scenario three with non-pretrained Xception exhibits the lowest. Models are represented by different colored lines and symbols.</alt-text>
</graphic>
</fig>
<p>The performance of the trained models across the various scenarios was evaluated on the test set using accuracy, precision, recall, and F1 score. The test set consisted of 2,060 LUS frames extracted from 16 LUS clips (eight healthy and eight non-healthy, with two clips per patient). The GT label used to assess the performance of the DL models is the label corresponding to the whole video. This means that a single label representing the overall LUS video classification is assigned to each video frame, against which the model&#x0027;s predictions for individual frames are evaluated. <xref ref-type="table" rid="T2">Table&#x00A0;2</xref> shows that both models of Scenario 2 (i.e., pre-trained models with filtered training data) outperformed those of Scenarios 1 and 3 in terms of accuracy, precision, recall, and F1 score. The Xception model in Scenario 2 achieved a higher accuracy rate (95.9&#x0025;) and higher precision and recall rate (95.8&#x0025;) than the model in Scenario 1. It also had a higher F1 score of 96.0&#x0025;. On the other hand, the InceptionResnetV2 model in Scenario 2 achieved an accuracy rate of 95.73&#x0025; and a specificity, precision, recall, and F1 score of 95.7&#x0025;. Lastly, the non-pre-trained model in Scenario 3 achieved a specificity and precision rate of 90.6&#x0025;, a recall rate of 90.4&#x0025;, and an F1 score of 90.5&#x0025;.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>The performance metrics of both models are summarised, based on a frame-based assessment in which the ground truth (GT) is assigned to each frame across the entire clip.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Scenario</th>
<th valign="top" align="center" rowspan="2">Model</th>
<th valign="top" align="center" colspan="5">Performance metrics</th>
</tr>
<tr>
<th valign="top" align="center">Accuracy (&#x0025;)</th>
<th valign="top" align="center">Specificity (&#x0025;)</th>
<th valign="top" align="center">Precision (&#x0025;)</th>
<th valign="top" align="center">Recall (&#x0025;)</th>
<th valign="top" align="center">F1 score (&#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Scenario 1 pre-trained</td>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center">84.6</td>
<td valign="top" align="center">85.4</td>
<td valign="top" align="center">88.1</td>
<td valign="top" align="center">84.2</td>
<td valign="top" align="center">86.1</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Scenario 2 pre-trained</td>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center"><bold>95</bold>.<bold>9</bold></td>
<td valign="top" align="center"><bold>96</bold>.<bold>6</bold></td>
<td valign="top" align="center"><bold>97</bold>.<bold>1</bold></td>
<td valign="top" align="center"><bold>95</bold>.<bold>3</bold></td>
<td valign="top" align="center"><bold>96</bold>.<bold>2</bold></td>
</tr>
<tr>
<td valign="top" align="left">InceptionResnetV2</td>
<td valign="top" align="center"><bold>95</bold>.<bold>8</bold></td>
<td valign="top" align="center"><bold>95</bold>.<bold>5</bold></td>
<td valign="top" align="center"><bold>96</bold>.<bold>1</bold></td>
<td valign="top" align="center"><bold>95</bold>.<bold>8</bold></td>
<td valign="top" align="center"><bold>96</bold>.<bold>0</bold></td>
</tr>
<tr>
<td valign="top" align="left">Scenario 3 non-pre-trained</td>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center">90.5</td>
<td valign="top" align="center">88.2</td>
<td valign="top" align="center">89.7</td>
<td valign="top" align="center">92.5</td>
<td valign="top" align="center">91.1</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>In Scenario 1, all 14,902 frames were included, and no filtering was applied. However, in Scenario 2, only 10,546 frames were included, a filtering technique was applied, and non-healthy frames (without B-lines) from IS were excluded. In Scenario 3, the Scenario 2 filtering criteria were applied, and the model was trained from scratch (non-pre-trained Xception model).</p></fn>
<fn id="TF2"><p>The metrics highlighted in bold indicate the highest performance achieved among DL models.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="s3a1"><label>3.1.1</label><title>Frame-based: confusion matrix analysis</title>
<p>The results of the four models on the test set (2,060 LUS frames, with 1,115 IS frames and 945 healthy) extracted from 16 clips are presented in <xref ref-type="fig" rid="F11">Figure&#x00A0;11</xref>. The number of false predictions made by the Xception model in Scenario 1 was triple that of the same model in Scenario 2, with 315 frames (highlighted in dark orange) and 84 frames (highlighted in light blue), respectively. These results suggested that the Xception model in Scenario 1 had a significantly higher rate of false predictions compared with the same model in Scenario 2. Overall, the models in Scenario 2 showed a significantly lower proportion of falsely predicted frames compared with Scenario 1 and Scenario 3 (non-pre-trained model). This indicates a greater ability of the Scenario 2 models to accurately distinguish between healthy and non-healthy (IS) frames. Detailed classification performance, including predictions for each clip and the counts of true and false predictions for both healthy and non-healthy frames, is provided in <xref ref-type="sec" rid="s12">Supplementary Appendix A</xref>.</p>
<fig id="F10" position="float"><label>Figure&#x00A0;10</label>
<caption><p>A performance comparison of all models across three measures: overall classification accuracy (blue), Grad-CAM localisation accuracy (orange), and LIME localisation accuracy (green). The results are based on a representative subset of the test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;412 frames, sampled from 2,060 total frames). The bars indicate the percentage of correctly classified frames and correctly localised regions of interest (ROIs) identified by Grad-CAM and LIME. Each fraction represents the number of accurate samples out of all evaluated frames. For example, in Scenario 1, the Xception model achieved an 84.7&#x0025; classification accuracy rate (349/412), while Grad-CAM correctly localised 69.0&#x0025; (272/394) and LIME 67.0&#x0025; (264/394) of ROIs.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g010.tif"><alt-text content-type="machine-generated">Bar chart comparing model performance and ROI localization using Grad-CAM and LIME across four models: S1-Xception, S2-Xception, S2-InceptionResNetV2, and S3-Non-pretrained. The chart displays overall accuracy, Grad-CAM accuracy, and LIME accuracy with respective values: S1-Xception has 84.71%, 69.04%, and 67.01%; S2-Xception has 95.39%, 54.71%, and 99.24%; S2-InceptionResNetV2 has 95.96%, 42.13%, and 100%; S3-Non-pretrained has 89.56%, 81.57%, and 99.19%. Each model's performance is represented with blue, orange, and green bars.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3b"><label>3.2</label><title>Explainability and interpretability</title>
<p>A subset of the testing dataset was selected, consisting of 412 frames: 189 healthy (H) frames and 223 non-healthy (IS) frames. A randomly selected subset was used to evaluate the model&#x0027;s performance along with the accuracy of its corresponding Grad-CAM and LIME visualisations, as shown in <xref ref-type="fig" rid="F10">Figure&#x00A0;10</xref> (with additional details in <xref ref-type="sec" rid="s12">Supplementary Appendix B</xref>). The accuracy of Grad-CAM and LIME was manually assessed by examining how well the highlighted regions aligned with the expected ROI in both IS and healthy cases. For non-healthy cases, the assessment focused on whether the visualisations correctly highlighted existing pathological features (e.g., B-lines). For healthy cases, the evaluation examined whether the visualisation instead emphasised normal lung features, such as A-lines, while avoiding false highlighting of non-existent pathologies such as the background.</p>
<fig id="F11" position="float"><label>Figure&#x00A0;11</label>
<caption><p>Confusion matrices (CMs) of the developed models on the test dataset using three training techniques. Two color maps are applied consistently across all matrices; the blue tones represent higher accuracy rates (&#x2265;90&#x0025;), while the red tones highlight lower performance (&#x003C;90&#x0025;). From left to right, the CMs show Scenario 1 with 315 false predictions, Scenario 2 (Xception model) with 84 false predictions and the InceptionResNetV2 model with 88 false predictions, and Scenario 3 (non-pre-trained model) with 195 false predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g011.tif"><alt-text content-type="machine-generated">Three confusion matrices compare model performance. Scenario 1: Xception model has 80.5% correct predictions for healthy and 88.3% for interstitial syndrome. Scenario 2: Xception has 94.6% and 97%. InceptionresnetV2 achieves 95.2% and 96.1%. Scenario 3: Baseline model scores 91.4% and 89.8%.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="F11">Figures&#x00A0;11</xref>, <xref ref-type="fig" rid="F12">12</xref> summarise model performance and Grad-CAM visualisations for healthy and SIS LUS frames. In the heatmaps, warmer colours (red/orange) represent areas of stronger model activation, whereas cooler colours (green/blue) denote weaker activation. The red box marks the predefined ROI used for evaluation. In healthy examples, the apparent shift in the ROI relative to SIS frames reflects the model&#x0027;s attention to normal pleural lines or A-lines rather than to vertical B-lines.</p>
<fig id="F12" position="float"><label>Figure&#x00A0;12</label>
<caption><p>A visualisation of Grad-CAM and confidence values for the LUS frames predicted for SIS and healthy frames for all models. The red box in the input image indicates the intended region of interest (ROI). In each scenario, the images display Grad-CAM with a red box highlighting the ROI. A red cross is shown if Grad-CAM does not align with the ROI, and a green checkmark is shown if it does.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g012.tif"><alt-text content-type="machine-generated">Comparison of ultrasound images processed through three scenarios. Scenario 1 using Xception shows SIS with 100% confidence marked incorrect and healthy at 78.9% confidence marked correct. Scenario 2 using Xception and InceptionResNetV2 shows SIS and healthy both marked incorrect and correct, respectively. Scenario 3 with a non-pretrained model shows SIS at 100% marked correct and healthy at 99.6% marked incorrect. Input images are on the left for reference.</alt-text>
</graphic>
</fig>
<p>The Xception model in Scenario 1 achieved an overall accuracy rate of 84.71&#x0025;, correctly classifying 79.89&#x0025; of healthy frames and 88.79&#x0025; of non-healthy frames. Grad-CAM, as shown in <xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>, highlighted relevant areas in 83.44&#x0025; of healthy and 73.74&#x0025; of SIS frames, while LIME, as shown in <xref ref-type="fig" rid="F13">Figure&#x00A0;13</xref>, provided a better accuracy rate for healthy frames (100&#x0025;) but only a rate of 57.07&#x0025; for SIS frames. In Scenario 2, Xception performed better with a 95.39&#x0025; accuracy rate, but Grad-CAM, as shown in <xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>, failed to identify ROIs in healthy frames (0&#x0025;), while excelling in SIS frames (100&#x0025;). As shown in <xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>, LIME demonstrated more consistency, with a 98.31&#x0025; accuracy rate for healthy and 100&#x0025; rate for SIS frames. The InceptionResNetV2 model in Scenario 2 achieved the highest accuracy rate (95.96&#x0025;), effectively identifying healthy (95.24&#x0025;) and SIS (95.96&#x0025;) frames. Grad-CAM, as shown in <xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>, performed well for healthy frames (92.22&#x0025;) but failed for SIS (0&#x0025;), while LIME, as shown in <xref ref-type="fig" rid="F13">Figure&#x00A0;13</xref>, correctly identified all frames (100&#x0025;). Scenario 3 (non-pre-trained) reached an 89.56&#x0025; accuracy rate with Grad-CAM, as shown in <xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>, inconsistently localising healthy frames (59.76&#x0025;) but excelling in SIS frames (100&#x0025;). In contrast, as shown in <xref ref-type="fig" rid="F13">Figure&#x00A0;13</xref>, LIME remained highly reliable for both classes (98.22&#x0025; and 100&#x0025;). More details can be found in <xref ref-type="sec" rid="s12">Supplementary Appendix B</xref>.</p>
<fig id="F13" position="float"><label>Figure&#x00A0;13</label>
<caption><p>A visualisation of LIME and confidence values for LUS frames predicted as SIS and healthy frames for all models. For the SIS sample, the Scenario 1 model focuses on multiple areas, shown as a diffuse pattern, marked in green, red, and blue. In contrast, the Scenario 2 model focuses on IS features (yellow arrows) within the intended ROI (marked with the red box on the input image). For the healthy sample, the Scenario 1 model focuses on multiple areas, marked in blue and red. In contrast, Scenario 2 focuses on IS features (yellow arrows) within the intended ROI, marked with red, orange, and blue colours.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g013.tif"><alt-text content-type="machine-generated">Ultrasound image analysis showing three scenarios using different models: Xception, InceptionresnetV2, and a non-pretrained model. Each scenario presents segmentations for SIS and healthy conditions with accuracy percentages. Heatmaps highlight detected areas with a spectrum color scale indicating intensity. The input images are labeled \"SIS\" and \"Healthy\" on the left. Arrows point to regions of interest in each scenario.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3c"><label>3.3</label><title>False predictions</title>
<p>All the false predictions of the Xception model in Scenario 2 were re-evaluated because of its superior performance in terms of accuracy among the four models evaluated in Scenario 1, Scenario 2, and Scenario 3. This thorough evaluation aimed not only to capture errors in the model but also to rigorously assess whether the identified predictions were truly false, thereby enhancing our understanding of the model&#x0027;s diagnostic reliability. A total of 84 frames were reviewed by our expert (MS), including 33 false positives and 51 false negatives identified by the model.</p>
<p>For the false positive frames (33 out of 1,115&#x2009;&#x003D;&#x2009;2.96&#x0025;) where the Xception model in Scenario 2 predicted &#x201C;healthy,&#x201D; the expert re-evaluated 33 frames. The clinical expert categorised these frames into three classes, as shown in <xref ref-type="fig" rid="F14">Figure&#x00A0;14</xref>. The first class comprised 22 of the 33 frames (66.67&#x0025;). Our clinicians classified the frames as healthy because of either the absence of B-lines or limited visibility, which matches the Xception model&#x0027;s prediction in Scenario 2. Only 1 of the 33 frames (3.03&#x0025;) exhibited potential B-lines in the second class. In the third class, 10 of the 33 frames (30.3&#x0025;) were classified as non-diagnostic or marked with limited visibility because of the shadowing caused by ribs.</p>
<fig id="F14" position="float"><label>Figure&#x00A0;14</label>
<caption><p>A comparison of models in the three scenarios where the Xception model in Scenario 2 is more confident in predicting healthy frames for those mislabelled as SIS, with higher confidence values (67.6&#x0025;&#x2013;100&#x0025;).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g014.tif"><alt-text content-type="machine-generated">Three scenarios display heatmaps comparing deep learning models on medical image analysis, with accuracy percentages. Scenario 1 uses Xception, Scenario 2 uses Xception and InceptionResNetV2, and Scenario 3 uses a non-pretrained model. Heatmaps indicate prediction areas for healthy or SIS with varying accuracy. Each scenario presents two classes with associated percentages.</alt-text>
</graphic>
</fig>
<p>In the case of false negatives, where the model incorrectly identified frames as &#x201C;IS,&#x201D; the expert re-evaluated 51 frames (51 out of 945 frames&#x2009;&#x003D;&#x2009;5.4&#x0025;). For the first class, the clinicians determined 11 of the 51 frames (21.57&#x0025;) to be IS, indicating that the model&#x0027;s IS predictions for these frames were in fact correct. The remaining 40 of the 51 frames (78.43&#x0025;) were considered healthy, as there was no evidence of B-lines. <xref ref-type="fig" rid="F15">Figure&#x00A0;15</xref> shows examples of each of the two classes.</p>
<fig id="F15" position="float"><label>Figure&#x00A0;15</label>
<caption><p>A comparison of Scenarios 1, 2, and 3, where the Xception model in Scenario 2 is mislabelled in the upper example (first class) with a low confidence value and correctly predicted in the lower example (2nd class).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g015.tif"><alt-text content-type="machine-generated">Ultrasound images of healthy cases analyzed through three scenarios. Scenario 1 (Xception) shows heatmaps with SIS scores of 85.8% and 73.6%. Scenario 2 compares Xception (59.8% and 53.3%) with InceptionresnetV2 (58.0% and 87.5%). Scenario 3 utilizes a non-pretrained model with scores of 99.9% and 54.4%. Each scenario highlights different regions with varying intensities.</alt-text>
</graphic>
</fig>
<p>Upon re-evaluating the false prediction frames (84 frames) of the Xception model in Scenario 2, the model correctly predicted 22 of the 33 false positives (66.67&#x0025;). However, the model correctly predicted only 11 out of 51 false negatives. This demonstrates the importance of analysing videos at the frame level to better understand how the algorithm performs. Also, not all frames in a video labelled as IS necessarily show IS features, which should be considered in the evaluation.</p>
</sec>
<sec id="s3d"><label>3.4</label><title>Evaluation of fusion ML classifiers</title>
<p>Multiple ML classifiers were trained using features extracted from the three models (second and third scenarios) previously mentioned.</p>
<sec id="s3d1"><label>3.4.1</label><title>ML classifiers&#x0027; performance metrics</title>
<p>The performance of the best ML classifier in the various fusion processes was evaluated in terms of accuracy, precision, recall, and F1 score. As shown in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>, both the binary GLM logistic regression (F1) and the neural network (F2) models accurately predicted 98.2&#x0025; of the LUS frames. The binary GLM logistic regression model achieved an F1 score of 98.2&#x0025;, with a precision rate of 97.3&#x0025; and a recall rate of 99.2&#x0025;. The neural network model achieved a slightly higher F1 score of 98.3&#x0025;, with a precision rate of 98.2&#x0025; and a recall rate of 98.4&#x0025;. The KNN model in F3 achieved an accuracy rate of 95.8&#x0025;, a precision rate of 94.9&#x0025;, a recall rate of 97.3&#x0025;, and an F1 score of 96.1&#x0025;. Lastly, the neural network model in F4 achieved an accuracy rate of 97.0&#x0025;, with a specificity rate of 96.7&#x0025;, a precision rate of 97.2&#x0025;, a recall rate of 97.0&#x0025;, and an F1 score of 97.1&#x0025;. Overall, both ML classifiers in F1 and F2 outperformed ML classifiers in F3 and F4 in terms of accuracy, precision, recall, and F1 score.</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>The summary performance metrics of the developed models on the test dataset (2,060 frames).</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" colspan="2" rowspan="2">ML classifiers</th>
<th valign="top" align="center" colspan="5">Performance metrics</th>
</tr>
<tr>
<th valign="top" align="center">Accuracy (&#x0025;)</th>
<th valign="top" align="center">Specificity (&#x0025;)</th>
<th valign="top" align="center">Precision (&#x0025;)</th>
<th valign="top" align="center">Recall (&#x0025;)</th>
<th valign="top" align="center">F1 score (&#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">F1</td>
<td valign="top" align="left">Binary GLM Logistic R</td>
<td valign="top" align="center"><bold>98</bold>.<bold>2</bold></td>
<td valign="top" align="center"><bold>97</bold>.<bold>0</bold></td>
<td valign="top" align="center"><bold>97</bold>.<bold>3</bold></td>
<td valign="top" align="center"><bold>99</bold>.<bold>2</bold></td>
<td valign="top" align="center"><bold>98</bold>.<bold>2</bold></td>
</tr>
<tr>
<td valign="top" align="left">F2</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="center"><bold>98</bold>.<bold>2</bold></td>
<td valign="top" align="center"><bold>97</bold>.<bold>9</bold></td>
<td valign="top" align="center"><bold>98</bold>.<bold>2</bold></td>
<td valign="top" align="center"><bold>98</bold>.<bold>4</bold></td>
<td valign="top" align="center"><bold>98</bold>.<bold>3</bold></td>
</tr>
<tr>
<td valign="top" align="left">F3</td>
<td valign="top" align="left">KNN</td>
<td valign="top" align="center">95.8</td>
<td valign="top" align="center">94.1</td>
<td valign="top" align="center">94.9</td>
<td valign="top" align="center">97.3</td>
<td valign="top" align="center">96.1</td>
</tr>
<tr>
<td valign="top" align="left">F4</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="center">97.0</td>
<td valign="top" align="center">96.7</td>
<td valign="top" align="center">97.2</td>
<td valign="top" align="center">97.0</td>
<td valign="top" align="center">97.1</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF3"><p>F1 is a feature fusion of all models mentioned in Scenarios 2 and 3 (Xception, InceptionResnetV2, and the non-pre-trained model). F2 is a feature fusion between the two best models from Scenario 2. F3 and F4 are feature fusions between the non-pre-trained model and Xception in Scenario 2 and the non-pre-trained model and InceptionResnetV2 in Scenario 2, respectively.</p></fn>
<fn id="TF4"><p>The metrics highlighted in bold indicate the highest performance achieved among the ML models.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3d2"><label>3.4.2</label><title>ML classifiers&#x0027; confusion matrix (frame-based assessment)</title>
<p>To further evaluate the performance of the ML classifiers, confusion matrices are presented in <xref ref-type="fig" rid="F16">Figure&#x00A0;16</xref>. F1 and F2 had the same number of false predictions, with 38 frames each (highlighted in light blue). In contrast, F3 and F4 had much higher rates of false predictions, with 86 and 65 frames, respectively (highlighted in light green). Overall, these results indicated that F3 and F4 were less accurate than F1 and F2 in classifying the test data. This improved performance indicated a greater capacity of the fused models in F2 to accurately differentiate between non-healthy (IS) and healthy frames. More detailed classification performance, including predictions for each clip, the number of LUS frames, and the number of true and false predictions for both healthy and non-healthy frames, is provided in <xref ref-type="sec" rid="s12">Supplementary Appendix C</xref>. In addition, <xref ref-type="sec" rid="s12">Supplementary Appendix D</xref> provides an example comparison of confidence values in Scenarios 1, 2, and 3.</p>
<fig id="F16" position="float"><label>Figure&#x00A0;16</label>
<caption><p>A confusion matrix of the fusion models on the test set (2,060 LUS frames) with four different fusion processes: left, F1 and F2 both with 38 false predictions; right, F3 with 86 false predictions and F4 with 65 false predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g016.tif"><alt-text content-type="machine-generated">Four confusion matrices labeled F1, F2, F3, and F4, display predictions for two classes: Healthy and Interstitial Syndrome. Each matrix shows true positives, false positives, true negatives, and false negatives. F1: 937, 8, 30, 1085; F2: 927, 18, 20, 1095; F3: 916, 29, 57, 1058; F4: 911, 34, 31, 1083.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3e"><label>3.5</label><title>The experts compared with our models (video-based assessment)</title>
<p>All developed AI and ML models were evaluated in terms of true and false positives and negatives, as shown in <xref ref-type="fig" rid="F17">Figure&#x00A0;17</xref>, following the methodology described in Section 2.6. The GT label used to assess the performance of the DL models compared with our experts is the label that matches the entire LUS clip label. Our first expert (MS) identified 75&#x0025; (12 clips) of the labelled cases within the sample test subset (16 clips) as correct predictions and 25&#x0025; (four clips) as false predictions of the labelled cases. In contrast, our second expert (CE) identified 87.5&#x0025; (14 clips) of the labelled cases within the sample test subset (16 clips) as correct predictions and 12.5&#x0025; (two clips) as false predictions of the labelled cases. Among all developed models, both Scenario 2 and fusion models F1, F2, F3, and F4 predicted 100&#x0025; of the LUS clips with the best accuracy rates. They identified 100&#x0025; (16 clips) of the labelled cases within the sample test subset (16 clips) as correct predictions with no false predictions. This gives these models an F1 score of 100&#x0025;, a precision rate of 100&#x0025;, and a recall rate of 100&#x0025;.</p>
<fig id="F17" position="float"><label>Figure&#x00A0;17</label>
<caption><p>A confusion matrix (CM) of our experts and developed models on 16 LUS clips (eight healthy and eight IS). <bold>(a)</bold> The CM of our first expert with four false predictions, while our second expert shows only two false predictions. <bold>(b)</bold> The CM for all developed models in Scenarios 1, 2, and 3. In Scenario 2, both the Xception and InceptionResNetV2 models demonstrate perfect classification for all clips. In Scenarios 1 and 3, the Xception and non-pre-trained models each show two false predictions. <bold>(c)</bold> The CM for all fused models (F1, F2, F3, and F4), where all models achieve perfect classification with no false predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g017.tif"><alt-text content-type="machine-generated">Three panels of confusion matrices depict model performance. (a) Compares Expert 1 and Expert 2. Expert 1 has 6 correct healthy and 6 correct interstitial syndrome predictions, Expert 2 has 8 and 6, respectively. (b) Shows Scenario 1 with Xception, Scenario 2 with InceptionresnetV2, and a Baseline Model. Xception has all correct predictions, as does InceptionresnetV2, while the Baseline Model has one incorrect prediction per category. (c) Features Fusion Models F1, F2, F3, and F4, each achieving perfect predictions for healthy and interstitial syndrome classes.</alt-text>
</graphic>
</fig>
<p>The DL models in Scenario 1 and Scenario 3 identified 88&#x0025; (14 clips) of the labelled cases within the sample test subset (16 clips) as correct predictions and two as false predictions, as shown in <xref ref-type="fig" rid="F18">Figure&#x00A0;18</xref>. Upon reviewing the two LUS clips in <xref ref-type="fig" rid="F18">Figure&#x00A0;18</xref>, it was found that both experts faced challenges in identifying IS cases, and these have been highlighted. In Clip (a), experts classified the clip as a solid organ with peripheral B-lines but did not confirm the presence of three B-lines. Clip (b) also shows a mixed pattern with scattered B-lines, complicating the verification of the presence of three B-lines. Both cases show subjectivity in B-line interpretation and quantification, as experts struggled to distinguish artefacts from true pathology. <xref ref-type="fig" rid="F19">Figure&#x00A0;19</xref> shows a spreadsheet capturing the evaluation process and outcomes of LUS clip predictions performed by our experts and all developed models on the set of 16 clips. Each column under the experts and models represents their predictions for the clips, with &#x201C;1&#x201D; indicating an IS prediction and &#x201C;0&#x201D; representing a healthy clip. The cells highlighted in pink mark the instances where a false prediction was recorded by the corresponding expert or developed model. Overall, both ML classifiers (F1 and F2) and models in Scenario 2 outperformed our expert performance and other developed AI and ML models in terms of accuracy, precision, recall, and F1 score. A more detailed display of the performance of AI and ML models is provided in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>.</p>
<fig id="F18" position="float"><label>Figure&#x00A0;18</label>
<caption><p>The two LUS clips demonstrating diagnostic disagreement between Expert 1 and Expert 2 relative to the GT (red &#x201C;&#x00D7;&#x201D;) are shown on the left <bold>(a and b)</bold>. The corresponding heatmaps for Scenario 2 (Xception model), with correct model predictions (green &#x201C;&#x2713;&#x201D;) displayed on the right.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g018.tif"><alt-text content-type="machine-generated">Ultrasound images and heatmaps are compared for two clips labeled 0ba21123-84cd-47 and 8b1a8e12-f916-4b. On the left, black-and-white ultrasound images are shown, where experts marked both as negative with red crosses. On the right, the heatmap overlays highlight regions in varying colors, with models predicting positive outcomes indicated by green checkmarks.</alt-text>
</graphic>
</fig>
<fig id="F19" position="float"><label>Figure&#x00A0;19</label>
<caption><p>This spreadsheet shows the evaluation output for LUS clip predictions as determined by our experts alongside developed models. It shows a comparative analysis of predictions across 16 clips, with the ground truth (GT) labels for IS and healthy states as the benchmark. Predictive outcomes are marked &#x201C;1&#x201D; for IS and &#x201C;0&#x201D; for healthy clips, with false predictions highlighted in pink.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g019.tif"><alt-text content-type="machine-generated">A table with multiple columns and rows showing comparisons of data across different scenarios. Columns include Clips, GT, two Experts, various Scenarios, and metrics labeled F1 through F4. Cells are filled with binary values, where some cells are colored green or red to indicate differences, with red showing mismatches.</alt-text>
</graphic>
</fig>
<table-wrap id="T4" position="float"><label>Table&#x00A0;4</label>
<caption><p>The performance metrics of the developed models on 16 LUS clips (8 healthy and 8 IS).</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Expert/Scenario/
Fusion Models</th>
<th valign="top" align="center" rowspan="2">Model</th>
<th valign="top" align="center" colspan="5">Performance metrics</th>
</tr>
<tr>
<th valign="top" align="center">Accuracy (&#x0025;)</th>
<th valign="top" align="center">Specificity (&#x0025;)</th>
<th valign="top" align="center">Precision (&#x0025;)</th>
<th valign="top" align="center">Recall (&#x0025;)</th>
<th valign="top" align="center">F1 score (&#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Expert 1</td>
<td valign="top" align="left"/>
<td valign="top" align="center">75</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">75</td>
</tr>
<tr>
<td valign="top" align="left">Expert 2</td>
<td valign="top" align="left"/>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">85.71</td>
</tr>
<tr>
<td valign="top" align="left">Scenario 1</td>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Scenario 2</td>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
<tr>
<td valign="top" align="left">InceptionResnetV2</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
<tr>
<td valign="top" align="left">Scenario 3</td>
<td valign="top" align="left">Non-pre-trained model</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">87.5</td>
</tr>
<tr>
<td valign="top" align="left">F1</td>
<td valign="top" align="left">Binary GLM Logistic R</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
<tr>
<td valign="top" align="left">F2</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
<tr>
<td valign="top" align="left">F3</td>
<td valign="top" align="left">KNN</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
<tr>
<td valign="top" align="left">F4</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
<td valign="top" align="center"><bold>100</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF5"><p>The metrics highlighted in bold indicate the highest performance achieved among the ML and the DL models.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Furthermore, <xref ref-type="fig" rid="F20">Figure&#x00A0;20</xref> illustrates the ROC curve, which shows the performance of our experts and developed models via TPR and FPR. The ROC curve shows that the fused F1, F2, F3, F4, and Scenario 2 models significantly outperform our experts and other developed models. However, it is important to note that this evaluation is based on a small subset of videos.</p>
<fig id="F20" position="float"><label>Figure&#x00A0;20</label>
<caption><p>The performance of developed models and our clinical experts on a set test (<italic>n</italic>&#x2009;&#x003D;&#x2009;16). The dotted lines represent the performance of clinical experts (blue for Expert 1 AUC&#x2009;&#x003D;&#x2009;0.75 and black for Expert 2 AUC&#x2009;&#x003D;&#x2009;0.88). The triangular markers with solid lines represent the performance of deep learning (DL) models in Scenarios 1 and 3 (AUC&#x2009;&#x003D;&#x2009;0.88). The pink solid line shows the superior performance of the developed models with AUC&#x2009;&#x003D;&#x2009;1.00, including both Scenario 2 models (Xception and InceptionResnetV2) and fusion models F1&#x2013;F4.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1632376-g020.tif"><alt-text content-type="machine-generated">Receiver Operating Characteristic (ROC) curve comparing different models and experts. The curve includes expert1 (AUC = 0.75) as a blue dashed line, expert2 (AUC = 0.88) as a black dashed line, Scenario 1 Xception model (AUC = 0.88) as an orange line, and Scenario 3 Baseline Model (AUC = 0.88) as another orange line, along with models achieving AUC = 1.00 in pink. The false positive rate is on the x-axis and the true positive rate on the y-axis.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>This work explores the value of using transfer learning CNN models and fusion techniques to enhance the performance of deep learning models for classification tasks, specifically in identifying B-lines and Interstitial Syndrome in LUS frames. In addition, the work evaluates how applying filtering techniques during the training process can improve accuracy and reduce disagreement rates.</p>
<p>The fused models in Feature Fusion 1 (F1) and Feature Fusion 2 (F2), where the first (F1) combined all models from Scenarios 2 and 3 (Xception, InceptionResNetV2, and the non-pre-trained model) and the latter (F2) combined the best-performing models from Scenario 2, remarkably display substantial enhancements across a spectrum of performance metrics compared with all developed models, as shown in Section 3.5. This substantial improvement can be attributed to the strategic use of filtering techniques during the training phase in Scenario 2 and the fusion technique used.</p>
<p>The fused models (F1 and F2) demonstrate a notable surge in the accuracy rate (98.2&#x0025;) compared with other models. This significant improvement indicates that the applied filtering techniques have resulted in more precise overall predictions. In addition, the fusion models exceed the accuracy rates of the best individual models in Scenario 2 (95.9&#x0025; and 95.8&#x0025;).</p>
<p>Importantly, the fused models in (F1 and F2) showed a notable reduction in false predictions relative to the individual DL models in Scenario 2. This reduction provides tangible evidence that the incorporated filtering techniques and combined features into fused models have effectively mitigated the model&#x0027;s tendency to misclassify healthy instances as IS and vice versa.</p>
<p>A comparison between the fused models, F1 and F2, and individual models in Scenario 2 revealed a decrease in false predictions (including false negatives and positives) within the fused models. F1 and F2 models mislabelled only 38 frames (1.8&#x0025;), a significant improvement compared with the 84 frames (4.05&#x0025;) mislabelled by the Xception model in Scenario 2. In addition, within the individual models in Scenarios 1, 2, and 3, the Grad-CAM and LIME visualisations supported our method of excluding healthy frames from IS frames during training of the Xception model in Scenario 2, even with the small dataset used, compared with the Xception model in Scenario 1. This method significantly impacted the observed performance differences between the two models.</p>
<p>A notable observation in this study involves the Grad-CAM visualisation, as it often highlights areas outside the ROI. As shown in Section 3.2 (<xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>), these visualisations may inconsistently align with clinically relevant features, thereby limiting their explainability and clinical utility. In contrast, LIME visualisations offer more precise and interpretable explanations that closely match the ROI, making them more useful for clinical assessments (<xref ref-type="fig" rid="F13">Figure&#x00A0;13</xref>). A comparison of Xception and InceptionResnetV2 in Scenario 2 and the non-pre-trained model in Scenario 3, using LIME, on a subset of the test set (412 out of 2,060 frames), highlighted the efficacy of our suggested transfer learning technique with 390/393 (99.24&#x0025;) and 394/394 (100&#x0025;), respectively. Overall, with the same dataset used for the training, both models in Scenario 2 performed better than the non-pre-trained models trained from scratch.</p>
<p>As noted in our previous work (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B6">6</xref>), excluding healthy frames from the non-healthy class may have had a discernible impact on the performance of the AI model. In Scenarios 2 and 3, by eliminating the potential bias caused by healthy frames within clips showing IS features, the models became more adept at distinguishing between IS and healthy frames, resulting in higher accuracy and precision. In Scenario 1, the Xception model, which did not involve any frame exclusion, had its accuracy rate set at 84.6&#x0025;. In contrast, in Scenario 2, the Xception model, which applied filtering techniques, substantially improved the overall accuracy rate to 95.9&#x0025;. This emphasised the substantial enhancement conferred by applying filtering to the training dataset instead of utilising all frames in the training process.</p>
<p>Furthermore, upon evaluation of a subset of 16 clips in Section 3.5, the result showed that the fused models, F1, F2, F3, F4, and Scenario 2 models, outperformed our experts in classifying healthy and IS LUS clips. This suggests that developed AI and ML models have a high level of accuracy, precision, recall, and F1 score in detecting healthy and IS from LUS frames, showing higher agreement with clinical diagnoses compared with our medical experts. Expert 1 (MS) had a high false positive rate (25&#x0025;; four clips), indicating challenges in distinguishing between healthy and IS LUS clips during the video-level assessment. By comparison, Expert 2 (CE) showed high diagnostic accuracy, with a reduced rate of false predictions, mislabelling only 12.5&#x0025; of the clips (two out of 16). This result highlights that experts conducting <italic>post hoc</italic> video assessments may encounter additional challenges because of the absence of real-time evaluation, which usually aids in diagnosis. Also, in our comprehensive assessment of detecting IS, a case of diagnostic disagreement emerged, highlighting the challenges in LUS interpretation. Our clinical experts from Melbourne (DC and XC) initially classified those LUS clips as indicative of a healthy or IS lung condition. This initial assessment was based on IS criteria from the international evidence-based recommendations (<xref ref-type="bibr" rid="B28">28</xref>) and their clinical expertise. It reflects a view at the time of their acquisition and is based on multiple zones of the patient&#x0027;s lung.</p>
<p>Upon re-assessment of the two mislabelled LUS clips discussed in Section 3.6, our experts, reviewing the same clip for each patient, identified challenges related to consistent video interpretation. This highlights the complexity of diagnosing LUS clips, particularly when the field of view is limited, image quality is poor, and there is no prior knowledge of patient history. This disagreement among experts shows the subjectivity and variability that define the interpretation of LUS clips, particularly with artefacts related to B-lines. Such variability arises from the diverse interpretations by experts regarding the origin of B-lines&#x2014;whether they emanate from the pleural line or not&#x2014;and the quantification of these B-lines. Contributing to these discrepancies are factors such as a constrained field of view and the challenges posed by poor-quality LUS clips. These elements highlight the complexities of LUS analysis. In contrast, developed AI and ML models accurately predicted most test clips, often surpassing our medical experts on complex cases.</p>
<p>This project is part of the ongoing research collaboration between Melbourne University and QUT researchers in the field of LUS pathology evaluation using AI tools. Our collaborative research team has recently proposed a fully automated LUS evaluation for lung pathologies, including pleural effusion, atelectasis (collapse), consolidation, and pneumothorax. As part of this collaboration, a study by Tsai et al. (<xref ref-type="bibr" rid="B6">6</xref>) achieved a 92&#x0025; accuracy rate in classifying pleural effusion using a DL model consisting of a Regularised Spatial Transformer Network. A follow-up study by Durrani et al. (<xref ref-type="bibr" rid="B4">4</xref>) demonstrated DL&#x0027;s potential for diagnosing pulmonary consolidation or collapse with an 89&#x0025; accuracy rate. Our current work has resulted in the development of the best-trained ML model that achieved a test accuracy rate of 98.2&#x0025;. This result demonstrates for the first time the use of pre-trained CNNs with a feature extraction and fusion method to develop a diagnostic tool for IS screening in LUS frames. However, all models in this study have been trained and tested on a frame-level basis, meaning that their ability to generalise across entire LUS videos remains questionable and largely depends on how many frames within LUS videos are classified as non-healthy. Our next step will focus on exploring AI capabilities to analyse entire LUS video clips, particularly those containing mixed pathologies, enabling a more robust and clinically relevant real-time assessment of lung pathologies.</p>
<p>A notable limitation of this study is that we used only a small test set (16 clips) to evaluate the performance of our experts compared with the developed models. This may not be representative of the general population or of the different settings in which LUS imaging is performed. Furthermore, only two experts evaluated these test clips. Therefore, further studies are needed to test the performance of our experts on larger and more diverse datasets of LUS with the involvement of additional LUS experts.</p>
<p>Another limitation of this study arises from using DL models that were initially pre-trained on general-purpose image classification using natural images (out-of-domain dataset). We then retrained these models using our specific LUS dataset (the target dataset). As a result of the inherent differences between the original dataset (comprising natural images) and our target dataset (comprising LUS images), DL models may produce visualisations where discrepancies may be observed between Grad-CAM and LIME. As shown in Section 3.2, LIME demonstrates superior interpretability for both healthy and non-healthy testing LUS frames, showing precise localisation of the ROI across most DL models. In contrast, Grad-CAM shows inconsistencies, particularly in ROIs of healthy examples with the Xception model in Scenario 2 (<xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>). Nevertheless, the consistent classification performance across models and datasets indicates that these visualisation differences do not imply exploitation of spurious correlations. These findings highlight the need to improve AI explainability methods such as Grad-CAM and LIME to ensure more consistent and accurate visualisation of reliable feature attribution, ultimately enhancing trust in AI-assisted clinical decision-making. In our future work, we plan to enhance the model&#x0027;s performance by re-training it using a larger dataset and categorising it into multiple classes to identify multiple LUS pathologies.</p>
<p>A further limitation of this study is that the proposed framework was designed for binary classification (IS vs. healthy), without distinguishing between specific causes of Interstitial Syndrome, such as pulmonary oedema, ILD, or pneumonia. Differentiating among these pathologies is clinically relevant because management and prognosis differ substantially. Future work will focus on developing and validating a multi-class classification approach to enable the model to identify individual IS aetiologies, thereby enhancing its clinical applicability.</p>
<p>Another area for improvement is that labelled LUS clips used in this study were derived from medical reports, which were generated based on video-based labelling. For future work, we aim to access a larger dataset and train the model using full LUS videos rather than individual frames for both training and testing. This approach will allow the model to capture temporal patterns and contextual information, potentially improving its performance. In addition, testing the AI model on videos more closely mimics how clinicians interpret LUS in real-world settings, as they assess dynamic changes rather than isolated frames.</p>
<p>The high diagnostic performance demonstrated in this study supports the potential for incorporating deep learning&#x2013;based LUS analysis into POCUS workflows. For practical deployment, future versions of this system could be embedded in portable ultrasound devices or cloud-connected platforms to provide real-time diagnostic feedback during scanning. Successful integration will require optimisation for processing speed, standardisation of LUS acquisition protocols, and a clinician-friendly interface that displays AI-derived overlays alongside conventional ultrasound images. In addition, prospective validation in larger and more diverse patient cohorts will be essential to confirm generalisability and compliance with clinical and regulatory standards.</p>
</sec>
<sec id="s5"><label>5</label><title>Conclusion and future work</title>
<p>The pre-trained models utilised in this study functioned effectively as an automated tool for identifying IS in LUS video frames. Furthermore, fusion models built from features extracted from those models outperformed the individual DL models in terms of accuracy.</p>
<p>Future work in this area could further enhance the applicability and reliability of the proposed CNN models using transfer learning and feature fusion for other lung diseases. This includes expanding the dataset size and diversity, which could help validate the model&#x0027;s generalisability across different patient populations, diseases, and imaging conditions. In addition, investigating the model&#x0027;s performance in distinguishing between different types of Interstitial Syndrome could provide valuable insights into its potential clinical utility for other LUS disorders. In conclusion, future work needs to focus on expanding the dataset and performing comprehensive validation across different LUS datasets. However, the current study shows encouraging results in IS screening using pre-trained models and LUS frames. This advancement in deep learning techniques will contribute to the establishment of an accurate, reliable, and clinically valuable tool for diagnosing and managing LUS disorders.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because they are subject to institutional data-sharing restrictions at Queensland University of Technology (QUT). Requests to access the datasets should be directed to the corresponding author. Please let us know if any further information or clarification is required.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>KM: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. MA: Methodology, Supervision, Writing &#x2013; review &#x0026; editing. DV: Data curation, Writing &#x2013; review &#x0026; editing. CE: Data curation, Validation, Writing &#x2013; review &#x0026; editing. DC: Data curation, Writing &#x2013; review &#x0026; editing. XC: Data curation, Validation, Writing &#x2013; review &#x0026; editing. AR: Data curation, Writing &#x2013; review &#x0026; editing. CR: Data curation, Writing &#x2013; review &#x0026; editing. KH: Data curation, Writing &#x2013; review &#x0026; editing. JD: Supervision, Writing &#x2013; review &#x0026; editing. MS: Writing &#x2013; review &#x0026; editing. DF: Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s10" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence, and reasonable efforts have been made to ensure accuracy, including review by the authors, wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s13" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdgth.2025.1632376/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdgth.2025.1632376/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementaryfile1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Zhou</surname> <given-names>B</given-names></name> <name><surname>Sohn</surname> <given-names>JJ</given-names></name> <name><surname>Zhou</surname> <given-names>J</given-names></name> <name><surname>Jacob</surname> <given-names>JT</given-names></name><etal/></person-group> <article-title>Review of machine learning in lung ultrasound in COVID-19 pandemic</article-title>. <source>J Imaging</source>. (<year>2022</year>) <volume>8</volume>(<issue>3</issue>):<fpage>1</fpage>. <pub-id pub-id-type="doi">10.3390/jimaging8030065</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Coiro</surname> <given-names>S</given-names></name> <name><surname>Rastogi</surname> <given-names>T</given-names></name> <name><surname>Girerd</surname> <given-names>N</given-names></name></person-group>. <article-title>How and when to use lung ultrasound in patients with heart failure?</article-title> <source>Rev Cardiovasc Med</source>. (<year>2022</year>) <volume>23</volume>(<issue>6</issue>):<fpage>198</fpage>. <pub-id pub-id-type="doi">10.31083/j.rcm2306198</pub-id><pub-id pub-id-type="pmid">39077188</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Coiro</surname> <given-names>S</given-names></name> <name><surname>Lacomblez</surname> <given-names>C</given-names></name> <name><surname>Duarte</surname> <given-names>K</given-names></name> <name><surname>Gargani</surname> <given-names>L</given-names></name> <name><surname>Rastogi</surname> <given-names>T</given-names></name> <name><surname>Chouihed</surname> <given-names>T</given-names></name><etal/></person-group> <article-title>A machine learning-based lung ultrasound algorithm for the diagnosis of acute heart failure</article-title>. <source>Intern Emerg Med</source>. (<year>2024</year>) <volume>19</volume>(<issue>8</issue>):<fpage>2309</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1007/s11739-024-03627-2</pub-id><pub-id pub-id-type="pmid">38780749</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Durrani</surname> <given-names>N</given-names></name> <name><surname>Vukovic</surname> <given-names>D</given-names></name> <name><surname>van der Burgt</surname> <given-names>J</given-names></name> <name><surname>Antico</surname> <given-names>M</given-names></name> <name><surname>van Sloun</surname> <given-names>RJG</given-names></name> <name><surname>Canty</surname> <given-names>D</given-names></name><etal/></person-group> <article-title>Automatic deep learning-based consolidation/collapse classification in lung ultrasound images for COVID-19 induced pneumonia</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>(<issue>1</issue>):<fpage>17581</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-22196-y</pub-id><pub-id pub-id-type="pmid">36266463</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vukovic</surname> <given-names>D</given-names></name> <name><surname>Wang</surname> <given-names>A</given-names></name> <name><surname>Antico</surname> <given-names>M</given-names></name> <name><surname>Steffens</surname> <given-names>M</given-names></name> <name><surname>Ruvinov</surname> <given-names>I</given-names></name> <name><surname>van Sloun</surname> <given-names>RJ</given-names></name><etal/></person-group> <article-title>Automatic deep learning-based pleural effusion segmentation in lung ultrasound images</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2023</year>) <volume>23</volume>(<issue>1</issue>):<fpage>274</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-023-02362-6</pub-id><pub-id pub-id-type="pmid">38031040</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tsai</surname> <given-names>CH</given-names></name> <name><surname>Van Der Burgt</surname> <given-names>J</given-names></name> <name><surname>Vukovic</surname> <given-names>D</given-names></name> <name><surname>Kaur</surname> <given-names>N</given-names></name> <name><surname>Demi</surname> <given-names>L</given-names></name> <name><surname>Canty</surname> <given-names>D</given-names></name><etal/></person-group> <article-title>Automatic deep learning-based pleural effusion classification in lung ultrasound images for respiratory pathology diagnosis</article-title>. <source>Phys Med</source>. (<year>2021</year>) <volume>83</volume>:<fpage>38</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1016/j.ejmp.2021.02.023</pub-id><pub-id pub-id-type="pmid">33706149</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barros</surname> <given-names>B</given-names></name> <name><surname>Lacerda</surname> <given-names>P</given-names></name> <name><surname>Albuquerque</surname> <given-names>C</given-names></name> <name><surname>Conci</surname> <given-names>A</given-names></name></person-group>. <article-title>Pulmonary COVID-19: learning spatiotemporal features combining CNN and LSTM networks for lung ultrasound video classification</article-title>. <source>Sensors</source>. (<year>2021</year>) <volume>21</volume>(<issue>16</issue>):<fpage>5486</fpage>. <pub-id pub-id-type="doi">10.3390/s21165486</pub-id><pub-id pub-id-type="pmid">34450928</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>R</given-names></name> <name><surname>Tian</surname> <given-names>Y</given-names></name> <name><surname>Gao</surname> <given-names>J</given-names></name> <name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Wei</surname> <given-names>X</given-names></name> <name><surname>Jiang</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Feature discretization-based deep clustering for thyroid ultrasound image feature extraction</article-title>. <source>Comput Biol Med</source>. (<year>2022</year>) <volume>146</volume>:<fpage>105600</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105600</pub-id><pub-id pub-id-type="pmid">35667893</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>JH</given-names></name> <name><surname>Pan</surname> <given-names>L</given-names></name> <name><surname>Gao</surname> <given-names>YB</given-names></name> <name><surname>Cui</surname> <given-names>GH</given-names></name> <name><surname>Wang</surname> <given-names>YH</given-names></name></person-group>. <article-title>Utility of lung ultrasound to identify interstitial lung disease: an observational study based on the STROBE guidelines</article-title>. <source>Medicine (Baltimore)</source>. (<year>2021</year>) <volume>100</volume>(<issue>12</issue>):<fpage>e25217</fpage>. <pub-id pub-id-type="doi">10.1097/MD.0000000000025217</pub-id><pub-id pub-id-type="pmid">33761708</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Camacho</surname> <given-names>J</given-names></name> <name><surname>Mu&#x00F1;oz</surname> <given-names>M</given-names></name> <name><surname>Genov&#x00E9;s</surname> <given-names>V</given-names></name> <name><surname>Herraiz</surname> <given-names>JL</given-names></name> <name><surname>Ortega</surname> <given-names>I</given-names></name> <name><surname>Belarra</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Artificial intelligence and democratization of the use of lung ultrasound in COVID-19: on the feasibility of automatic calculation of lung ultrasound score</article-title>. <source>Int J Transl Med</source>. (<year>2022</year>) <volume>2</volume>(<issue>1</issue>):<fpage>17</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.3390/ijtm2010002</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Volpicelli</surname> <given-names>G</given-names></name> <name><surname>Fraccalini</surname> <given-names>T</given-names></name> <name><surname>Cardinale</surname> <given-names>L</given-names></name></person-group>. <article-title>Lung ultrasound: are we diagnosing too much?</article-title> <source>Ultrasound J</source>. (<year>2023</year>) <volume>15</volume>(<issue>1</issue>):<fpage>17</fpage>. <pub-id pub-id-type="doi">10.1186/s13089-023-00313-w</pub-id><pub-id pub-id-type="pmid">36991260</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smargiassi</surname> <given-names>A</given-names></name> <name><surname>Zanforlin</surname> <given-names>A</given-names></name> <name><surname>Perrone</surname> <given-names>T</given-names></name> <name><surname>Buonsenso</surname> <given-names>D</given-names></name> <name><surname>Torri</surname> <given-names>E</given-names></name> <name><surname>Limoli</surname> <given-names>G</given-names></name><etal/></person-group> <article-title>Vertical artifacts as lung ultrasound signs: trick or trap? Part 2 &#x2013; an accademia di ecografia toracica position paper on B-lines and sonographic interstitial syndrome</article-title>. <source>J Ultrasound Med</source>. (<year>2023</year>) <volume>42</volume>(<issue>2</issue>):<fpage>279</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1002/jum.16116</pub-id><pub-id pub-id-type="pmid">36301623</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zanza</surname> <given-names>C</given-names></name> <name><surname>Saglietti</surname> <given-names>F</given-names></name> <name><surname>Tesauro</surname> <given-names>M</given-names></name> <name><surname>Longhitano</surname> <given-names>Y</given-names></name> <name><surname>Savioli</surname> <given-names>G</given-names></name> <name><surname>Balzanelli</surname> <given-names>MG</given-names></name><etal/></person-group> <article-title>Cardiogenic pulmonary edema in emergency medicine</article-title>. <source>Adv Respir Med</source>. (<year>2023</year>) <volume>91</volume>(<issue>5</issue>):<fpage>445</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.3390/arm91050034</pub-id><pub-id pub-id-type="pmid">37887077</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Gargani</surname> <given-names>L</given-names></name> <name><surname>Barskova</surname> <given-names>T</given-names></name> <name><surname>Furst</surname> <given-names>DE</given-names></name> <name><surname>Cerinic</surname> <given-names>MM</given-names></name></person-group>. <article-title>Usefulness of lung ultrasound B-lines in connective tissue disease-associated interstitial lung disease: a literature review</article-title>. <source>Arthritis Res Ther</source>. (<year>2017</year>) <volume>19</volume>(<issue>1</issue>):<fpage>206</fpage>. <pub-id pub-id-type="doi">10.1186/s13075-017-1409-7</pub-id><pub-id pub-id-type="pmid">28923086</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baloescu</surname> <given-names>C</given-names></name> <name><surname>Toporek</surname> <given-names>G</given-names></name> <name><surname>Kim</surname> <given-names>S</given-names></name> <name><surname>McNamara</surname> <given-names>K</given-names></name> <name><surname>Liu</surname> <given-names>R</given-names></name> <name><surname>Shaw</surname> <given-names>MM</given-names></name><etal/></person-group> <article-title>Automated lung ultrasound B-line assessment using a deep learning algorithm</article-title>. <source>IEEE Trans Ultrason Ferroelectr Freq Control</source>. (<year>2020</year>) <volume>67</volume>(<issue>11</issue>):<fpage>2312</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1109/TUFFC.2020.3002249</pub-id><pub-id pub-id-type="pmid">32746183</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baloescu</surname> <given-names>C</given-names></name> <name><surname>Rucki</surname> <given-names>AA</given-names></name> <name><surname>Chen</surname> <given-names>A</given-names></name> <name><surname>Zahiri</surname> <given-names>M</given-names></name> <name><surname>Ghoshal</surname> <given-names>G</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Machine learning algorithm detection of confluent B-lines</article-title>. <source>Ultrasound Med Biol</source>. (<year>2023</year>) <volume>49</volume>(<issue>9</issue>):<fpage>2095</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.ultrasmedbio.2023.05.016</pub-id><pub-id pub-id-type="pmid">37365065</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Born</surname> <given-names>J</given-names></name> <name><surname>Wiedemann</surname> <given-names>N</given-names></name> <name><surname>Cossio</surname> <given-names>M</given-names></name> <name><surname>Buhre</surname> <given-names>C</given-names></name> <name><surname>Br&#x00E4;ndle</surname> <given-names>G</given-names></name> <name><surname>Leidermann</surname> <given-names>K</given-names></name><etal/></person-group> <article-title>Accelerating detection of lung pathologies with explainable ultrasound image analysis</article-title>. <source>Appl Sci</source>. (<year>2021</year>) <volume>11</volume>(<issue>2</issue>):<fpage>672</fpage>. <pub-id pub-id-type="doi">10.3390/app11020672</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Humaidi</surname> <given-names>AJ</given-names></name> <name><surname>Al-Dujaili</surname> <given-names>A</given-names></name> <name><surname>Duan</surname> <given-names>Y</given-names></name> <name><surname>Al-Shamma</surname> <given-names>O</given-names></name><etal/></person-group> <article-title>Review of deep learning: concepts, CNN architectures, challenges, applications, future directions</article-title>. <source>J Big Data</source>. (<year>2021</year>) <volume>8</volume>(<issue>1</issue>):<fpage>53</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-021-00444-8</pub-id><pub-id pub-id-type="pmid">33816053</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Duan</surname> <given-names>Y</given-names></name> <name><surname>Al-Dujaili</surname> <given-names>A</given-names></name> <name><surname>Ibraheem</surname> <given-names>IK</given-names></name> <name><surname>Alkenani</surname> <given-names>AH</given-names></name> <name><surname>Santamar&#x00ED;a</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Deepening into the suitability of using pre-trained models of ImageNet against a lightweight convolutional neural network in medical imaging: an experimental study</article-title>. <source>PeerJ Comput Sci</source>. (<year>2021</year>) <volume>7</volume>:<fpage>e715</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.715</pub-id><pub-id pub-id-type="pmid">34722871</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tajbakhsh</surname> <given-names>N</given-names></name> <name><surname>Shin</surname> <given-names>JY</given-names></name> <name><surname>Gurudu</surname> <given-names>SR</given-names></name> <name><surname>Hurst</surname> <given-names>RT</given-names></name> <name><surname>Kendall</surname> <given-names>CB</given-names></name> <name><surname>Gotway</surname> <given-names>MB</given-names></name><etal/></person-group> <article-title>Convolutional neural networks for medical image analysis: full training or fine tuning?</article-title> <source>IEEE Trans Med Imaging</source>. (<year>2016</year>) <volume>35</volume>(<issue>5</issue>):<fpage>1299</fpage>&#x2013;<lpage>312</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2016.2535302</pub-id><pub-id pub-id-type="pmid">26978662</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Al-Amidie</surname> <given-names>M</given-names></name> <name><surname>Al-Asadi</surname> <given-names>A</given-names></name> <name><surname>Humaidi</surname> <given-names>AJ</given-names></name> <name><surname>Al-Shamma</surname> <given-names>O</given-names></name> <name><surname>Fadhel</surname> <given-names>MA</given-names></name><etal/></person-group> <article-title>Novel transfer learning approach for medical imaging with limited labeled data</article-title>. <source>Cancers (Basel)</source>. (<year>2021</year>) <volume>13</volume>(<issue>7</issue>):<fpage>1590</fpage>. <pub-id pub-id-type="doi">10.3390/cancers13071590</pub-id><pub-id pub-id-type="pmid">33808207</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alammar</surname> <given-names>Z</given-names></name> <name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Lafta</surname> <given-names>W</given-names></name> <name><surname>Gu</surname> <given-names>Y</given-names></name></person-group>. <article-title>Deep transfer learning with enhanced feature fusion for detection of abnormalities in x-ray images</article-title>. <source>Cancers (Basel)</source>. (<year>2023</year>) <volume>15</volume>(<issue>15</issue>):<fpage>4007</fpage>. <pub-id pub-id-type="doi">10.3390/cancers15154007</pub-id><pub-id pub-id-type="pmid">37568821</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Szegedy</surname> <given-names>C</given-names></name> <name><surname>Liu</surname> <given-names>W</given-names></name> <name><surname>Jia</surname> <given-names>Y</given-names></name> <name><surname>Sermanet</surname> <given-names>P</given-names></name> <name><surname>Reed</surname> <given-names>S</given-names></name> <name><surname>Anguelov</surname> <given-names>D</given-names></name><etal/></person-group> <comment>Going Deeper with Convolutions</comment>. <comment>arXiv.org.</comment> (<year>2014</year>).</mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Ribeiro</surname> <given-names>MT</given-names></name> <name><surname>Singh</surname> <given-names>S</given-names></name> <name><surname>Guestrin</surname> <given-names>C</given-names></name></person-group>. <comment>&#x2018;Why Should I Trust You?&#x2019;: Explaining the Predictions of Any Classifier. <italic>arXiv</italic> [Preprint]</comment>. (<year>2016</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1602.04938">http://arxiv.org/abs/1602.04938</ext-link> (<comment>Accessed January 31, 2024</comment>).</mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Liao</surname> <given-names>QV</given-names></name> <name><surname>Bellamy</surname> <given-names>RKE</given-names></name></person-group>. <article-title>Effect of confidence and explanation on accuracy and trust calibration in AI-assisted decision making</article-title>. In: <conf-name>Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency</conf-name>; (<year>2020</year>). p. <fpage>295</fpage>&#x2013;<lpage>305</lpage>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2001.02114">http://arxiv.org/abs/2001.02114</ext-link> (<comment>Accessed February 5, 2024</comment>).</mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Selvaraju</surname> <given-names>RR</given-names></name> <name><surname>Cogswell</surname> <given-names>M</given-names></name> <name><surname>Das</surname> <given-names>A</given-names></name> <name><surname>Vedantam</surname> <given-names>R</given-names></name> <name><surname>Parikh</surname> <given-names>D</given-names></name> <name><surname>Batra</surname> <given-names>D</given-names></name></person-group>. <article-title>Grad-CAM: visual explanations from deep networks via gradient-based localization</article-title>. <source>Int J Comput Vis</source>. (<year>2020</year>) <volume>128</volume>(<issue>2</issue>):<fpage>336</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1007/s11263-019-01228-7</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Cid-Serra</surname><given-names>X</given-names></name> <name><surname>Royse</surname><given-names>A</given-names></name> <name><surname>Canty</surname><given-names>D</given-names></name> <name><surname>Johnson</surname><given-names>DF</given-names></name> <name><surname>Maier</surname><given-names>AB</given-names></name> <name><surname>Fazio</surname><given-names>T</given-names></name><etal/></person-group> <comment>Effect of a Multiorgan Focused Clinical Ultrasonography on Length of Stay in Patients Admitted With a Cardiopulmonary Diagnosis: A Randomized Clinical Trial. <italic>JAMA Network Open</italic></comment> (<year>2021</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/2787284">https://jamanetwork.com/journals/jamanetworkopen/fullarticle/2787284</ext-link> <comment>(Accessed May 27, 2024)</comment>.</mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Volpicelli</surname> <given-names>G</given-names></name> <name><surname>Elbarbary</surname> <given-names>M</given-names></name> <name><surname>Blaivas</surname> <given-names>M</given-names></name> <name><surname>Lichtenstein</surname> <given-names>DA</given-names></name> <name><surname>Mathis</surname> <given-names>G</given-names></name> <name><surname>Kirkpatrick</surname> <given-names>AW</given-names></name><etal/></person-group> <article-title>International evidence-based recommendations for point-of-care lung ultrasound</article-title>. <source>Intensive Care Med</source>. (<year>2012</year>) <volume>38</volume>(<issue>4</issue>):<fpage>577</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1007/s00134-012-2513-4</pub-id><pub-id pub-id-type="pmid">22392031</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Chollet</surname> <given-names>F</given-names></name></person-group>. <comment>Xception: Deep Learning with Depthwise Separable Convolutions. <italic>arXiv</italic> [Preprint]</comment> (<year>2017</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1610.02357">http://arxiv.org/abs/1610.02357</ext-link> <comment>(Accessed January 31, 2024)</comment>.</mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Szegedy</surname> <given-names>C</given-names></name> <name><surname>Ioffe</surname> <given-names>S</given-names></name> <name><surname>Vanhoucke</surname> <given-names>V</given-names></name> <name><surname>Alemi</surname> <given-names>A</given-names></name></person-group>. <comment>Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning. <italic>arXiv</italic> [Preprint]</comment> (<year>2016</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1602.07261">http://arxiv.org/abs/1602.07261</ext-link> (<comment>Accessed January 31, 2024</comment>).</mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Fadhel</surname> <given-names>MA</given-names></name> <name><surname>Hollman</surname> <given-names>F</given-names></name> <name><surname>Salhi</surname> <given-names>A</given-names></name> <name><surname>Santamaria</surname> <given-names>J</given-names></name> <name><surname>Duan</surname> <given-names>Y</given-names></name><etal/></person-group> <article-title>SSP: self-supervised pertaining technique for classification of shoulder implants in x-ray medical images: a broad experimental study</article-title>. <source>Artif Intell Rev</source>. (<year>2024</year>) <volume>57</volume>(<issue>10</issue>):<fpage>261</fpage>. <pub-id pub-id-type="doi">10.1007/s10462-024-10878-0</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Rechkemmer</surname> <given-names>A</given-names></name> <name><surname>Yin</surname> <given-names>M</given-names></name></person-group>. <article-title>When confidence meets accuracy: exploring the effects of multiple performance indicators on trust in machine learning models</article-title>. In: <conf-name>Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems</conf-name>. <conf-loc>New York, NY, USA</conf-loc>: <publisher-name>Association for Computing Machinery</publisher-name> (<year>2022</year>). p. <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <comment>(CHI &#x2019;22). Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.1145/3491102.3501967">https://dl.acm.org/doi/10.1145/3491102.3501967</ext-link> (<comment>Accessed February 4, 2024</comment>).</mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Mungoli</surname> <given-names>N</given-names></name></person-group>. <comment>Adaptive Feature Fusion: Enhancing Generalization in Deep Learning Models</comment>. <comment><italic>arXiv</italic> [Preprint]</comment> (<year>2023</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2304.03290">http://arxiv.org/abs/2304.03290</ext-link> <comment>(Accessed February 7, 2024).</comment></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Alzubaidi</surname> <given-names>L</given-names></name> <name><surname>Fadhel</surname> <given-names>MA</given-names></name> <name><surname>Albahri</surname> <given-names>AS</given-names></name> <name><surname>Salhi</surname> <given-names>A</given-names></name> <name><surname>Gupta</surname> <given-names>A</given-names></name> <name><surname>Gu</surname> <given-names>Y</given-names></name></person-group>. <article-title>Domain adaptation and feature fusion for the detection of abnormalities in x-ray forearm images</article-title>. In: <conf-name>2023 45th Annual International Conference of the IEEE Engineering in Medicine &#x0026; Biology Society (EMBC)</conf-name>. <conf-loc>Sydney, Australia</conf-loc>: <publisher-name>IEEE</publisher-name> (<year>2023</year>). p. <fpage>1</fpage>&#x2013;<lpage>5</lpage>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://ieeexplore.ieee.org/document/10340309/">https://ieeexplore.ieee.org/document/10340309/</ext-link> (<comment>Accessed March 12, 2024</comment>).</mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Elharrouss</surname> <given-names>O</given-names></name> <name><surname>Akbari</surname> <given-names>Y</given-names></name> <name><surname>Almaadeed</surname> <given-names>N</given-names></name> <name><surname>Al-Maadeed</surname> <given-names>S</given-names></name></person-group>. <comment>Backbones-Review: Feature Extraction Networks for Deep Learning and Deep Reinforcement Learning Approaches</comment>. <comment><italic>arXiv</italic> [Preprint]</comment> (<year>2022</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2206.08016">http://arxiv.org/abs/2206.08016</ext-link> <comment>(Accessed March 14, 2024).</comment></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="other"><comment>Classification &#x2013; MATLAB &#x0026; Simulink &#x2013; MathWorks Australia</comment> (<year>2025</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://au.mathworks.com/help/stats/classification.html">https://au.mathworks.com/help/stats/classification.html</ext-link> <comment>(Accessed February 7, 2024)</comment>.</mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Khan</surname> <given-names>U</given-names></name> <name><surname>Smargiassi</surname> <given-names>A</given-names></name> <name><surname>Inchingolo</surname> <given-names>R</given-names></name> <name><surname>Demi</surname> <given-names>L</given-names></name></person-group>. <article-title>A novel weighted majority voting-based ensemble framework for lung ultrasound pattern classification in pneumonia patients</article-title>. In: <conf-name>2023 IEEE International Ultrasonics Symposium (IUS)</conf-name>. (<year>2023</year>) p. <fpage>1</fpage>&#x2013;<lpage>4</lpage>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://ieeexplore.ieee.org/abstract/document/10308194">https://ieeexplore.ieee.org/abstract/document/10308194</ext-link> <comment>(Accessed March 27, 2024)</comment>.</mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/823248/overview">Toshiyo Tamura</ext-link>, Waseda University, Japan</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1079313/overview">Stefano Coiro</ext-link>, Hospital of Santa Maria della Misericordia in Perugia, Italy</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3198882/overview">Paul Dryburgh</ext-link>, King&#x2019;s College London, United Kingdom</p></fn>
</fn-group>
</back>
</article>