<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="discussion">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Big Data</journal-id>
<journal-title>Frontiers in Big Data</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Big Data</abbrev-journal-title>
<issn pub-type="epub">2624-909X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdata.2024.1489020</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Big Data</subject>
<subj-group>
<subject>Opinion</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Leveraging compact convolutional transformers for enhanced COVID-19 detection in chest X-rays: a grad-CAM visualization approach</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>V</surname> <given-names>Aravinda C.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/958726/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>B</surname> <given-names>Sudeepa K.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2816280/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Pradeep</surname> <given-names>S.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Suraksha</surname> <given-names>P.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Lin</surname> <given-names>Meng</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/857677/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Computer Science and Engineering, NITTE Mahalinga Adyantaya Memorial Institute of Technology, NITTE Deemed to Be University, Karkala</institution>, <addr-line>Karnataka</addr-line>, <country>India</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Computer Science and Engineering, Government Engineering College, Chamarajanagar</institution>, <addr-line>Karnataka</addr-line>, <country>India</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Computer Science and Engineering, Vidhya Vardhaka College of Engineering, Mysore</institution>, <addr-line>Karnataka</addr-line>, <country>India</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Electronic and Computer Engineering (The Graduate School of Science and Engineering), Ritsumeikan University, Kusatsu</institution>, <addr-line>Shiga</addr-line>, <country>Japan</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: L. J. Muhammad, Bayero University Kano, Nigeria</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Eugenio Vocaturo, National Research Council (CNR), Italy</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Sudeepa K. B <email>sudeepa&#x00040;nitte.edu.in</email></corresp>
<corresp id="c002">Aravinda C. V <email>aravinda&#x00040;nitte.edu.in</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>12</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>7</volume>
<elocation-id>1489020</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>11</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 V, B, Pradeep, Suraksha and Lin.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>V, B, Pradeep, Suraksha and Lin</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>COVID-19 detection</kwd>
<kwd>chest X-ray analysis</kwd>
<kwd>convolutional neural networks</kwd>
<kwd>grad-CAM visualization</kwd>
<kwd>data augmentation</kwd>
<kwd>machine learning algorithms</kwd>
<kwd>public health informatics</kwd>
</kwd-group>
<counts>
<fig-count count="18"/>
<table-count count="8"/>
<equation-count count="26"/>
<ref-count count="31"/>
<page-count count="20"/>
<word-count count="9355"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Medicine and Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The evolution of Deep Learning (DL) methodologies has significantly enhanced the field of medical imaging, particularly in the interpretation of chest radiographs (CXRs). Among these advancements, Convolutional Neural Networks (CNNs) have emerged as a paramount technology for processing and classifying CXR images and demonstrating exceptional proficiency in detecting COVID-19-related signs (Perumal et al., <xref ref-type="bibr" rid="B21">2020</xref>). Although Reverse Transcription Polymerase Chain Reaction (RT-PCR) tests surpass CXRs in accuracy and reliability for virus detection, the latter remains a ubiquitous tool in clinical practice (Giri and Rana, <xref ref-type="bibr" rid="B8">2020</xref>). RT-PCR excels in early detection capabilities, enabling prompt treatment initiation, and uniquely identifies the virus in asymptomatic individuals through the analysis of saliva, throat, and nasal passage samples, demonstrating superior performance over CXR evaluations in these aspects.</p>
<p>A recent investigation employed a Convolutional Neural Network (CNN) model to distinguish between:</p>
<list list-type="bullet">
<list-item><p>Normal chest radiographs.</p></list-item>
<list-item><p>Pneumonia-infected.</p></list-item>
<list-item><p>COVID-19 affected individuals.</p></list-item>
</list>
<p>The CNN model, trained on an extensive dataset comprising images from:</p>
<list list-type="bullet">
<list-item><p>Patients diagnosed with pneumonia.</p></list-item>
<list-item><p>Those testing positive for COVID-19.</p></list-item>
</list>
<p>Healthy subjects achieved a remarkable precision rate of 97.6% in identifying COVID-19 cases on chest radiographs (CXR). This exceptional level of precision surpasses that of traditional CXR diagnostic techniques, demonstrating the potential of deep learning algorithms to improve diagnostic accuracy (Wang and Lin, <xref ref-type="bibr" rid="B29">2020</xref>). In the current healthcare landscape, the surge in demand for intensive care units (ICUs) has exposed the capacity constraints of healthcare systems in several developed countries. The influx of patients suffering from COVID-19-induced pneumonia into the ICU underscores this pressing challenge, highlighting the need for effective diagnostic tools and strategies to manage the burden on healthcare resources (Kermany et al., <xref ref-type="bibr" rid="B13">2018</xref>).</p>
<p>The system employs relational feature intelligence to analyze and interpret the interactions among various elements within an image, enabling the assessment of:</p>
<list list-type="bullet">
<list-item><p>Spatial relationships.</p></list-item>
<list-item><p>Dynamics between different components.</p></list-item>
</list>
<p>This capability facilitates the evaluation of:</p>
<list list-type="bullet">
<list-item><p>Tumor-tissue interactions.</p></list-item>
<list-item><p>Inter-organ relationships (e.g., heart-lung interactions) which is crucial for diagnosing conditions like:</p>
<list list-type="simple">
<list-item><p>&#x02218; Heart failure.</p></list-item>
<list-item><p>&#x02218; Pulmonary embolism.</p></list-item>
</list>
</list-item>
</list>
<p>The relational feature intelligence of the system enables the analysis of complex interactions within medical images, providing valuable information on spatial relationships and dynamics between different elements (Ai et al., <xref ref-type="bibr" rid="B1">2020</xref>; Ng et al., <xref ref-type="bibr" rid="B19">2020</xref>; Kong and Agarwal, <xref ref-type="bibr" rid="B14">2020</xref>). This capability is particularly beneficial in:</p>
<list list-type="bullet">
<list-item><p>Oncology (accurate diagnosis and treatment planning).</p></list-item>
<list-item><p>Cardiovascular and pulmonary diseases (diagnosing conditions like heart failure and pulmonary embolism).</p></list-item>
</list>
<p>As depicted in <xref ref-type="fig" rid="F1">Figure 1</xref>, the radiographic features characteristic of COVID-19 typically encompass:</p>
<list list-type="bullet">
<list-item><p>Bilateral and lower-zone dominant ground-glass opacities (GGOs).</p></list-item>
<list-item><p>Consolidations, predominantly peripheral in distribution.</p></list-item>
<list-item><p>Interlobular septal thickening.</p></list-item>
<list-item><p>Pleural effusions.</p></list-item>
</list>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The radiographic characteristics indicative of COVID-19, showing bilateral and lower-zone dominant ground-glass opacities (GGOs) along with consolidations.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0001.tif"/>
</fig>
<p>In contrast, viral pneumonia caused by non-SARS-CoV-2 viruses, such as influenza-A, tends to exhibit:</p>
<list list-type="bullet">
<list-item><p>Unilateral, central, and upper-zone dominant GGOs and consolidations. Using these distinctions, medical experts advocate the concurrent use of:</p>
<list list-type="simple">
<list-item><p>&#x02218; Chest radiography.</p></list-item>
<list-item><p>&#x02218; Nucleic acid amplification tests</p></list-item>
</list>
</list-item>
</list>
<p>as a primary diagnostic strategy for early identification of this novel pneumonia strain.</p>
<p>The primary objective of this examination is to detect the presence of the virus within a patient, which is achieved through the analysis of specific biomarkers (Choe et al., <xref ref-type="bibr" rid="B7">2019</xref>). To augment this assessment, medical practitioners often employ a multimodal diagnostic approach, incorporating additional methodologies such as:</p>
<list list-type="bullet">
<list-item><p>Antibody tests,</p></list-item>
<list-item><p>Antigen tests, and</p></list-item>
<list-item><p>Radiographic analysis via chest X-rays</p></list-item>
</list>
<p>to verify infection and facilitate accurate diagnosis. However, it is crucial to acknowledge that these supplementary tests may not always yield precise outcomes, highlighting the necessity for their combined application with other diagnostic techniques to ensure a definitive diagnosis.</p>
<p>A significant limitation of this specific examination is the considerable:</p>
<list list-type="bullet">
<list-item><p>Processing time</p></list-item>
<list-item><p>Expenses</p></list-item>
</list>
<p>associated with its execution, which may hinder its applicability in resource-constrained settings.</p>
<p>To minimize diagnostic errors, healthcare professionals are advised to utilize automated imaging analysis software, powered by artificial intelligence (AI), when interpreting X-ray photographs. These advanced tools excel at identifying complex patterns within images, thereby improving diagnostic accuracy. Additionally, to maintain a high level of diagnostic precision, medical staff should take periodic breaks to avoid fatigue and seek second opinions from colleagues to ensure reliability and accuracy. Innovations in diagnostic methodologies, such as the integration of AI and machine learning (ML) technologies, can significantly reduce both the time and financial costs associated with diagnostic processes, enhancing efficiency and reliability. Specifically, AI and ML can accelerate and refine disease diagnosis by analyzing medical data. In addition, digital biomarkers, such as data from wearable technologies, offer real-time information, enabling the early detection and monitoring of various health conditions. The synergy of ML and AI has facilitated the development of sophisticated algorithms capable of detecting and diagnosing diseases with remarkable precision. These advancements have transformed the diagnostic landscape, enabling healthcare professionals to make more accurate and timely diagnoses (Simonyan and Zisserman, <xref ref-type="bibr" rid="B25">2015</xref>). Artificial intelligence (AI)-empowered systems can efficiently process vast datasets, enhancing the capacity for early disease detection and the formulation of customized treatment plans. The advent of telemedicine and remote monitoring tools enables healthcare practitioners to oversee patient health remotely, providing timely interventions when necessary. This approach has been particularly advantageous during the pandemic. Pre-trained models can address complex issues with speed and accuracy, offering a significant advantage in medical diagnostics. 
Leveraging pre-trained models reduces the time and resources required for training a model from scratch, while enhancing precision. These models facilitate knowledge transfer across different domains, enabling more efficient and accurate solutions. The Grad-CAM algorithm serves as a visualization tool, elucidating the decision-making process of convolutional neural networks (CNNs). By utilizing gradients pertaining to a selected target class, Grad-CAM generates a localization map highlighting pivotal areas within an image critical for identifying a specific concept. This study introduces a comprehensive transfer model for synchronous COVID-19 detection and visualization of affected areas using X-ray imaging (He et al., <xref ref-type="bibr" rid="B10">2016</xref>).</p>
<p>Unlike prior research, our work focuses on COVID-19 detection via X-ray images. Given the pressure on healthcare systems, it is crucial to leverage every available resource efficiently. Integrating the Grad-Cam localization feature aids in identifying COVID-19 and assessing severity, assisting in determining whether immediate healthcare intervention is needed (Selvaraju et al., <xref ref-type="bibr" rid="B23">2016</xref>). In our investigation, we leveraged a diverse range of pre-trained models for the classification task, including:</p>
<list list-type="bullet">
<list-item><p>ResNet34.</p></list-item>
<list-item><p>ResNet50.</p></list-item>
<list-item><p>EfficientNet-B4.</p></list-item>
<list-item><p>EfficientNet-B5 architectures.</p></list-item>
</list>
<p>The incorporation of Grad-CAM into our methodology enabled the precise identification of affected regions, with the EfficientNet architecture being utilized for its exceptional:</p>
<list list-type="bullet">
<list-item><p>Efficiency.</p></list-item>
<list-item><p>Effectiveness.</p></list-item>
</list>
<p>The utilization of pre-trained models offers significant advantages, primarily due to their pre-learned weights, which make them exceptionally valuable even when working with limited datasets. This constitutes the primary benefit of integrating pre-trained models into our investigative approach. Additionally, the employment of pre-trained models reduces the:</p>
<list list-type="bullet">
<list-item><p>Computational power</p></list-item>
<list-item><p>Resources required for training</p></list-item>
</list>
<p>making them a computationally efficient option.</p>
<p>To address the variability in light intensity of the captured images, we applied the Contrast Limited Adaptive Histogram Equalization (CLAHE) technique to enhance image quality, ensuring a more accurate analysis. This preprocessing step enabled the:</p>
<list list-type="bullet">
<list-item><p>Improvement of image contrast</p></list-item>
<list-item><p>Reduction of noise</p></list-item>
</list>
<p>ultimately leading to more reliable results.</p>
</sec>
<sec id="s2">
<title>2 Contribution of the work</title>
<p>In this study, we leveraged publicly accessible datasets of X-ray images as the primary experimental medium. The architecture of our experiment is designed as an end-to-end system, eliminating the need for manual feature extraction or selection, thereby streamlining the process for enhanced efficiency and effectiveness. We employed the Grad-CAM technique in conjunction with a Convolutional Neural Network (CNN) model to improve the diagnostic accuracy of our system. The integration of Grad-CAM enables the visualization of feature importance, allowing us to identify the most relevant regions in the X-ray images for diagnostic decision-making. By utilizing this approach, we aimed to develop a robust and accurate diagnostic system, capable of automatically detecting, and localizing abnormalities in X-ray images, thereby assisting clinicians in making informed decisions.</p>
<sec>
<title>2.1 Methodological framework</title>
<p>The core of our methodological approach integrates the Gradient-weighted Class Activation Mapping (Grad-CAM) technique with Convolutional Neural Network (CNN) architectures, aiming to refine diagnostic precision through the following formulation:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mo>-</mml:mo><mml:mi>C</mml:mi><mml:mi>A</mml:mi><mml:mi>M</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>L</mml:mi><mml:mi>U</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>L</italic><sub>Grad &#x02212; CAM</sub> represents the localization map highlighting regions of interest, <inline-formula><mml:math id="M2"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> denotes the weights for the <italic>k</italic>-th feature map <italic>A</italic><sup><italic>k</italic></sup> contributing to a target class <italic>c</italic>, and ReLU ensures the activation map focuses on features positively influencing the class prediction.</p>
<p>For classification purposes, we employed a selection of pre-trained models:</p>
<list list-type="bullet">
<list-item><p>ResNet34 and ResNet50 and</p></list-item>
<list-item><p>EfficientNet-B4 and EfficientNet-B5,</p></list-item>
</list>
<p>fine-tuned to adapt their learned representations to our specific task. This process exploits the models&#x00027; preexisting knowledge, significantly economizing on computational resources and training time.</p>
<p>The Grad-CAM heatmap is computed using the equation: L<sub>Grad-CAM</sub> = ReLU(&#x02211;<sub>k</sub> <inline-formula><mml:math id="M3"><mml:msubsup><mml:mrow><mml:mtext>a</mml:mtext></mml:mrow><mml:mrow><mml:mtext>k</mml:mtext></mml:mrow><mml:mrow><mml:mtext>c</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> A<sup>k</sup>). This equation is crucial in visualizing the regions of the input image that significantly influence the model&#x00027;s decision-making process. In our study, we utilize the feature maps A<sup>k</sup> from the last convolutional layer of the CNN and importance weights a<sub>k</sub><sup>c</sup> derived from the gradients of the predicted class score with respect to A<sup>k</sup>. These weights represent the contribution of each feature map to the prediction. The ReLU function ensures that only positive contributions are considered, highlighting the most relevant regions for class prediction. This equation enables the generation of Grad-CAM heatmaps, which are overlaid on original chest X-rays to help clinicians identify critical regions that influence the model&#x00027;s classification.</p>
</sec>
<sec>
<title>2.2 Analytical approach</title>
<p>Our investigation scrutinizes the influence of the characteristics of the data set and image processing techniques on the precision of disease detection. This entails a dual analysis approach:</p>
<sec>
<title>2.2.1 Dataset analysis</title>
<disp-formula id="E2"><label>(2)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mo>&#x00394;</mml:mo></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mi>I</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x00394;<sub>accuracy</sub> measures the change in diagnostic accuracy as a function of dataset balance and image enhancement techniques.</p>
</sec>
<sec>
<title>2.2.2 Image enhancement</title>
<p>The employment of image enhancement, particularly for X-ray and CT-Scan images, was operationalized through the application of Contrast Limited Adaptive Histogram Equalization (CLAHE), aiming to ameliorate image quality for more accurate diagnostic interpretation.</p>
</sec>
<sec>
<title>2.2.3 Justification of using these models</title>
<p>When selecting models for this research, several factors were considered to ensure the best choices for the task at hand. First, computational feasibility was evaluated. While models like DenseNet and Inception are powerful, they require significant computational resources without offering substantial accuracy improvements over other options. So, more efficient architectures that strike a balance between performance and computational demands were selected. The selected models are known for their ability to adapt well to different datasets, which is crucial given the variations in chest X-ray imaging conditions. By considering these factors, the research offers the best combination of accuracy, efficiency, and generalizability.</p>
</sec>
</sec>
<sec>
<title>2.3 Hyperparameter tuning and sensitivity analysis</title>
<sec>
<title>2.3.1 Sensitivity analysis</title>
<p>To evaluate the model&#x00027;s robustness, sensitivity analyses were performed on key hyperparameters.</p>
</sec>
<sec>
<title>2.3.2 Learning rate sensitivity</title>
<p>A significant performance drop was observed when the learning rate deviated from the optimal value of 10<sup>&#x02212;3</sup>, highlighting its crucial role in convergence. To address this, a learning rate scheduler was employed to dynamically adjust the learning rate upon validation loss plateauing.</p>
</sec>
<sec>
<title>2.3.3 Batch size sensitivity</title>
<p>Increasing the batch size to 64 negatively impacted performance due to poorer gradient estimates on limited GPU memory. Conversely, smaller batch sizes increased training time.</p>
</sec>
<sec>
<title>2.3.4 Dropout rate sensitivity</title>
<p>Dropout rates below 0.2 led to overfitting, characterized by high training accuracy but low validation accuracy. In contrast, dropout rates above 0.5 hindered the learning process.</p>
</sec>
<sec>
<title>2.3.5 Optimizer sensitivity</title>
<p>The Adam optimizer demonstrated robustness for the dataset used in this study, showing less sensitivity to small learning rate changes compared to SGD.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Literature survey</title>
<p>The collaborative efforts of Eduardo A. Soares, Plamen P. Angelov, and Sarah Biaso have culminated in a significant contribution to the field of medical imaging for infectious diseases. They have developed and made publicly available a comprehensive dataset of CT scans specific to SARS-CoV-2, enabling the advancement of diagnostic capabilities.</p>
<p>The team crafted an innovative algorithm, meticulously training it on CT scans from both:</p>
<list list-type="bullet">
<list-item><p>Confirmed SARS-CoV-2 positive patients.</p></list-item>
<list-item><p>Patients without the infection.</p></list-item>
</list>
<p>This training phase was followed by a rigorous testing phase, where the algorithm&#x00027;s efficacy was validated on a distinct dataset encompassing CT scans from individuals with and without SARS-CoV-2 infection.</p>
<p>The outcomes of this testing phase demonstrated the algorithm&#x00027;s precision in accurately detecting SARS-CoV-2 infection, highlighting its potential utility in enhancing diagnostic processes within clinical settings. This contribution has the potential to significantly impact the field of medical imaging for infectious diseases, improving patient outcomes and streamlining diagnostic procedures (Soares et al., <xref ref-type="bibr" rid="B27">2020</xref>). In a groundbreaking study, Sara Haseli and Nastaran Khalili have significantly advanced our understanding of COVID-19 pneumonia through comprehensive radiological analysis of chest CT imaging. Their research elucidated distinct hallmark features characteristic of the condition, including:</p>
<list list-type="bullet">
<list-item><p>Bilateral ground-glass opacities.</p></list-item>
<list-item><p>Consolidation.</p></list-item>
<list-item><p>Interlobular septal thickening.</p></list-item>
</list>
<p>These findings align with established diagnostic criteria for COVID-19 pneumonia, providing critical insights into the disease&#x00027;s pulmonary manifestations.</p>
<p>Furthermore, their investigation revealed additional complications in a subset of patients, including:</p>
<list list-type="bullet">
<list-item><p>Pleural effusions.</p></list-item>
<list-item><p>Lymphadenopathy.</p></list-item>
<list-item><p>Pulmonary embolism.</p></list-item>
</list>
<p>These findings broaden our understanding of the disease&#x00027;s impact on pulmonary structures, underscoring the importance of vigilant radiological monitoring and timely diagnosis (Haseli et al., <xref ref-type="bibr" rid="B9">2020</xref>). In terms of pulmonary involvement, the posterior segment of the left lower lobe (LLL) was identified as the most frequently affected segment, exhibiting a high propensity for involvement. Additionally, significant instances of involvement were observed in the right middle lobe (RML) and the right lower lobe (RLL), suggesting a bilateral distribution of pulmonary affliction. Upon analyzing the data based on lobar distribution, the LLL exhibited the highest frequency of affliction, with the RLL and RML closely following in prevalence, indicating a relatively even distribution of pulmonary involvement across the lobes (Li et al., <xref ref-type="bibr" rid="B16">2020a</xref>).</p>
<p>Upon examining the demographics of age and gender in relation to chest CT imaging outcomes, a notable pattern emerged, suggesting a gender-specific predilection for lobar involvement. Male patients exhibited a significant propensity for left lower lobe (LLL) involvement, whereas female patients demonstrated a tendency toward more frequent involvement of the right lower lobe (RLL). Further analysis revealed a distinct age-related pattern, with the left lower lobe (LLL) being predominantly affected in the older population (&#x02265;65 years). In contrast, the right lower lobe (RLL) showed a higher incidence of involvement among younger individuals (&#x0003C;65 years). These findings suggest that age and gender may play a role in determining the lobar distribution of pulmonary involvement (Narin et al., <xref ref-type="bibr" rid="B18">2021</xref>).</p>
<p>Wang et al. developed a cutting-edge algorithm for the detection of COVID-19 pneumonia via chest CT image analysis, leveraging an adapted Inception transfer-learning framework. The algorithm&#x00027;s performance was rigorously validated through both internal and external validation processes, demonstrating its efficacy in identifying COVID-19 pneumonia with a high degree of accuracy (Islam et al., <xref ref-type="bibr" rid="B12">2020</xref>).</p>
<p>In their investigative study, Ali Narin and Ceren Kaya put forward three models grounded in convolutional neural network technology&#x02014;ResNet50, InceptionV3, and InceptionResNetV2&#x02014;for the purpose of identifying coronavirus pneumonia from chest X-ray images. These models underwent rigorous evaluation on a dataset that included both confirmed COVID-19 cases and cases of conventional viral pneumonia, demonstrating the application of advanced deep learning techniques in the differentiation and diagnosis of respiratory illnesses (Saha et al., <xref ref-type="bibr" rid="B22">2020</xref>; Wang et al., <xref ref-type="bibr" rid="B30">2020</xref>; Song et al., <xref ref-type="bibr" rid="B28">2021</xref>). In a pioneering approach, the developed system leveraged a hybrid architecture, synergistically integrating a Long Short-Term Memory (LSTM) classifier with a Convolutional Neural Network (CNN) dedicated to feature extraction and selection. The system&#x00027;s performance was rigorously evaluated using a dataset comprising 421 cases, including 141 instances with features indicative of COVID-19. Following the completion of its training phase, the model demonstrated exceptional performance capabilities, adeptly categorizing images as either COVID-19 positive or negative. The evaluation of the model&#x00027;s efficacy involved the application of 10-fold cross-validation, yielding an impressive accuracy rate of 97.3%. This remarkable achievement underscores the system&#x00027;s potential in medical imaging analysis, showcasing its ability to accurately classify images and support diagnostic decision-making (Mohammad and Abolfazl, <xref ref-type="bibr" rid="B17">2020</xref>; Islam et al., <xref ref-type="bibr" rid="B12">2020</xref>; Alharbi et al., <xref ref-type="bibr" rid="B2">2022</xref>). 
Extensive validation was conducted on an independent dataset of chest X-ray images, where the proposed model achieved an exceptional accuracy of 97.7%. This remarkable performance demonstrates the model&#x00027;s precision in distinguishing COVID-19 cases, showcasing its potential in medical imaging analysis. The utilized dataset comprised 88 confirmed instances of COVID-19, 101 cases of bacterial pneumonia, and 86 instances classified as normal based on CT scan analyses. Comparative assessments were conducted to evaluate the model&#x00027;s performance relative to traditional frameworks, including Res-Net, Dense-Net, and VGG16. The results underscore the proposed model&#x00027;s enhanced performance, demonstrating its effectiveness through rigorous analysis (Singh et al., <xref ref-type="bibr" rid="B26">2022</xref>; Alharbi et al., <xref ref-type="bibr" rid="B3">2022a</xref>,<xref ref-type="bibr" rid="B4">b</xref>).</p>
<sec>
<title>3.1 Multi-modal bone suppression, lung segmentation, and classification approach</title>
<p>This study combines bone suppression and lung segmentation with multi-modal classification to improve COVID-19 detection accuracy. By isolating lung regions, the model reduces noise from surrounding structures, enhancing diagnostic performance. This approach refines lung images, contributing to more accurate identification of COVID-19 in chest X-rays (Li et al., <xref ref-type="bibr" rid="B15">2020b</xref>; Shi et al., <xref ref-type="bibr" rid="B24">2021</xref>).</p>
</sec>
<sec>
<title>3.2 Comparative study of linear type multiple instance learning techniques</title>
<p>This comparative study investigates various linear multiple instance learning (MIL) models applied to COVID-19 detection in chest X-rays. Analyzing how linear MIL models handle weakly labeled data, the study identifies effective techniques for classifying X-ray images without extensive manual annotations. The findings reveal that certain MIL techniques can efficiently pinpoint COVID-19 indicators, making them suitable for large-scale screenings (Ching et al., <xref ref-type="bibr" rid="B6">2018</xref>; Wang et al., <xref ref-type="bibr" rid="B31">2017</xref>).</p>
</sec>
<sec>
<title>3.3 Convolutional neural network techniques</title>
<p>CNN-based methods have shown significant promise in detecting COVID-19 from X-ray images. This research explores CNN architectures designed for medical image analysis, fine-tuned to identify COVID-19&#x00027;s unique patterns in X-rays. Using convolutional layers that capture spatial features, CNNs offer high sensitivity in recognizing infection signs, allowing for robust classification (Wang and Lin, <xref ref-type="bibr" rid="B29">2020</xref>; Apostolopoulos and Mpesiana, <xref ref-type="bibr" rid="B5">2020</xref>).</p>
</sec>
<sec>
<title>3.4 Machine learning techniques</title>
<p>Beyond deep learning, various machine learning algorithms have been applied to COVID-19 classification in chest X-rays. These techniques include support vector machines (SVMs), decision trees, and ensemble methods. The study highlights the effectiveness of these algorithms in scenarios with limited data, where traditional machine learning methods can outperform deep learning models by leveraging feature extraction and selection methods. These studies demonstrate the potential of AI techniques in enhancing COVID-19 detection accuracy using chest X-rays. Each approach offers unique strengths, and their combination could lead to more effective diagnosis and treatment (Ozturk et al., <xref ref-type="bibr" rid="B20">2020</xref>; Hemdan et al., <xref ref-type="bibr" rid="B11">2020</xref>).</p>
</sec>
</sec>
<sec id="s4">
<title>4 Materials and methods</title>
<sec>
<title>4.1 Dataset description</title>
<p>While our dataset provides a substantial amount of data for training and evaluating our model, it&#x00027;s important to acknowledge its limitations. The dataset may not fully capture the diversity of COVID-19 cases seen across different populations and imaging conditions, which could impact the generalizability of our model&#x00027;s predictions in real-world clinical settings. For instance, variations in demographics, geographic regions, and imaging equipment could affect the robustness of our model, particularly when applied to data from populations or conditions not represented in the training dataset. To address this limitation, future work should consider incorporating datasets from a broader range of demographics and imaging environments. This would enhance our model&#x00027;s adaptability and effectiveness in diverse healthcare contexts, ensuring that it can provide accurate predictions for a wide range of patients and scenarios. By expanding our dataset in this way, we can increase the confidence in our model&#x00027;s performance and its potential to improve patient outcomes in real-world clinical settings.</p>
<p>The diagnosis of COVID-19 in this study is conducted through the analysis of pulmonary (chest) X-ray images. The dataset is categorized into three primary classes:</p>
<list list-type="bullet">
<list-item><p>COVID-19.</p></list-item>
<list-item><p>Normal.</p></list-item>
<list-item><p>Pneumonia.</p></list-item>
</list>
<p>Sourced from the COVID-19 Radiography Database available on platforms such as Kaggle and Mendeley data, the dataset encompasses a total of 9,300 images, distributed across the classes as follows:</p>
<list list-type="simple">
<list-item><p>COVID-19: 800 images.</p></list-item>
<list-item><p>Normal: 2,500 images.</p></list-item>
<list-item><p>Pneumonia: 5,000 images.</p></list-item>
</list>
</sec>
</sec>
<sec id="s5">
<title>5 Dataset allocation</title>
<p>The allocation of the dataset for various purposes is segmented as follows:</p>
<list list-type="bullet">
<list-item><p>Evaluation: 30% of the total dataset.</p></list-item>
<list-item><p>Training and Validation: 70% of the total dataset.</p></list-item>
</list>
<p>Furthermore, within the Training &#x0002B; Validation subset, the images are divided into:</p>
<list list-type="simple">
<list-item><p>Training: 70%</p></list-item>
<list-item><p>Validation: 30%</p></list-item>
</list>
</sec>
<sec id="s6">
<title>6 Visualization</title>
<p>The distribution and representation of all three subsets (COVID-19, Normal, and Pneumonia) are illustrated in <xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Chest X-ray COVID-19 image samples evaluated using the Kaggle database.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0002.tif"/>
</fig>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Chest X-ray Pneumonia image samples evaluated using the Kaggle database.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0003.tif"/>
</fig>
<sec>
<title>6.1 Convolutional layers</title>
<p>Convolutional layers are a fundamental component of Convolutional Neural Networks (CNNs), primarily due to their ability to utilize learnable kernels that perform convolutions across the spatial dimensions of the input data. The convolution operation can be mathematically expressed as:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>*</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>s</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>&#x0221E;</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x0221E;</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mi>f</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo><mml:mi>s</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>-</mml:mo><mml:mi>q</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>f</italic>[<italic>m</italic>] denotes the input function, <italic>s</italic>[<italic>m</italic> &#x02212; <italic>q</italic>] represents the shifting function, and (<italic>f</italic><sup>&#x0002A;</sup><italic>s</italic>)[<italic>q</italic>] corresponds to the output of the convolution operation, resulting in the generation of feature maps that capture spatial hierarchies in the data.</p>
</sec>
<sec>
<title>6.2 Fully connected and classification layers</title>
<p>While convolutional layers are responsible for extracting hierarchical features, fully connected layers serve as classifiers, mapping the learned features to output classes through matrix multiplication. These layers interpret the high-level features extracted by the convolutional layers, with the aim of accurately classifying the input data.</p>
</sec>
<sec>
<title>6.3 Pooling layers</title>
<p>Pooling layers are designed to reduce the spatial dimensions of the input data, thereby condensing the information and retaining the most salient features. A common pooling operation is max pooling, mathematically represented as:</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>O</mml:mi><mml:mo>=</mml:mo><mml:munder><mml:mo movablelimits="false">max</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x02208;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>R</mml:mi></mml:mrow></mml:munder><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>O</italic> is the output of the pooling operation over a region R, and <italic>x</italic><sub><italic>i,j</italic></sub> are the input features within the pooling window. This operation effectively downsamples the input while preserving the most significant activations.</p>
</sec>
<sec>
<title>6.4 Relationship between input and output feature map sizes</title>
<p>The dimensions of the output feature map are determined by the stride and filter size used during the convolutional operation, as described by the following equation:</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>O</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mi>p</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mtext>_</mml:mtext><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>p</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mtext>_</mml:mtext><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi><mml:mo>-</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>F</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mtext>_</mml:mtext><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The equation Output_size = 1 &#x0002B; (Input_size &#x02013; Filter_size)/Stride is a fundamental concept in convolutional operations, determining the dimensions of the output feature map after applying a convolution operation to an input image. In our study, this equation plays a crucial role in designing and understanding the architecture of CNN models. The input size, filter size, and stride are critical parameters that affect the dimensionality reduction and feature extraction capabilities of the network. By using this equation, we can ensure that the network architecture is compatible with the input image dimensions and optimize computational efficiency. This equation is essential for understanding how CNN models process visual data and make predictions, as illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Illustration of the dimensionality ratio calculated by dividing the output feature map size by the input feature map size.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0004.tif"/>
</fig>
</sec>
<sec>
<title>6.5 ResNet architecture</title>
<p>Residual Networks (ResNets) introduce an innovative architecture in deep learning designed to effectively mitigate the vanishing gradient problem, a significant challenge in training deep neural networks. As the network depth increases, the gradients often become exceedingly small, rendering weight updates ineffective and hindering the network&#x00027;s ability to learn and converge. ResNets address this issue through the use of skip connections, a key architectural feature that facilitates better gradient flow.</p>
</sec>
<sec>
<title>6.6 Residual learning framework</title>
<p>The skip connections, also known as residual connections, provide an alternative pathway for gradients to propagate, thereby bypassing one or more layers. The fundamental concept of ResNets can be mathematically expressed as:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>F</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>x</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In this equation, <italic>x</italic> represents the input to a layer, <italic>F</italic>(<italic>x</italic>, {<italic>W</italic><sub><italic>i</italic></sub>}) denotes the residual function to be learned by the network, and <italic>y</italic> is the output of the layer. The inclusion of the term <italic>x</italic> allows the network to skip certain layers, ensuring that the gradient can be propagated directly back through the network without significant reduction in magnitude. This mechanism is crucial for maintaining the effectiveness of gradient-based learning in deep networks, as depicted in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Residual learning framework.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0005.tif"/>
</fig>
</sec>
<sec>
<title>6.7 Impact on deep learning</title>
<p>The introduction of ResNets has significantly impacted various domains within deep learning, including but not limited to image recognition, object detection, and natural language processing. Furthermore, their application extends to medical imaging, where they facilitate tasks such as tumor detection and segmentation.</p>
</sec>
<sec>
<title>6.8 EfficientNet architecture</title>
<p>The EfficientNet architecture introduces a holistic optimization strategy that synergistically combines advanced convolutional techniques with squeeze-and-excitation modules. Its primary objective is to enhance model efficiency and accuracy without incurring a proportional increase in computational demands, thereby achieving an optimal trade-off between performance and computational resources.</p>
</sec>
<sec>
<title>6.9 Compound scaling method</title>
<p>The core of EfficientNet&#x00027;s design philosophy lies in the compound scaling method, which achieves a balanced scaling of the network&#x00027;s dimensions&#x02014;depth, width, and resolution. This approach is mathematically formalized as:</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>depth</mml:mtext><mml:mo>:</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C6;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;width</mml:mtext><mml:mo>:</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C6;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;resolution</mml:mtext><mml:mo>:</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C6;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;subject&#x000A0;to</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x003B1;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x02248;</mml:mo><mml:mn>2</mml:mn><mml:mtext>&#x000A0;and&#x000A0;</mml:mtext><mml:mi>&#x003B1;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mi>&#x003B2;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mi>&#x003B3;</mml:mi><mml:mo>&#x02248;</mml:mo><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x003C6; is a user-defined coefficient that determines the scaling of the model based on the available computational resources. The constants &#x003B1;, &#x003B2;, and &#x003B3; define the specific scaling factors for each dimension&#x02014;depth, width, and resolution, respectively. The constraints ensure a balanced scaling across these dimensions, optimizing the model&#x00027;s performance while maintaining computational efficiency.</p>
<p>The compound scaling formula is a crucial component of EfficientNet, balancing image resolution, network depth, and width scaling factors to optimize model accuracy and efficiency. Unlike traditional scaling methods, compound scaling ensures a proportional and systematic scaling across all three dimensions, resulting in a better-performing model without unnecessary computational costs.</p>
<p>EfficientNet&#x00027;s compound scaling method optimizes the balance between model accuracy and efficiency. The input image resolution is scaled to 224 &#x000D7; 224 pixels to capture finer details without overwhelming computational resources.</p>
</sec>
<sec>
<title>6.10 Efficient convolutional block and MBConv block</title>
<p>To further optimize performance, EfficientNet incorporates an efficient convolutional block alongside the mobile inverted bottleneck (MBConv) block. The MBConv block, a pivotal component, enhances model efficiency through an inverted residual structure shown in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Efficient convolutional block and MBConv block.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0006.tif"/>
</fig>
</sec>
<sec>
<title>6.11 Transformers</title>
<p>The integration of transformer encoders into CNN-based classification algorithms marks a significant advancement in machine learning, enhancing model capabilities by effectively leveraging transfer learning principles as shown in the <xref ref-type="fig" rid="F7">Figure 7</xref>. In the initial training phase, a CNN model is trained on a specific dataset, resulting in a set of learned weights. These weights enable the model to classify features similar to those encountered during training. The process of transfer learning can be mathematically represented as:</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M11"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>W</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where W&#x00027; represents the adapted weights post-transfer learning, W denotes the original pre-trained weights, and <italic>D</italic><sub>new</sub> is the new dataset. By incorporating transformer encoders, the model&#x00027;s ability to generalize and apply learned patterns to novel datasets is significantly enhanced. This process can be formalized as:</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>p</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>E</italic><sub>input</sub> denotes the input embeddings fed into the transformer encoder, and <italic>E</italic><sub>transformed</sub> represents the output embeddings, now enriched with contextual information through the encoder&#x00027;s processing.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Transfer learning model applied.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0007.tif"/>
</fig>
</sec>
</sec>
<sec id="s7">
<title>7 Proposed model of prediction system</title>
<p>The synergistic integration of Convolutional Vision Transformers (CVT) and Convolutional Channel Transformers (CCT) represents a groundbreaking approach in object recognition, harnessing the complementary strengths of Convolutional Neural Networks (CNNs) and transformers to process images with enhanced efficacy. This innovative methodology enables a comprehensive and holistic analysis of images, significantly improving the model&#x00027;s efficiency and deployment capability. The remarkable performance of this approach is particularly evident in the classification of COVID-19 images, as illustrated in <xref ref-type="fig" rid="F8">Figure 8</xref> respectively. By combining the spatial hierarchies of CNNs with the self-attention mechanisms of transformers, CVT and CCT facilitate a more detailed and nuanced understanding of image features, leading to improved recognition accuracy and robustness. This integrated approach demonstrates a significant advancement in computer vision, enabling more effective and efficient image analysis in various applications, including medical imaging and disease diagnosis.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Proposed model of prediction system (vision transformer and transformer encoder).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0008.tif"/>
</fig>
<sec>
<title>7.1 Categorical cross-entropy loss function</title>
<p>The categorical cross-entropy loss function is used for multi-class classification problems. It measures the divergence between predicted and true class probabilities, penalizing predictions based on their confidence in the correct class. Regularization techniques, such as L2 regularization and dropout regularization, are applied to prevent overfitting and improve generalization. The loss function is directly tied to the SoftMax activation function in the output layer, ensuring predicted probabilities sum to 1 across all classes as shown in <xref ref-type="disp-formula" rid="E10">Equation 10</xref>.</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mtext>Weighted&#x000A0;CCE</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000B7;</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000B7;</mml:mo><mml:mi>log</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
</sec>
<sec id="s8">
<title>8 Performance metrics</title>
<p>The metrics are defined as follows, where TP represents true positives, TN denotes true negatives, FP stands for false positives, and FN signifies false negatives:</p>
<sec>
<title>8.1 Accuracy</title>
<p>Accuracy measures the proportion of true results (both true positives and true negatives) in the total number of cases examined.</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>8.2 Recall</title>
<p>Recall or Sensitivity, quantifies the proportion of actual positives correctly identified.</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>8.3 Specificity</title>
<p>Specificity measures the proportion of actual negatives correctly identified.</p>
<disp-formula id="E13"><label>(13)</label><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>8.4 Precision</title>
<p>Precision assesses the proportion of positive identifications that were actually correct.</p>
<disp-formula id="E14"><label>(14)</label><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>8.5 F1-score</title>
<p>The F1-Score is the harmonic mean of Precision and Recall, providing a balance between the two.</p>
<disp-formula id="E15"><label>(15)</label><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>S</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x000D7;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
</sec>
<sec id="s9">
<title>9 Experimental procedure</title>
<p>The primary objective of this experiment was to evaluate the performance of classification models on a dataset of X-ray images, encompassing categories such as standard, lung opacity, pneumonia, and COVID-19 cases. To ensure optimal model training and evaluation, data enhancement and balancing techniques were applied, as illustrated in <xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Original dataset distribution.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Category</bold></th>
<th valign="top" align="center"><bold>Subcategory</bold></th>
<th valign="top" align="center"><bold>Training</bold></th>
<th valign="top" align="center"><bold>Testing</bold></th>
<th valign="top" align="center"><bold>Validation</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="3">X-ray</td>
<td valign="top" align="center">COVID-19</td>
<td valign="top" align="center">700</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">100</td>
</tr>
 <tr>
<td valign="top" align="center">Normal</td>
<td valign="top" align="center">4,000</td>
<td valign="top" align="center">1,000</td>
<td valign="top" align="center">750</td>
</tr>
 <tr>
<td valign="top" align="center">Viral</td>
<td valign="top" align="center">2,500</td>
<td valign="top" align="center">800</td>
<td valign="top" align="center">400</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Balanced dataset distribution.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Category</bold></th>
<th valign="top" align="center"><bold>Subcategory</bold></th>
<th valign="top" align="center"><bold>Training</bold></th>
<th valign="top" align="center"><bold>Testing</bold></th>
<th valign="top" align="center"><bold>Validation</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="3">X-ray</td>
<td valign="top" align="center">COVID-19</td>
<td valign="top" align="center">700</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">150</td>
</tr>
 <tr>
<td valign="top" align="center">Normal</td>
<td valign="top" align="center">1,500</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">125</td>
</tr>
 <tr>
<td valign="top" align="center">Viral</td>
<td valign="top" align="center">1,700</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">135</td>
</tr></tbody>
</table>
</table-wrap>
<p>To ensure fair and reliable model predictions, techniques were applied to address class imbalance in the dataset, which could otherwise bias the model toward overrepresented classes. Methods such as class weighting and oversampling were employed to balance the distribution among the COVID-19, pneumonia, and normal classes. Class weighting adjusted the loss function to give higher importance to minority classes, while oversampling involved duplicating samples from underrepresented classes to create a more balanced dataset. These approaches aimed to reduce bias and improve the model&#x00027;s ability to accurately classify images across all categories, enhancing its reliability and robustness in clinical applications.</p>
<sec>
<title>9.1 Data preprocessing</title>
<p>The first step in the experimental process involved the enhancement of X-ray images using the Contrast Limited Adaptive Histogram Equalization (CLAHE) technique, mathematically represented as:</p>
<disp-formula id="E16"><label>(16)</label><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>I</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>L</mml:mi><mml:mi>A</mml:mi><mml:mi>H</mml:mi><mml:mi>E</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>I</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>I</italic><sub>original</sub> is the original X-ray image, and <italic>I</italic><sub>enhanced</sub> is the result after applying CLAHE.</p>
<sec>
<title>9.1.1 Image resizing</title>
<p>The images were resized to a uniform resolution of 224 &#x000D7; 224 pixels. This standardization is critical for compatibility with the input size requirements of the employed neural network architectures (e.g., EfficientNet and ResNet).</p>
</sec>
<sec>
<title>9.1.2 Data augmentation</title>
<p>To increase the variability of the dataset and reduce overfitting, the following augmentation techniques were applied during training:</p>
</sec>
<sec>
<title>9.1.3 Random rotations</title>
<p>Introduced angular variations to simulate different orientations.</p>
</sec>
<sec>
<title>9.1.4 Horizontal and vertical flipping</title>
<p>Created mirror-like reflections to enhance diversity.</p>
</sec>
<sec>
<title>9.1.5 Random cropping and zooming</title>
<p>Enabled the model to focus on varying regions of the image.</p>
</sec>
<sec>
<title>9.1.6 Brightness adjustments</title>
<p>Improved robustness by simulating different lighting conditions.</p>
</sec>
<sec>
<title>9.1.7 Noise reduction</title>
<p>Basic noise reduction filters were applied to remove potential artifacts in the X-ray images, which could otherwise interfere with the feature extraction process.</p>
</sec>
<sec>
<title>9.1.8 Histogram equalization (complementary to CLAHE)</title>
<p>While CLAHE specifically focuses on improving local contrast, global histogram equalization was also used as an optional step during exploratory stages to further analyze its impact on image clarity.</p>
</sec>
</sec>
<sec>
<title>9.2 Dataset partitioning</title>
<p>The dataset was systematically divided into three distinct subsets to facilitate the classification task: training, testing, and validation. The data distribution was as follows:</p>
<list list-type="simple">
<list-item><p>Training Set: 70%.</p></list-item>
<list-item><p>Testing Set: 20%.</p></list-item>
<list-item><p>Validation Set: 10%.</p></list-item>
</list>
</sec>
<sec>
<title>9.3 Balanced dataset condition</title>
<p>To ensure fairness in model evaluation, the dataset was balanced, equalizing the number of images across classes. This was quantitatively managed as:</p>
<disp-formula id="E17"><label>(17)</label><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x02200;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>N</italic><sub>class</sub> denotes the number of images in each class.</p>
</sec>
<sec>
<title>9.4 Model training</title>
<p>Four models were trained using distinct versions of the dataset:</p>
<list list-type="bullet">
<list-item><p>Original dataset.</p></list-item>
<list-item><p>Balanced dataset.</p></list-item>
<list-item><p>Original dataset with CLAHE.</p></list-item>
<list-item><p>Balanced dataset with CLAHE.</p></list-item>
</list>
<sec>
<title>9.4.1 Original dataset</title>
<p>Category subcategory training testing validation</p>
<list list-type="bullet">
<list-item><p>X-ray COVID-19 700 300 100.</p></list-item>
<list-item><p>Normal 4000 1000 750.</p></list-item>
<list-item><p>Viral 2500 800 400.</p></list-item>
</list>
</sec>
<sec>
<title>9.4.2 Balanced dataset</title>
<p>Category subcategory training testing validation</p>
<list list-type="bullet">
<list-item><p>X-ray COVID-19 700 300 150.</p></list-item>
<list-item><p>Normal 1500 300 125.</p></list-item>
<list-item><p>Viral 1700 300 135.</p></list-item>
</list>
</sec>
</sec>
</sec>
<sec id="s10">
<title>10 Pre-processing</title>
<p>The initial phase of our image data pre-processing involved two critical steps: image enhancement using CLAHE and subsequent data augmentation. This comprehensive approach was designed to improve the quality and variability of the dataset, thereby aiding in the robustness of the subsequent classification models. To boost the quality of input images and enhance model accuracy, a technique called Contrast Limited Adaptive Histogram Equalization (CLAHE) was used to refine each image. This step improves contrast and highlights important features within X-ray images, leading to more accurate predictions. Additionally, image resolution standardization was performed to ensure consistent image sizes, making the model adaptable to various image sources and minimizing potential variability from different imaging devices. These preprocessing steps lead to a more robust model that can generalize across diverse imaging conditions, ensuring reliable performance in real-world applications.</p>
<sec>
<title>10.1 Contrast enhancement with CLAHE</title>
<p>Contrast Limited Adaptive Histogram Equalization (CLAHE) was employed to enhance the visual clarity of the images. This technique is mathematically represented as:</p>
<disp-formula id="E18"><label>(18)</label><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>I</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>L</mml:mi><mml:mi>A</mml:mi><mml:mi>H</mml:mi><mml:mi>E</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>I</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>I</italic><sub>original</sub> denotes the original image, and <italic>I</italic><sub>enhanced</sub> represents the image after contrast enhancement.</p>
<p><xref ref-type="fig" rid="F9">Figure 9</xref> illustrates the effect of CLAHE on an example image from the dataset. Following the enhancement, data augmentation techniques were applied to increase the diversity of the dataset, crucial for training more generalized models. The augmentation process involved transformations such as rotations, translations, and flipping.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Comparison of original and CLAHE-enhanced images.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0009.tif"/>
</fig>
</sec>
<sec>
<title>10.2 Data augmentation</title>
<p>Subsequent to the enhancement, data augmentation techniques were employed to increase the diversity of the dataset, a crucial step in training more generalized models. The augmentation process included a range of transformations, such as:</p>
<list list-type="bullet">
<list-item><p><bold>Rotations:</bold> Random angular transformations to simulate varying orientations.</p></list-item>
<list-item><p><bold>Translations:</bold> Random spatial transformations to simulate different positions.</p></list-item>
<list-item><p><bold>Flipping:</bold> Horizontal and vertical flipping to simulate mirror-like reflections.</p></list-item>
</list>
<p>These transformations enabled the generation of a more comprehensive and diverse dataset, thereby enhancing the model&#x00027;s ability to generalize across various scenarios and improving its robustness in <xref ref-type="fig" rid="F10">Figure 10</xref>.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Samples of images after data augmentation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0010.tif"/>
</fig>
</sec>
</sec>
<sec id="s11">
<title>11 Classification process</title>
<p>The datasets were subjected to preprocessing using Contrast Limited Adaptive Histogram Equalization (CLAHE), followed by division into two distinct sets: the original set and the balanced set, with and without additional enhancement. A comprehensive evaluation of various models was conducted on these datasets, assessing their performance based on accuracy and loss metrics across three phases:</p>
<list list-type="bullet">
<list-item><p><bold>Training phase:</bold> Model training and optimization.</p></list-item>
<list-item><p><bold>Testing phase:</bold> Model evaluation on unseen data.</p></list-item>
<list-item><p><bold>Validation phase:</bold> Model validation and hyperparameter tuning.</p></list-item>
</list>
<p>This rigorous evaluation framework enabled a thorough analysis of model performance, facilitating the identification of optimal models and hyperparameters for the task at hand.</p>
<sec>
<title>11.1 Experimental setup</title>
<p>Each model was trained over 10 epochs using a batch size of 8. The following equation represents the general form of the loss function minimized during training:</p>
<disp-formula id="E19"><label>(19)</label><mml:math id="M23"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mo>-</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>N</italic> is the number of samples, <italic>M</italic> is the number of classes, <italic>y</italic><sub><italic>ij</italic></sub> is the binary indicator of class <italic>j</italic> for sample <italic>i</italic>, and <italic>p</italic><sub><italic>ij</italic></sub>(&#x003B8;) is the predicted probability of sample <italic>i</italic> being in class <italic>j</italic>, with model parameters &#x003B8;.</p>
</sec>
</sec>
<sec id="s12">
<title>12 Results</title>
<p>To prevent overfitting and enhance the model&#x00027;s generalizability, several techniques were employed during training. First, cross-validation was used to ensure robust model evaluation across different data splits, which helped identify any potential overfitting to specific subsets. Additionally, data augmentation techniques such as random rotations, translations, and flips were applied to increase dataset variability and reduce the model&#x00027;s reliance on specific image features. Dropout layers were also incorporated within the model architecture to prevent neurons from co-adapting too strongly, which often leads to overfitting. Despite these measures, validating the model on external datasets is crucial to further assess its adaptability and effectiveness across varied real-world settings and populations. This will be a key focus of future work, as it is essential to ensure that the model can generalize well to new, unseen data, and provide accurate predictions for a diverse range of patients and scenarios.</p>
<p>The models evaluated included Xception, InceptionV3, and InceptionResNetV2. The performance metrics revealed variations in accuracy and loss across the datasets (<xref ref-type="table" rid="T3">Table 3</xref>).</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Specifications of various deep learning models.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model-name</bold></th>
<th valign="top" align="center"><bold>No. of Param. (M)</bold></th>
<th valign="top" align="center"><bold>Resolution of images</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Resnet34</td>
<td valign="top" align="center">17.4</td>
<td valign="top" align="center">224 &#x000D7; 224</td>
</tr> <tr>
<td valign="top" align="left">Resnet50</td>
<td valign="top" align="center">21.1</td>
<td valign="top" align="center">224 &#x000D7; 224</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B4</td>
<td valign="top" align="center">15.8</td>
<td valign="top" align="center">350 &#x000D7; 350</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B5</td>
<td valign="top" align="center">27.8</td>
<td valign="top" align="center">456 &#x000D7; 456</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-s</td>
<td valign="top" align="center">21.5</td>
<td valign="top" align="center">384 &#x000D7; 384</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-m</td>
<td valign="top" align="center">50.2</td>
<td valign="top" align="center">480 &#x000D7; 480</td>
</tr> <tr>
<td valign="top" align="left">CCT-14.7 &#x000D7; 2.384</td>
<td valign="top" align="center">21.1</td>
<td valign="top" align="center">224 &#x000D7; 224</td>
</tr></tbody>
</table>
</table-wrap>
<sec>
<title>12.1 Training and testing phases</title>
<p>The DataLoader class, integral to our process, dynamically assigns classifications for each dataset, preparing them for submission to the network with appropriately set dimensions and normalization. Utilizing pre-trained weights from the ImageNet dataset, the neural network configuration is defined, including the number of classes and layers requiring enhancements. Each model integrates a classifier head, concluding with a ReLU function, to process the logits for each class output by the final linear layer.</p>
</sec>
<sec>
<title>12.2 Training process</title>
<p>Training involves selecting Cross-Entropy Loss as the loss function, Adam for optimization, and a step function for learning rate scheduling. The protocol entails training for <italic>N</italic> epochs, initially modifying only the final layer weights for the first <italic>K</italic> epochs, then adjusting the entire network&#x00027;s weights for the remaining <italic>N-K</italic> epochs. For CCT models, all weights are trainable from the outset. Model performance on the validation set dictates the saving of the best model at each epoch.</p>
</sec>
<sec>
<title>12.3 Testing process</title>
<p>Post-training, the best model undergoes evaluation against the training, validation, and test sets. This phase includes generating a confusion matrix and calculating class-specific recall, global accuracy, and precision shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Classification performance metrics.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Class</bold></th>
<th valign="top" align="center"><bold>Precision</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
<th valign="top" align="center"><bold>Support</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">COVID-19&#x0002B;ve</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.45</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">100</td>
</tr> <tr>
<td valign="top" align="left">COVID-19&#x02013;ve</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">110</td>
</tr> <tr>
<td valign="top" align="left">No COVID-19</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">105</td>
</tr> <tr>
<td valign="top" align="center" colspan="5">Accuracy = 0.80 Total Support = 315</td>
</tr> <tr>
<td valign="top" align="left">Macro-average</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">315</td>
</tr> <tr>
<td valign="top" align="left">Weighted average</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">315</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>12.4 Results</title>
<p><xref ref-type="table" rid="T5">Table 5</xref> summarizes each model&#x00027;s accuracy metrics across the training, testing, and validation phases. <xref ref-type="table" rid="T6">Table 6</xref> lists each model&#x00027;s accuracy on the augmented training, testing, and validation datasets. Model performance metrics under the CLAHE and balanced datasets are shown in <xref ref-type="table" rid="T7">Table 7</xref>.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Model performance metrics.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Models</bold></th>
<th valign="top" align="center"><bold>Training- accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Testing- accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Val- accuracy (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Resnet34</td>
<td valign="top" align="center">95.50</td>
<td valign="top" align="center">75.50</td>
<td valign="top" align="center">90.25</td>
</tr> <tr>
<td valign="top" align="left">Resnet50</td>
<td valign="top" align="center">92.00</td>
<td valign="top" align="center">72.25</td>
<td valign="top" align="center">91.75</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B4</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">82.50</td>
<td valign="top" align="center">96.50</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B5</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">75.50</td>
<td valign="top" align="center">89.70</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-s</td>
<td valign="top" align="center">92.50</td>
<td valign="top" align="center">72.85</td>
<td valign="top" align="center">75.50</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-m</td>
<td valign="top" align="center">95.60</td>
<td valign="top" align="center">73.50</td>
<td valign="top" align="center">85.50</td>
</tr> <tr>
<td valign="top" align="left">CCT-14.7 &#x000D7; 2.384</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">78.40</td>
<td valign="top" align="center">80.25</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Model performance metrics.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Training- accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Testing- accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Val- accuracy (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Resnet34</td>
<td valign="top" align="center">95.50</td>
<td valign="top" align="center">76.50</td>
<td valign="top" align="center">90.25</td>
</tr> <tr>
<td valign="top" align="left">Resnet50</td>
<td valign="top" align="center">92.00</td>
<td valign="top" align="center">82.25</td>
<td valign="top" align="center">91.75</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B4</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">87.00</td>
<td valign="top" align="center">97.50</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-B5</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">72.20</td>
<td valign="top" align="center">89.70</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-s</td>
<td valign="top" align="center">92.50</td>
<td valign="top" align="center">75.50</td>
<td valign="top" align="center">90.50</td>
</tr> <tr>
<td valign="top" align="left">Efficientnet-V2-m</td>
<td valign="top" align="center">95.60</td>
<td valign="top" align="center">71.50</td>
<td valign="top" align="center">88.50</td>
</tr> <tr>
<td valign="top" align="left">CCT-14.7 &#x000D7; 2.384</td>
<td valign="top" align="center">91.00</td>
<td valign="top" align="center">67.40</td>
<td valign="top" align="center">92.25</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Model performance metrics under CLAHE and BALANCED datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Training accuracy</bold></th>
<th valign="top" align="center"><bold>Testing accuracy</bold></th>
<th valign="top" align="center"><bold>Training loss</bold></th>
<th valign="top" align="center"><bold>Testing loss</bold></th>
<th valign="top" align="center"><bold>Validation accuracy</bold></th>
<th valign="top" align="center"><bold>Validation loss</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="7"><bold>CLAHE</bold></td>
</tr> <tr>
<td valign="top" align="left">ResNet50</td>
<td valign="top" align="center">0.7236</td>
<td valign="top" align="center">0.6035</td>
<td valign="top" align="center">0.1033</td>
<td valign="top" align="center">1.8100</td>
<td valign="top" align="center">0.7052</td>
<td valign="top" align="center">1.1025</td>
</tr> <tr>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center">0.6750</td>
<td valign="top" align="center">0.4670</td>
<td valign="top" align="center">0.5027</td>
<td valign="top" align="center">1.3340</td>
<td valign="top" align="center">0.7330</td>
<td valign="top" align="center">0.2354</td>
</tr> <tr>
<td valign="top" align="left">InceptionV3</td>
<td valign="top" align="center">0.8025</td>
<td valign="top" align="center">0.5717</td>
<td valign="top" align="center">0.6758</td>
<td valign="top" align="center">3.2208</td>
<td valign="top" align="center">0.7984</td>
<td valign="top" align="center">0.2457</td>
</tr> <tr>
<td valign="top" align="left">VGG16</td>
<td valign="top" align="center">0.8223</td>
<td valign="top" align="center">0.7202</td>
<td valign="top" align="center">0.2254</td>
<td valign="top" align="center">4.1548</td>
<td valign="top" align="center">0.4558</td>
<td valign="top" align="center">3.3465</td>
</tr> <tr>
<td valign="top" align="left">VGG19</td>
<td valign="top" align="center">0.8021</td>
<td valign="top" align="center">0.6280</td>
<td valign="top" align="center">0.1248</td>
<td valign="top" align="center">2.5478</td>
<td valign="top" align="center">0.4236</td>
<td valign="top" align="center">1.1583</td>
</tr> <tr>
<td valign="top" align="left">EfficientNet-B4</td>
<td valign="top" align="center">0.9634</td>
<td valign="top" align="center">0.8765</td>
<td valign="top" align="center">0.1248</td>
<td valign="top" align="center">0.9221</td>
<td valign="top" align="center">0.8234</td>
<td valign="top" align="center">1.1102</td>
</tr> <tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="7"><bold>BALANCED</bold></td>
</tr> <tr>
<td valign="top" align="left">ResNet50</td>
<td valign="top" align="center">0.7366</td>
<td valign="top" align="center">0.8426</td>
<td valign="top" align="center">0.5544</td>
<td valign="top" align="center">0.2757</td>
<td valign="top" align="center">0.8229</td>
<td valign="top" align="center">1.1225</td>
</tr> <tr>
<td valign="top" align="left">Xception</td>
<td valign="top" align="center">0.7094</td>
<td valign="top" align="center">0.3792</td>
<td valign="top" align="center">0.7227</td>
<td valign="top" align="center">2.3039</td>
<td valign="top" align="center">0.7640</td>
<td valign="top" align="center">0.6154</td>
</tr> <tr>
<td valign="top" align="left">InceptionV3</td>
<td valign="top" align="center">0.5640</td>
<td valign="top" align="center">0.4239</td>
<td valign="top" align="center">0.8906</td>
<td valign="top" align="center">0.7901</td>
<td valign="top" align="center">0.4339</td>
<td valign="top" align="center">0.8938</td>
</tr> <tr>
<td valign="top" align="left">VGG16</td>
<td valign="top" align="center">0.8013</td>
<td valign="top" align="center">0.8478</td>
<td valign="top" align="center">0.2309</td>
<td valign="top" align="center">3.2348</td>
<td valign="top" align="center">0.2358</td>
<td valign="top" align="center">2.2365</td>
</tr> <tr>
<td valign="top" align="left">VGG19</td>
<td valign="top" align="center">0.7061</td>
<td valign="top" align="center">0.7230</td>
<td valign="top" align="center">0.2216</td>
<td valign="top" align="center">1.2473</td>
<td valign="top" align="center">0.1736</td>
<td valign="top" align="center">1.2383</td>
</tr> <tr>
<td valign="top" align="left">EfficientNet-B4</td>
<td valign="top" align="center">0.9434</td>
<td valign="top" align="center">0.9165</td>
<td valign="top" align="center">0.1724</td>
<td valign="top" align="center">0.6853</td>
<td valign="top" align="center">0.8134</td>
<td valign="top" align="center">0.4356</td>
</tr></tbody>
</table>
</table-wrap>
<p>The performance of various models was analyzed using both original, unbalanced datasets and additional, varied datasets to understand each model&#x00027;s generalizability and tendency toward overfitting. The mathematical representation of model accuracy, &#x003B1;, is defined as the ratio of correctly predicted observations, <italic>C</italic><sub><italic>p</italic></sub>, to the total observations, <italic>T</italic><sub><italic>o</italic></sub>.</p>
<disp-formula id="E20"><label>(20)</label><mml:math id="M24"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>&#x003B1;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Overfitting is quantitatively assessed by comparing training accuracy, &#x003B1;<sub><italic>train</italic></sub>, and validation accuracy, &#x003B1;<sub><italic>val</italic></sub>, where a significant discrepancy indicates potential overfitting:</p>
<disp-formula id="E21"><label>(21)</label><mml:math id="M25"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>O</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>I</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<sec>
<title>12.4.1 Comparison with state-of-the-art models</title>
<p>To put our model&#x00027;s performance into perspective, we compared it to other state-of-the-art models in COVID-19 detection using chest X-ray images. The results are summarized in <xref ref-type="table" rid="T8">Table 8</xref>, which shows key performance metrics like accuracy, precision, recall, and F1-score for each model. This comparison highlights the strengths of our approach and demonstrates its effectiveness in detecting COVID-19 from chest X-rays. By benchmarking our model against others in the field, we can see how it stacks up against the current state of the art. This comparison is essential for understanding the advancements in COVID-19 detection and how our model contributes to the ongoing efforts. Our goal is to provide a comprehensive view of the current landscape in COVID-19 detection using chest X-ray images and demonstrate the value of our approach in this critical area of research.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Comparison of the proposed model with state-of-the-art models for COVID-19 detection from chest X-ray images.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Precision (%)</bold></th>
<th valign="top" align="center"><bold>Recall (%)</bold></th>
<th valign="top" align="center"><bold>F1-Score (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">COVID-Net</td>
<td valign="top" align="center">93.5</td>
<td valign="top" align="center">94.0</td>
<td valign="top" align="center">92.8</td>
<td valign="top" align="center">93.4</td>
</tr> <tr>
<td valign="top" align="left">ResNet50</td>
<td valign="top" align="center">91.2</td>
<td valign="top" align="center">90.5</td>
<td valign="top" align="center">90.0</td>
<td valign="top" align="center">90.2</td>
</tr> <tr>
<td valign="top" align="left">EfficientNet-B4</td>
<td valign="top" align="center">92.7</td>
<td valign="top" align="center">92.3</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">92.2</td>
</tr> <tr>
<td valign="top" align="left">Proposed model</td>
<td valign="top" align="center">97.5</td>
<td valign="top" align="center">97.0</td>
<td valign="top" align="center">96.4</td>
<td valign="top" align="center">96.7</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>12.4.2 Results</title>
<p>Analysis revealed that MobileNet yielded the highest accuracy for the original, unbalanced dataset. Conversely, VGG16 demonstrated superior performance across all other datasets but exhibited clear signs of overfitting on the original, unbalanced dataset, as highlighted by its performance metrics.</p>
</sec>
<sec>
<title>12.4.3 Discussion</title>
<p>The differential performance of MobileNet and VGG16 underscores the importance of selecting appropriate models based on dataset characteristics. The observed overfitting of VGG16 on the unbalanced dataset emphasizes the need for careful model evaluation and dataset preprocessing and the heatmap generation is shown in <xref ref-type="fig" rid="F11">Figure 11</xref>.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>Different model heat map visualization.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0011.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec id="s13">
<title>13 Localization</title>
<p>Disease localization in medical images is a critical step in diagnostic processes. The application of Grad-CAM to models trained on various datasets elucidates the relationship between training data quality, model accuracy, image resolution, and localization precision. Grad-CAM uses the gradients of any target concept, flowing into the final convolutional layer to produce a coarse localization map highlighting the important regions for predicting the concept. Mathematically, it can be represented as</p>
<disp-formula id="E22"><label>(22)</label><mml:math id="M26"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mo>-</mml:mo><mml:mi>C</mml:mi><mml:mi>A</mml:mi><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>L</mml:mi><mml:mi>U</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Where <inline-formula><mml:math id="M27"><mml:msubsup><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mo>-</mml:mo><mml:mi>C</mml:mi><mml:mi>A</mml:mi><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is the localization map for class <italic>c</italic>, <inline-formula><mml:math id="M28"><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> are the weights for feature map <italic>k</italic>, <italic>A</italic><sup><italic>k</italic></sup> is the activation of the <italic>k</italic>-th feature map, and ReLU is applied to focus on features that have a positive influence on the class of interest. The application of Grad-CAM to models trained with the initial and enhanced datasets revealed that models trained on initial data more accurately highlighted disease-affected areas. This accuracy in localization is directly proportional to the model&#x00027;s overall accuracy and the image resolution, described as:</p>
<disp-formula id="E23"><label>(23)</label><mml:math id="M29"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0221D;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>M</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x000D7;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>I</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>emphasizing the compounded effect of higher accuracy and better resolution on precise disease localization. Our findings underscore the importance of image quality and model accuracy for effective disease localization using Grad-CAM. The study advocates for the optimization of these factors to improve diagnostic efficiency in medical imaging as shown in <xref ref-type="fig" rid="F12">Figures 12</xref>, <xref ref-type="fig" rid="F13">13</xref>.</p>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>Heat map extracted by GRADCAM algorithm.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0012.tif"/>
</fig>
<fig id="F13" position="float">
<label>Figure 13</label>
<caption><p>Heat map visualization by GRADCAM algorithm.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0013.tif"/>
</fig>
<p>While Grad-CAM provides valuable insights into the decision-making process behind COVID-19 predictions, deep learning models are often criticized for their lack of transparency. This &#x0201C;black box&#x0201D; nature can be a significant barrier for clinical adoption, as clinicians may struggle to understand the reasoning behind model predictions. To address this, future work could explore combining Grad-CAM with other explainability techniques, such as LIME or SHAP. These methods offer unique perspectives on model behavior, providing clinicians with a more comprehensive understanding of prediction rationales. By shedding light on the decision-making process, we can increase trust and usability in medical settings.</p>
</sec>
<sec id="s14">
<title>14 Results and discussions of the work carried out</title>
<p>The advent of deep learning in medical imaging has facilitated the development of automated diagnostic tools. This paper presents an evaluation of transfer learning models, specifically EfficientNet and MobileNet, in the classification of chest X-ray images. Transfer learning models were trained on a comprehensive dataset comprising images categorized as COVID-19, normal, and viral pneumonia. The performance was assessed based on the accuracy of classifications, with further analysis conducted through confusion matrices and ROC curves.</p>
<sec>
<title>14.1 Model performance</title>
<p>The models&#x00027; diagnostic capabilities were visualized as follows:</p>
<list list-type="bullet">
<list-item><p>Classification results are depicted in <xref ref-type="fig" rid="F14">Figure 14</xref>.</p></list-item>
<list-item><p>The confusion matrix for validation dataset diagnoses is shown in <xref ref-type="fig" rid="F15">Figure 15</xref>.</p></list-item>
<list-item><p>ROC curve analysis for the EfficientNet model is presented in <xref ref-type="fig" rid="F16">Figure 16</xref>.</p></list-item>
<list-item><p>Accuracy progression of the MobileNet model over epochs is illustrated in <xref ref-type="fig" rid="F17">Figures 17</xref>, <xref ref-type="fig" rid="F18">18</xref>.</p></list-item>
</list>
<fig id="F14" position="float">
<label>Figure 14</label>
<caption><p>The classification results obtained.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0014.tif"/>
</fig>
<fig id="F15" position="float">
<label>Figure 15</label>
<caption><p>Diagnoses from the validation dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0015.tif"/>
</fig>
<fig id="F16" position="float">
<label>Figure 16</label>
<caption><p>ROC curve visualization for the efficient net model, based on the true positive rate.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0016.tif"/>
</fig>
<fig id="F17" position="float">
<label>Figure 17</label>
<caption><p>Efficient net accuracy obtained.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0017.tif"/>
</fig>
<fig id="F18" position="float">
<label>Figure 18</label>
<caption><p>Efficient net validation loss.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-07-1489020-g0018.tif"/>
</fig>
</sec>
</sec>
<sec id="s15">
<title>15 Conclusion</title>
<p>In response to the COVID-19 pandemic, our study demonstrates promising results for COVID-19 detection using chest X-rays. However, we must note that our model has not yet been tested in real-world clinical settings, which limits our ability to fully assess its performance in a practical healthcare environment. To address this, we plan to validate our model in clinical settings to evaluate its effectiveness, robustness, and potential impact on patient diagnosis and care. This will provide valuable insights into our model&#x00027;s applicability in different medical scenarios and move us closer to broader adoption in clinical practice. Our proposed solution leverages the power of pre-trained models and demonstrates commendable efficacy, achieving an accuracy rate of 88.48% in training and 88.1% in validation on the initial dataset. By harnessing Efficient Net-based transfer learning on a balanced and enhanced dataset, our developed models have attained exemplary performance, registering training and validation accuracies of 95.64% and 97.31%, respectively. These results not only parallel but, in some instances, surpass the accuracy levels of existing models, demonstrating the robustness of our approach. Notably, our models&#x00027; enhanced capability to precisely localize affected areas significantly bolsters their diagnostic utility, providing a valuable tool for physicians in the fight against COVID-19. Our study contributes to the growing body of research in AI-assisted medical imaging, showcasing the potential of deep learning architectures to revolutionize healthcare diagnostics.</p>
</sec>
<sec id="s16">
<title>16 Future work</title>
<p>Future work will pursue several promising avenues: (1) the integration of advanced explainability techniques such as SHAP or LIME to enhance model interpretability; (2) validation across diverse datasets from different domains to establish broader generalizability; and (3) exploration of hybrid approaches combining the current method with emerging techniques in the field. These extensions could address current limitations while advancing the broader research agenda.</p>
</sec>
<sec id="s17">
<title>17 Limitations</title>
<p>First, while our dataset includes chest X-ray images, it may not fully represent the diversity of COVID-19 cases across different populations and imaging equipment. The model, though showing high accuracy in experimental settings, requires validation in real-world clinical environments to establish practical utility. While we employed Grad-CAM for visualization, we recognize that our model&#x00027;s interpretability could be enhanced through additional techniques like SHAP or LIME to increase clinician trust. Despite implementing class weighting and oversampling, inherent dataset imbalances persist, potentially affecting prediction reliability for minority classes. The model&#x00027;s computational requirements may pose challenges in resource-constrained settings, suggesting a need for architectural optimization. Although we implemented dropout and data augmentation, the high test accuracy warrants external validation to conclusively demonstrate generalizability. Finally, our focus on X-ray imaging alone may not capture all relevant COVID-19 clinical features, indicating potential value in incorporating additional imaging modalities like CT scans or clinical data in future work.</p>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="s18">
<title>Author contributions</title>
<p>AV: Writing &#x02013; original draft, Conceptualization, Formal analysis, Investigation, Methodology, Project administration, Resources, Supervision, Visualization, Writing &#x02013; review &#x00026; editing. SB: Formal analysis, Methodology, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing, Validation. SP: Investigation, Project administration, Resources, Writing &#x02013; review &#x00026; editing. PS: Data curation, Formal analysis, Writing &#x02013; review &#x00026; editing. ML: Validation, Investigation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s19">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<ack><p>We want to express our sincere gratitude for the financial support provided by the Nitte, Deemed to be a University, NMAM Institute of Technology, Nitte, Karkala, Karnataka. Their funding enabled us to pursue this research and achieve our goals. We appreciate their commitment to advancing scientific knowledge and innovation.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s20">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ai</surname> <given-names>T.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Hou</surname> <given-names>H.</given-names></name> <name><surname>Zhan</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Lv</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Correlation of chest CT and RT-pcr testing in coronavirus disease 2019 (COVID-19) in China: a report of 1014 cases</article-title>. <source>Radiology</source> <volume>296</volume>, <fpage>E32</fpage>&#x02013;<lpage>E40</lpage>. <pub-id pub-id-type="doi">10.1148/radiol.2020200642</pub-id><pub-id pub-id-type="pmid">32101510</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alharbi</surname> <given-names>A. H.</given-names></name> <name><surname>Aravinda</surname> <given-names>C. V.</given-names></name> <name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Venugopala</surname> <given-names>P. S.</given-names></name> <name><surname>Reddicherla</surname> <given-names>P. R.</given-names></name> <name><surname>Shah</surname> <given-names>M. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Segmentation and classification of white blood cells using the unet</article-title>. <source>Contrast Media Mol. Imag.</source> <volume>571</volume>:<fpage>5913905</fpage>. <pub-id pub-id-type="doi">10.1155/2022/5913905</pub-id><pub-id pub-id-type="pmid">35919503</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alharbi</surname> <given-names>A. H.</given-names></name> <name><surname>Aravinda</surname> <given-names>C. V.</given-names></name> <name><surname>Shetty</surname> <given-names>J.</given-names></name> <name><surname>Jabarulla</surname> <given-names>M. Y.</given-names></name> <name><surname>Sudeepa</surname> <given-names>K. B.</given-names></name> <name><surname>Singh</surname> <given-names>S. K.</given-names></name> <etal/></person-group>. (<year>2022a</year>). <article-title>Computational models-based detection of peripheral malarial parasites in blood smears</article-title>. <source>Contrast Media Mol. Imag</source>. <volume>2022</volume>:<fpage>9171343</fpage>. <pub-id pub-id-type="doi">10.1155/2022/9171343</pub-id><pub-id pub-id-type="pmid">35800239</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alharbi</surname> <given-names>A. H.</given-names></name> <name><surname>Aravinda</surname> <given-names>V. C.</given-names></name> <name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Ashwini</surname> <given-names>B.</given-names></name> <name><surname>Jabarulla</surname> <given-names>M. Y.</given-names></name> <name><surname>Shah</surname> <given-names>M. A.</given-names></name> <etal/></person-group>. (<year>2022b</year>). <article-title>Detection of peripheral malarial parasites in blood smears using deep learning models</article-title>. <source>Comput. Intell. Neurosci</source>. <volume>2022</volume>:<fpage>3922763</fpage>. <pub-id pub-id-type="doi">10.1155/2022/3922763</pub-id><pub-id pub-id-type="pmid">35655511</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Apostolopoulos</surname> <given-names>I. D.</given-names></name> <name><surname>Mpesiana</surname> <given-names>T. A.</given-names></name></person-group> (<year>2020</year>). <article-title>Covid-19: automatic detection from X-ray images utilizing transfer learning with convolutional neural networks</article-title>. <source>Phys. Eng. Sci. Med.</source> <volume>43</volume>, <fpage>635</fpage>&#x02013;<lpage>640</lpage>. <pub-id pub-id-type="doi">10.1007/s13246-020-00865-4</pub-id><pub-id pub-id-type="pmid">32524445</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ching</surname> <given-names>T.</given-names></name> <name><surname>Himmelstein</surname> <given-names>D. S.</given-names></name> <name><surname>Beaulieu-Jones</surname> <given-names>B. K.</given-names></name> <name><surname>Kalinin</surname> <given-names>A. A.</given-names></name> <name><surname>Do</surname> <given-names>B. T.</given-names></name> <name><surname>Way</surname> <given-names>G. P.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Opportunities and obstacles for deep learning in biology and medicine</article-title>. <source>J. R. Soc. Interface</source> <volume>15</volume>:<fpage>20170387</fpage>. <pub-id pub-id-type="doi">10.1098/rsif.2017.0387</pub-id><pub-id pub-id-type="pmid">29618526</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Choe</surname> <given-names>J.</given-names></name> <name><surname>Lee</surname> <given-names>S. M.</given-names></name> <name><surname>Do</surname> <given-names>K. H.</given-names></name> <name><surname>Lee</surname> <given-names>G.</given-names></name> <name><surname>Lee</surname> <given-names>J. G.</given-names></name> <name><surname>Lee</surname> <given-names>S. M.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Deep learning-based image conversion of CT reconstruction kernels improves radiomics reproducibility for pulmonary nodules or masses</article-title>. <source>Radiology</source> <volume>292</volume>, <fpage>365</fpage>&#x02013;<lpage>373</lpage>. <pub-id pub-id-type="doi">10.1148/radiol.2019181960</pub-id><pub-id pub-id-type="pmid">31210613</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Giri</surname> <given-names>A.</given-names></name> <name><surname>Rana</surname> <given-names>D.</given-names></name></person-group> (<year>2020</year>). <article-title>Charting the challenges behind the testing of COVID-19 in developing countries: Nepal as a case study</article-title>. <source>Biosaf Health</source>. <volume>2</volume>, <fpage>53</fpage>&#x02013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1016/j.bsheal.2020.05.002</pub-id><pub-id pub-id-type="pmid">38620322</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haseli</surname> <given-names>S.</given-names></name> <name><surname>Khalili</surname> <given-names>N.</given-names></name> <name><surname>Bakhshayeshkaram</surname> <given-names>M.</given-names></name> <name><surname>Sanei-Taheri</surname> <given-names>M.</given-names></name> <name><surname>Moharramzad</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>Lobar distribution of COVID-19 pneumonia based on chest computed tomography findings: a retrospective study</article-title>. <source>Arch. Acad. Emerg. Med</source>. <volume>8</volume>:<fpage>e55</fpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Deep residual learning for image recognition,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, 770&#x02013;778. <pub-id pub-id-type="doi">10.1109/CVPR.2016.90</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hemdan</surname> <given-names>E. E. D.</given-names></name> <name><surname>Shouman</surname> <given-names>M. A.</given-names></name> <name><surname>Karar</surname> <given-names>M. E.</given-names></name></person-group> (<year>2020</year>). <article-title>Covidx-net: a framework of deep learning classifiers to diagnose covid-19 in x-ray images</article-title>. <source>arXiv preprint arXiv:2003.11055</source>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Islam</surname> <given-names>M. Z.</given-names></name> <name><surname>Islam</surname> <given-names>M. M.</given-names></name> <name><surname>Asraf</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>A combined deep CNN-LSTM network for the detection of novel coronavirus (COVID-19) using X-ray images</article-title>. <source>Inform. Med. Unlocked</source>. <volume>20</volume>:<fpage>100412</fpage>. <pub-id pub-id-type="doi">10.1016/j.imu.2020.100412</pub-id><pub-id pub-id-type="pmid">32835084</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kermany</surname> <given-names>D. S.</given-names></name> <name><surname>Goldbaum</surname> <given-names>M.</given-names></name> <name><surname>Cai</surname> <given-names>W.</given-names></name> <name><surname>Valentim</surname> <given-names>C. C. S.</given-names></name> <name><surname>Liang</surname> <given-names>H.</given-names></name> <name><surname>Baxter</surname> <given-names>S. L.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Identifying medical diagnoses and treatable diseases by image-based deep learning</article-title>. <source>Cell</source> <volume>172</volume>, <fpage>1122</fpage>&#x02013;<lpage>1131</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.02.010</pub-id><pub-id pub-id-type="pmid">29474911</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>W.</given-names></name> <name><surname>Agarwal</surname> <given-names>P. P.</given-names></name></person-group> (<year>2020</year>). <article-title>Chest imaging appearance of COVID-19 infection</article-title>. <source>Radiol. Cardiothorac. Imag.</source> <volume>2</volume>:<fpage>e200028</fpage>. <pub-id pub-id-type="doi">10.1148/ryct.2020200028</pub-id><pub-id pub-id-type="pmid">33778544</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Qin</surname> <given-names>L.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Yin</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Kong</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2020b</year>). <article-title>Artificial intelligence distinguishes COVID-19 from community-acquired pneumonia on chest CT</article-title>. <source>Radiology</source> <volume>296</volume>, <fpage>E65</fpage>&#x02013;<lpage>E71</lpage>. <pub-id pub-id-type="doi">10.1148/radiol.2020200905</pub-id><pub-id pub-id-type="pmid">32191588</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>X. J.</given-names></name> <name><surname>Shuai</surname> <given-names>B. X.</given-names></name> <name><surname>Zhang</surname> <given-names>Z. W.</given-names></name> <name><surname>Kang</surname> <given-names>Y.</given-names></name></person-group> (<year>2020a</year>). <article-title>COVID-19 versus non-COVID-19 pneumonia: a retrospective cohort study in Chengdu, China</article-title>. <source>medRxiv 2020&#x02013;04</source>. <pub-id pub-id-type="doi">10.1101/2020.04.28.20082784</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mohammad</surname> <given-names>R.</given-names></name> <name><surname>Abolfazl</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>A new modified deep convolutional neural network for detecting COVID-19 from X-ray images</article-title>. <source>arXiv preprint arXiv:2004.08052</source>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Narin</surname> <given-names>A.</given-names></name> <name><surname>Kaya</surname> <given-names>C.</given-names></name> <name><surname>Pamuk</surname> <given-names>Z.</given-names></name></person-group> (<year>2021</year>). <article-title>Automatic detection of coronavirus disease (COVID-19) using X-ray images and deep convolutional neural networks</article-title>. <source>Pattern Anal. Applic.</source> <volume>24</volume>, <fpage>1207</fpage>&#x02013;<lpage>1220</lpage>. <pub-id pub-id-type="doi">10.1007/s10044-021-00984-y</pub-id><pub-id pub-id-type="pmid">33994847</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ng</surname> <given-names>M. Y.</given-names></name> <name><surname>Lee</surname> <given-names>E. Y.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>F.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Imaging profile of the COVID-19 infection: radiologic findings and literature review</article-title>. <source>Radiol. Cardiothorac. Imag.</source> <volume>2</volume>:<fpage>e200034</fpage>. <pub-id pub-id-type="doi">10.1148/ryct.2020200034</pub-id><pub-id pub-id-type="pmid">33778547</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ozturk</surname> <given-names>T.</given-names></name> <name><surname>Talo</surname> <given-names>M.</given-names></name> <name><surname>Yildirim</surname> <given-names>E. A.</given-names></name> <name><surname>Baloglu</surname> <given-names>U. B.</given-names></name> <name><surname>Yildirim</surname> <given-names>O.</given-names></name> <name><surname>Acharya</surname> <given-names>U. R.</given-names></name></person-group> (<year>2020</year>). <article-title>Automated detection of COVID-19 cases using deep neural networks with X-ray images</article-title>. <source>Comput. Biol. Med.</source> <volume>121</volume>:<fpage>103792</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.103792</pub-id><pub-id pub-id-type="pmid">32568675</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perumal</surname> <given-names>V.</given-names></name> <name><surname>Narayanan</surname> <given-names>V.</given-names></name> <name><surname>Rajasekar</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>Detection of COVID-19 using CXR and CT images using Transfer Learning and Haralick features</article-title>. <source>Appl. Intell</source>. <volume>51</volume>, <fpage>341</fpage>&#x02013;<lpage>358</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-020-01831-z</pub-id><pub-id pub-id-type="pmid">35194321</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Saha</surname> <given-names>G. C.</given-names></name> <name><surname>Ganie</surname> <given-names>I. A.</given-names></name> <name><surname>Rajendran</surname> <given-names>G.</given-names></name> <name><surname>Nathalia</surname> <given-names>D.</given-names></name></person-group> (<year>2020</year>). <article-title>CNN analysis for the detection of SARS-CoV-2 in human body</article-title>. <source>Int. J. Adv. Sci. Technol</source>. <volume>29</volume>, <fpage>2369</fpage>&#x02013;<lpage>2374</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Selvaraju</surname> <given-names>R. R.</given-names></name> <name><surname>Das</surname> <given-names>A.</given-names></name> <name><surname>Vedantam</surname> <given-names>R.</given-names></name> <name><surname>Cogswell</surname> <given-names>M.</given-names></name> <name><surname>Parikh</surname> <given-names>D.</given-names></name> <name><surname>Batra</surname> <given-names>D.</given-names></name></person-group> (<year>2016</year>). <article-title>Grad-CAM: why did you say that?</article-title> <source>arXiv preprint arXiv:1611.07450</source>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Shi</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Tang</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Review of artificial intelligence techniques in imaging data acquisition, segmentation, and diagnosis for COVID-19</article-title>. <source>IEEE Rev. Biomed. Eng.</source> <volume>14</volume>, <fpage>4</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/RBME.2020.2987975</pub-id><pub-id pub-id-type="pmid">32305937</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simonyan</surname> <given-names>K.</given-names></name> <name><surname>Zisserman</surname> <given-names>A.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Very deep convolutional networks for large-scale image recognition,&#x0201D;</article-title> in <source>3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings</source>, <fpage>1</fpage>&#x02013;<lpage>14</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>D.</given-names></name> <name><surname>Aravinda</surname> <given-names>C. V.</given-names></name> <name><surname>Kaur</surname> <given-names>M.</given-names></name> <name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Shetty</surname> <given-names>J.</given-names></name> <name><surname>Reddicherla</surname> <given-names>V. R.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Dknet: deep kuzushiji characters recognition network</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>75872</fpage>&#x02013;<lpage>75883</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3191429</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Soares</surname> <given-names>E.</given-names></name> <name><surname>Angelov</surname> <given-names>P.</given-names></name> <name><surname>Biaso</surname> <given-names>S.</given-names></name> <name><surname>Froes</surname> <given-names>M. H.</given-names></name> <name><surname>Abe</surname> <given-names>D. K.</given-names></name></person-group> (<year>2020</year>). <article-title>SARS-CoV-2 CT-scan dataset: a large dataset of real patients CT scans for SARS-CoV-2 identification</article-title>. <source>medRxiv 2020&#x02013;04</source>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Huang</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Deep learning enables accurate diagnosis of novel coronavirus (COVID-19) with CT images</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform</source>. <volume>18</volume>, <fpage>2775</fpage>&#x02013;<lpage>2780</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2021.3065361</pub-id><pub-id pub-id-type="pmid">33705321</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Lin</surname> <given-names>Z. Q.</given-names></name></person-group> (<year>2020</year>). <article-title>COVID-Net: a tailored deep convolutional neural network design for detection of COVID-19 cases from chest X-ray images</article-title>. <source>Sci. Rep.</source> <volume>10</volume>:<fpage>19549</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-76550-z</pub-id><pub-id pub-id-type="pmid">33177550</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Zha</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Wu</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Niu</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>A fully automatic deep learning system for COVID-19 diagnostic and prognostic analysis</article-title>. <source>Eur. Respir. J</source>. <volume>56</volume>, <fpage>1</fpage>&#x02013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1183/13993003.00775-2020</pub-id><pub-id pub-id-type="pmid">32444412</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Peng</surname> <given-names>Y.</given-names></name> <name><surname>Lu</surname> <given-names>L.</given-names></name> <name><surname>Lu</surname> <given-names>Z.</given-names></name> <name><surname>Bagheri</surname> <given-names>M.</given-names></name> <name><surname>Summers</surname> <given-names>R. M.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;ChestX-ray8: hospital-scale chest X-ray database and benchmarks on weakly-supervised classification and localization of common thorax diseases,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>2097</fpage>&#x02013;<lpage>2106</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2017.369</pub-id></citation>
</ref>
</ref-list>
</back>
</article>