<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2024.1410841</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Uncertainty quantification in multi-class image classification using chest X-ray images of COVID-19 and pneumonia</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Whata</surname> <given-names>Albert</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2395398/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Dibeco</surname> <given-names>Katlego</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Madzima</surname> <given-names>Kudakwashe</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Obagbuwa</surname> <given-names>Ibidun</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2326835/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Mathematical Sciences, Sol Plaatje University</institution>, <addr-line>Kimberley</addr-line>, <country>South Africa</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Computer Science and Information Technology, Sol Plaatje University</institution>, <addr-line>Kimberley</addr-line>, <country>South Africa</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Cornelio Y&#x000E1;&#x000F1;ez-M&#x000E1;rquez, National Polytechnic Institute (IPN), Mexico</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Nguyen Quoc Khanh Le, Taipei Medical University, Taiwan</p>
<p>Gokhan Altan, Iskenderun Technical University, T&#x000FC;rkiye</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Albert Whata <email>albert.whata&#x00040;spu.ac.za</email></corresp>
<fn fn-type="other" id="fn001"><p>&#x02020;ORCID: Kudakwashe Madzima <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-6672-8713">orcid.org/0000-0001-6672-8713</ext-link></p></fn></author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>09</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>7</volume>
<elocation-id>1410841</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>04</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>08</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Whata, Dibeco, Madzima and Obagbuwa.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Whata, Dibeco, Madzima and Obagbuwa</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>This paper investigates uncertainty quantification (UQ) techniques in multi-class classification of chest X-ray images (COVID-19, Pneumonia, and Normal). We evaluate Bayesian Neural Networks (BNN) and the Deep Neural Network with UQ (DNN with UQ) techniques, including Monte Carlo dropout, Ensemble Bayesian Neural Network (EBNN), and Ensemble Monte Carlo (EMC) dropout, across different evaluation metrics. Our analysis reveals that DNN with UQ, especially EBNN and EMC dropout, consistently outperform BNNs. For example, in Class 0 vs. All, EBNN achieved a <italic>U</italic>Acc of 92.6%, <italic>U</italic>AUC-ROC of 95.0%, and a Brier Score of 0.157, significantly surpassing BNN&#x00027;s performance. Similarly, EMC Dropout excelled in Class 1 vs. All with a <italic>U</italic>Acc of 83.5%, <italic>U</italic>AUC-ROC of 95.8%, and a Brier Score of 0.165. These advanced models demonstrated higher accuracy, better discriminative capability, and more accurate probabilistic predictions. Our findings highlight the efficacy of DNN with UQ in enhancing model reliability and interpretability, making them highly suitable for critical healthcare applications like chest X-ray image classification.</p></abstract>
<kwd-group>
<kwd>uncertainty quantification deep neural networks</kwd>
<kwd>Bayesian neural networks</kwd>
<kwd>Monte Carlo dropout</kwd>
<kwd>Ensemble Monte Carlo</kwd>
<kwd>chest-X-ray</kwd>
<kwd>classification metrics</kwd>
<kwd>multi-class classification</kwd>
</kwd-group>
<counts>
<fig-count count="2"/>
<table-count count="13"/>
<equation-count count="18"/>
<ref-count count="41"/>
<page-count count="14"/>
<word-count count="10539"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Machine Learning and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Computer vision has made enormous progress in recent times. The development of advanced deep learning techniques for computer vision is motivated by the human visual system, which is one of the richest senses that we have. While computer vision aims to replicate the capabilities of the human visual system, it is important to acknowledge that achieving this goal is still a considerable distance away. Thus, deep learning algorithms that can achieve state-of-the-art performance are still required for computer vision problems such as face recognition (Schroff et al., <xref ref-type="bibr" rid="B35">2015</xref>), object detection (Ren et al., <xref ref-type="bibr" rid="B34">2015</xref>), and image classification (Krizhevsky et al., <xref ref-type="bibr" rid="B22">2012</xref>). These algorithms are well suited to analyze images and signals. Machine learning (ML) refers to a collection of expert systems that encompass the creation of expert computer systems capable of learning from their mistakes and improving their performance, as described by Novakovi&#x00107; et al. (<xref ref-type="bibr" rid="B32">2017</xref>). Traditional machine learning algorithms, in contrast to deep learning algorithms, which may learn representations directly from raw data, rely on examples expressly constructed by human specialists to represent specific problem areas. Applying machine learning (ML) techniques to computer vision and image analysis systems encounters several challenges, including handling noisy and imperfect images, addressing complex background removal, and accommodating variations in illumination. These factors pose difficulties in developing handcrafted representations suitable for supervised machine learning algorithms.</p>
<p>Deep learning (DL) algorithms provide a viable solution to the difficulties faced by traditional machine learning (ML) models that manually extract features from images. The inherent multi-layered processing architecture of deep learning algorithms empowers these algorithms to autonomously learn and derive representations, thereby circumventing the limitations associated with manual feature extraction.</p>
<p>This study focuses on utilizing DL to distinguish between chest X-ray images associated with pneumonia, COVID-19, and normal cases. In order to classify these chest X-ray images and return the probability of an instance falling into a particular class, a considerable number of training examples that comprise healthy images (with no pneumonia nor COVID-19) or infected (with pneumonia or COVID-19) are utilized.</p>
<p>Deep learning-based techniques have demonstrated remarkable performance in distinguishing chest X-ray images that are infected with pneumonia or COVID-19 and those that are normal. The development of deep learning-based techniques for disease detection in real-life scenarios may face challenges due to the potential drawback of overconfident diagnostics, despite achieving high classification results (Hern&#x000E1;ndez and L&#x000F3;pez, <xref ref-type="bibr" rid="B18">2020</xref>). The reason behind the overconfidence is that most state-of-the-art deep learning models often fail to provide information about uncertainties, such as epistemic uncertainty (uncertainty stemming from the model itself) and/or aleatoric uncertainty (uncertainty arising from the data). Therefore, important features and sensitive information can be lost if traditional machine learning outcomes that do not account for either aleatoric or epistemic uncertainty are trusted (Abdar et al., <xref ref-type="bibr" rid="B4">2021b</xref>). Furthermore, deep learning methods are not designed to account for the uncertainty in a model&#x00027;s predictions, nor are they able to identify the important features that are responsible for a specific prediction (Abdar et al., <xref ref-type="bibr" rid="B4">2021b</xref>). The &#x0201C;inner workings&#x0201D; of these deep learning methods are not understood as they are typically used as &#x0201C;black boxes.&#x0201D;</p>
<p>This study seeks to gain better insights about deep learning models by quantifying the uncertainty that is inherent in these models when they are applied in computer vision tasks. We employ the Bayesian Deep Learning (BDL) techniques as well as the Deep Neural Network (DNN) that is coupled with the widely used method, the Monte Carlo Dropout (MCD), and other dropout techniques to quantify uncertainty. The MCD approach is better than the computationally expensive techniques that include the Markov Chain Monte Carlo (MCMC) (Kendall and Gal, <xref ref-type="bibr" rid="B19">2017</xref>). Moreover, the use of Bayesian uncertainty quantification (UQ) techniques such as MCD produces well-calibrated and precise estimates of uncertainty (Hern&#x000E1;ndez and L&#x000F3;pez, <xref ref-type="bibr" rid="B18">2020</xref>). It is important to note that the Bayesian UQ techniques use a distinct approach that derives the posterior probabilities of the parameters (weights) as opposed to traditional deep learning estimates that generate point estimates.</p>
<p>This study presents an innovative approach to uncertainty quantification in deep learning models used in medical image analysis. By focusing on multi-class classification of chest X-ray images, it aims to improve the interpretability and reliability of these models. Therefore, to advance UQ using Bayesian methods this study specifically makes the following significant contributions:</p>
<list list-type="simple">
<list-item><p>(i) Adapt the innovative concept of a binary uncertainty confusion matrix, along with its novel performance metrics proposed by Asgharnezhad et al. (<xref ref-type="bibr" rid="B7">2022</xref>), for objective uncertainty quantification. This adaptation extends the binary uncertainty confusion matrix with its novel performance metrics for multi-class tasks, enhancing the evaluation of model performance and reliability across various classes.</p></list-item>
<list-item><p>(ii) Provide valuable insights into the performance and reliability of different uncertainty quantification models across various classes.</p></list-item>
<list-item><p>(iii) Provide a comprehensive evaluation of uncertainty quantification techniques that include Bayesian neural networks (BNN), Monte Carlo dropout, Ensemble Bayesian Neural Network (EBNN), Ensemble dropout, and Ensemble Monte-Carlo (EMC) dropout. The aim is to compare the effectiveness of these methods in capturing and quantifying uncertainty in the predictions of multi-class classification models for chest X-ray images. This will give valuable insights into the performance and reliability of different uncertainty quantification techniques across various classes. Such information would assist practitioners when they are selecting appropriate uncertainty quantification methods for their neural network models.</p></list-item>
<list-item><p>(iv) Highlight the potential of uncertainty-aware models to enhance the reliability and interpretability of predictions in critical medical image analysis. This will in turn help to improve the safety and efficacy of AI-driven healthcare solutions, particularly in the classification of COVID-19 cases.</p></list-item>
</list></sec>
<sec id="s2">
<title>2 Related works</title>
<p>We note that machine learning methods have achieved great success in solving many real-life problems, but they have not been able to provide more information about the reliability of their predictions in most cases. This challenge has necessitated the use of the promising Bayesian neural networks (BNNs) which model prior distributions on the model parameters to quantify uncertainty (Alarab and Prakoonwit, <xref ref-type="bibr" rid="B5">2023</xref>). The authors indicate that assigning a prior distribution over a model&#x00027;s parameters and then marginalizing the parameters creates a predictive distribution that uses Bayesian averaging. With this framework, prior distributions are assigned to the weights of the model, and thereafter, Bayes&#x00027; theorem is used to determine the posterior distributions which are approximated because they cannot be evaluated analytically. Bayesian UQ techniques have been used in several image classification tasks (Harakeh et al., <xref ref-type="bibr" rid="B15">2020</xref>; Kwon et al., <xref ref-type="bibr" rid="B23">2020</xref>; Bessai-Mechmache et al., <xref ref-type="bibr" rid="B8">2022</xref>). However, Monte Carlo (MC) sampling has emerged as an effective technique that can be used to estimate the posterior distribution and thereby quantify uncertainty (Neal, <xref ref-type="bibr" rid="B31">2012</xref>). The use of MC sampling has a limitation in that when it is deployed in deep architectures, it can be slow and computationally expensive. Gal and Ghahramani (<xref ref-type="bibr" rid="B12">2016</xref>) state that this limitation can be addressed by employing efficient techniques such as MC-dropout, which has been developed as a regularization technique to quantify uncertainty and avoid overfitting. Furthermore, the authors highlight that applying the dropout regularization technique after each hidden layer allows the MC-dropout technique to evaluate uncertainty in neural networks. 
Moreover, the MC-dropout technique is employed during the testing phase to generate uncertainty-aware estimates (Alarab and Prakoonwit, <xref ref-type="bibr" rid="B5">2023</xref>). Alarab and Prakoonwit (<xref ref-type="bibr" rid="B5">2023</xref>) demonstrated that MC-dropout was more effective in quantifying uncertainty when applied to the Elliptic (Bitcoin-derived) dataset compared to other techniques. Lemay et al. (<xref ref-type="bibr" rid="B26">2022</xref>) indicated that the predictions derived from the Monte Carlo dropout were better calibrated when it was employed on four medical image classification tasks that used DenseNet and ResNet architectures. According to the authors, the output probabilities produced were more accurate and reflected the likelihood of correct classification. Mobiny et al. (<xref ref-type="bibr" rid="B29">2021</xref>) proposed the Monte Carlo DropConnect (MC-DropConnect) technique that incorporated Bayesian Inference in deep neural networks (DNNs). In this approach, the weights/parameters were assumed to follow a Bernoulli distribution. The empirical results showed that the predictive accuracy of MC-DropConnect significantly outperformed other state-of-the-art techniques. The multi-class classification based Monte Carlo-based adversarial attack (MC-AA) method on the Cora dataset was introduced by Alarab and Prakoonwit (<xref ref-type="bibr" rid="B5">2023</xref>). The authors compared MC-AA with other recent uncertainty models such as convolutional neural networks (CNN) and LeConv. The best results for modeling uncertainty were obtained using LeConv (AUC = 0.889) deployed on the Cora datasets and CNN (AUC = 0.98) deployed on the MNIST datasets.</p>
<p>Thiagarajan et al. (<xref ref-type="bibr" rid="B39">2021</xref>) classified histopathological images using a hybrid Bayesian-convolutional neural network (Bayesian-CNN). When applied to a large portion of the test dataset, the Bayesian-CNN used the quantified uncertainties to significantly enhance the performance of the CNN. Abdar et al. (<xref ref-type="bibr" rid="B4">2021b</xref>) employed techniques such as EMC dropout and deep ensemble for uncertainty quantification in skin cancer image classification. Abdar et al. (<xref ref-type="bibr" rid="B3">2023</xref>) employed the effective Ensemble MC Dropout (EMCD) technique, achieving a prediction accuracy of 99.08% for the computed tomography (CT) scan dataset and 96.35% for the chest X-ray dataset. The authors also indicate that EMCD was used not only to detect COVID-19 but also to quantify uncertainty using chest X-ray images. McDermott and Wikle (<xref ref-type="bibr" rid="B28">2019</xref>) employed deep ensemble (DE) to quantify uncertainty. DE uses an ensemble model that comprises several neural networks. The authors used a DE echo state network model for spatio-temporal forecasting while also evaluating and quantifying uncertainty. DE methods have been found to outperform the Bayesian neural networks in uncertainty quantification, yielding more accurate UQ estimates (Alarab and Prakoonwit, <xref ref-type="bibr" rid="B5">2023</xref>). However, Abdar et al. (<xref ref-type="bibr" rid="B4">2021b</xref>) noted that DE methods tend to be more computationally expensive.</p>
<p>Narl&#x00131; (<xref ref-type="bibr" rid="B30">2021</xref>) investigated the impact of applying local histogram equalization (LHE) on the performance of deep learning architectures for COVID-19 classification using chest X-ray images. The effect of the disk factor in LHE on transfer learning was examined by comparing the results obtained with and without LHE preprocessing. The dataset used by Narl&#x00131; (<xref ref-type="bibr" rid="B30">2021</xref>) consisted of chest X-ray images from three classes: COVID-19, Pneumonia, and Normal. Each chest X-ray image was segmented into two parts: the right lung lobe and the left lung lobe. The classification performance of transfer learning was evaluated by applying different disk values for LHE and the experiments were conducted using various pre-trained DL architectures, including VGG16, AlexNet, and Inception models. Altan and Narl&#x00131; (<xref ref-type="bibr" rid="B6">2022</xref>) employed simplistic CNN architectures with enhanced medical images using contrast limited adaptive histogram equalization (CLAHE) to classify healthy chest X-rays (CXRs) and those with COVID-19. The study utilized a large-scale dataset of 3,615 COVID-19 cases, demonstrating the clinical applicability of the proposed method, which enhanced feature learning and preprocessing stages to facilitate early diagnosis of COVID-19. The study achieved an impressive accuracy rate of 95.878% for binary classification of COVID-19 and healthy cases using the VGG16 model with optimal CLAHE parameters.</p>
<p>Yang and Fevens (<xref ref-type="bibr" rid="B40">2021</xref>) conducted experiments using two medical imaging datasets: a SARS-CoV2 CT dataset and the BreaKHis dataset (Spanhol et al., <xref ref-type="bibr" rid="B36">2015</xref>). The study highlighted the ability to identify uncertain samples and categories, demonstrating that by excluding a percentage of the most uncertain inputs, the accuracy of the model&#x00027;s predictions could be significantly improved. This approach ensured better clinical outcomes by providing a more reliable framework for the application of DNNs in medical diagnostics. Moreover, the findings underscored the potential of UQ methodologies to enhance the practical utility of DNNs in healthcare, ultimately supporting better patient management and treatment strategies.</p>
<p>Machine learning models have typically been evaluated in biomedical research using measures such as sensitivity, specificity, precision, accuracy, and Matthews correlation coefficient (MCC). Rabiei et al. (<xref ref-type="bibr" rid="B33">2022</xref>) used sensitivity, specificity, and accuracy metrics to evaluate machine learning models for predicting breast cancer recurrence, demonstrating their effectiveness in accurately identifying true positive and true negative cases, which is crucial for clinical decision-making. Similarly, Helaly et al. (<xref ref-type="bibr" rid="B17">2022</xref>) used deep learning models to detect early Alzheimer&#x00027;s disease and measured sensitivity, specificity, precision, and AUC-ROC. The inclusion of AUC-ROC enabled a detailed assessment of the models&#x00027; discriminative performance across various threshold settings, offering a robust evaluation framework. Hajian-Tilaki (<xref ref-type="bibr" rid="B14">2013</xref>) discussed the application of sensitivity, specificity, precision, and accuracy within the context of ROC curve analysis for medical diagnostic test evaluation, highlighting their importance in capturing trade-offs between true positive and false positive rates and offering a comprehensive tool for evaluating diagnostic accuracy. However, traditional evaluation metrics do not account for uncertainty in models. It is important to derive and use performance metrics that quantify uncertainty, as understanding model uncertainty can significantly enhance the reliability and interpretability of predictions in real-world applications. Calculating uncertainty-aware evaluation metrics is crucial as it can boost confidence and trust in machine learning models. Several deep learning models struggle to provide necessary uncertainty-aware predictions, as they often fail to capture inherent uncertainties effectively. 
Consequently, these models lack the required uncertainty-aware reasoning when deployed in computer vision tasks. To address this limitation, it is essential to explore methodologies that effectively quantify uncertainty within deep learning models. Therefore, this study investigates the application of Bayesian methods to determine if they offer improved uncertainty quantification in deep learning techniques for multi-class classification of chest X-ray images. Uncertainty quantification (UQ) in multi-class classification has not received much attention, as UQ research primarily focuses on regression and binary classification tasks, overlooking the unique techniques and challenges specific to multi-class classification.</p>
<p>The remainder of the paper is organized as follows. Related works are presented in Section 2. The proposed methodology is described in Section 3.1. The experiments are introduced in Section 3.8. Section 4 discusses the results and Section 6 concludes the paper.</p></sec>
<sec sec-type="materials and methods" id="s3">
<title>3 Materials and methods</title>
<sec>
<title>3.1 Dataset</title>
<p>This paper aimed to achieve significant advancements in uncertainty quantification through multi-class classification using a publicly available chest X-ray image dataset, accessible at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/prashant268/chest-xray-covid19-pneumonia">https://www.kaggle.com/datasets/prashant268/chest-xray-covid19-pneumonia</ext-link>. The dataset comprised 619 chest X-ray images of patients with COVID-19, 526 chest X-ray images of patients with pneumonia, along with 732 images depicting healthy/normal lungs. We noted that there was a moderate imbalance in the dataset and we applied the resampling technique to achieve balance by oversampling the minority class. For this, we used the <italic>imblearn</italic> library in Python. Specifically, the <italic>RandomOverSampler</italic> was employed to oversample the minority classes to obtain the following class distributions: Class 1 (COVID-19 images) 732, Class 0 (Normal images) 732, and Class 2 (Pneumonia images) 732.</p>
</sec>
<sec>
<title>3.2 Multi-classification</title>
<p>In real-life scenarios, numerous classification problems involve the need to distinguish between more than two classes. Examples of such problems include face recognition, hand gesture recognition, general object detection, speech recognition, and many others. These applications require algorithms and techniques that can effectively classify data into multiple distinct categories, enabling various tasks and applications in fields such as computer vision, natural language processing, and human-computer interaction.</p>
<p>In this paper, we adopt a methodology for addressing multi-class classification tasks referred to as the &#x0201C;One vs. All&#x0201D; (OvA) strategy. Under this approach, we train multiple linear classifiers <italic>C</italic>, where <italic>C</italic>&#x0003E;2. <italic>C</italic> denotes the number of classes within the classification task. Each classifier is tasked with distinguishing one class from the remaining classes in the dataset. Therefore, this method entails the training of <italic>C</italic> binary classifiers, each dedicated to distinguishing a single class from the rest. During training, for each class, a binary target variable is created. We assign a positive label to instances belonging to the class being considered and a negative label to instances belonging to other classes. Then, we train a linear classifier using this binary target variable and the corresponding features. At prediction time, we apply all <italic>C</italic> classifiers to the input data, and each classifier produces a score or probability indicating the likelihood of the input belonging to its respective class. The class with the highest score is assigned as the predicted class for the input. This technique effectively reduces the multi-class problem into a series of binary classification problems. Each classifier learns to discriminate one class from the rest, allowing us to handle problems with more than two classes using linear classifiers.</p>
<p>In a dataset with multiple classes, denoted as <inline-formula><mml:math id="M1"><mml:msubsup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, there exist <italic>C</italic> distinct classes of data. Similar to the two-class scenario, we have the flexibility to use any <italic>C</italic> distinct labels for distinguishing between these classes. For convenience&#x00027;s sake the following label values <italic>y</italic><sub><italic>n</italic></sub> &#x02208; {0, 1, ..., <italic>C</italic>&#x02212;1} are assigned.</p>
</sec>
<sec>
<title>3.3 Deep learning methods</title>
<sec>
<title>3.3.1 Bayesian neural networks</title>
<p>In this section, we provide a brief overview of Bayesian neural networks (BNNs). BNNs are robust against overfitting and capable of handling high-dimensional inputs, such as images (Abdar et al., <xref ref-type="bibr" rid="B4">2021b</xref>).</p>
<p>A neural network is considered as a probabilistic model when it is able to account for and quantify uncertainty within its predictions. A probabilistic neural network can be represented by <italic>P</italic>(<italic>y</italic> &#x02223; <italic>x</italic>, &#x003C9;), where <italic>X</italic> &#x0003D; {<italic>x</italic><sub>1</sub>, ..., <italic>x</italic><sub><italic>n</italic></sub>} are the training samples (input data), and <italic>Y</italic> &#x0003D; {<italic>y</italic><sub>1</sub>, ..., <italic>y</italic><sub><italic>n</italic></sub>} represents the set of all possible outcomes (output data). In addition, we consider <bold>&#x003C9;</bold>, to be the set of parameters that are learnt through a complete Bayesian approach. Furthermore, using a training dataset <inline-formula><mml:math id="M2"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mo>,</mml:mo><mml:mi>Y</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, the posterior distribution, <italic>P</italic>(&#x003C9; &#x02223; <italic>D</italic>) is evaluated through Bayesian inference that employs marginalization over all the values of &#x003C9; (Khairnar et al., <xref ref-type="bibr" rid="B20">2020</xref>). Therefore, to estimate <italic>P</italic>(&#x003C9; &#x02223; <italic>D</italic>) the Bayes theorem is employed as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C9;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where the likelihood of the training data <italic>D</italic> given the parameters &#x003C9; is given by <italic>P</italic>(<italic>D</italic> &#x02223; &#x003C9;), and <inline-formula><mml:math id="M4"><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>&#x02223;</mml:mo><mml:msup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is the product of the likelihoods, assuming that each training data point is independently and identically distributed (<italic>iid</italic>). (<inline-formula><mml:math id="M5"><mml:msup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>) represent, respectively, the input data and associated labels.
The term <italic>P</italic>(&#x003C9;) denotes the distribution of weights before observing the data, and <inline-formula><mml:math id="M6"><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mo>&#x0222B;</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mtext>&#x003A9;</mml:mtext></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>d</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:math></inline-formula> denotes the marginalization over the weight distribution. <xref ref-type="disp-formula" rid="E1">Equation 1</xref> shows how well the parameters &#x003C9; explain the training data that was observed.</p>

<p>Once <italic>P</italic>(&#x003C9; &#x02223; <italic>D</italic>) has been determined, the expected values of the predictive distributions can be used to obtain predictions for test data. Thus, for an unknown label &#x00177; of a data observation <inline-formula><mml:math id="M7"><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the predictive distribution can be expressed as:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x00177;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mo class="qopname">&#x1D53C;</mml:mo></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C9;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x00177;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo class="qopname">^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:msub><mml:mrow><mml:mo class="qopname">&#x0222B;</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mtext>&#x003A9;</mml:mtext></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mstyle><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x00177;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo class="qopname">^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C9;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>d</mml:mi><mml:mi>&#x003C9;</mml:mi><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The Bayes approach aims to optimize the parameters, &#x003C9;, by maximizing the likelihood, <italic>P</italic>(<italic>y</italic> &#x02223; <italic>x</italic>, &#x003C9;). In this study, our task involves multi-classification of images, so we utilize the softmax (Liu et al., <xref ref-type="bibr" rid="B27">2016</xref>) likelihood to compute the predictive probabilities as follows:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>For classification purposes, the model&#x00027;s output can be obtained using a softmax function; this allows sampling from the probability vector: <inline-formula><mml:math id="M10"><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">softmax</mml:mtext></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x00175;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>. The resulting model output is then mapped to a set of class labels for multi-classification.</p>
<p>The computation of the posterior predictive probabilities, <italic>P</italic>(<italic>y</italic> &#x0003D; <italic>k</italic> &#x02223; <italic>x</italic>, &#x003C9;), as depicted in <xref ref-type="disp-formula" rid="E4">Equation 4</xref>, presents a significant challenge as it cannot be evaluated analytically. This is because it requires explicit modeling of uncertainties and can be computationally intensive, especially when handling complex data distributions or large datasets. On the other hand, BNNs naturally handle uncertainty through their probabilistic weight sampling mechanism.</p>
<p>To quantify uncertainty using BNN, we employ the architecture shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Architecture of the Bayesian Neural Network (BNN).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1410841-g0001.tif"/>
</fig>


<p>This architecture uses <monospace>tfpl.DenseFlipout</monospace> layers (<ext-link ext-link-type="uri" xlink:href="https://www.tensorflow.org/probability/api_docs/python/tfp/layers/DenseFlipout">https://www.tensorflow.org/probability/api_docs/python/tfp/layers/DenseFlipout</ext-link>), which are components of TensorFlow Probability (TFP) designed to integrate uncertainty into neural network predictions. These layers employ Bayesian inference by introducing stochastic weights during training, enabling the model to quantify uncertainty in its predictions. This Bayesian approach is crucial for enhancing the reliability of the neural network&#x00027;s outputs, particularly in applications where understanding the confidence of predictions is essential. Incorporating <monospace>tfpl.DenseFlipout</monospace> layers allows the model to effectively account for uncertainty, resulting in more reliable and insightful predictions (Dillon et al., <xref ref-type="bibr" rid="B11">2017</xref>; Abdar et al., <xref ref-type="bibr" rid="B2">2021a</xref>).</p></sec>
<sec>
<title>3.3.2 Deep neural networks</title>
<p>To quantify uncertainty using a DNN, we employ a practical approach that leverages various dropout techniques. We adapt a deep neural network (DNN) (<xref ref-type="fig" rid="F2">Figure 2</xref>) to accommodate dropout techniques such as Monte-Carlo dropout.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Architecture of Deep Neural Network (DNN) that is modified to quantify uncertainty using different dropout techniques.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1410841-g0002.tif"/>
</fig>


<p>Dropout techniques provide a means for estimating uncertainty in model predictions (Gal and Ghahramani, <xref ref-type="bibr" rid="B12">2016</xref>; Kendall and Gal, <xref ref-type="bibr" rid="B19">2017</xref>). Specifically, during inference or testing, dropout is applied stochastically to the network, resulting in multiple predictions for the same input. By averaging these predictions, we can obtain an estimate of the model&#x00027;s uncertainty (Srivastava et al., <xref ref-type="bibr" rid="B37">2014</xref>; Gal and Ghahramani, <xref ref-type="bibr" rid="B12">2016</xref>).</p>
<p>We explore different dropout techniques, such as Monte Carlo dropout, Ensemble dropout, and Expected Model Change dropout applied to DNNs, to comprehensively assess and quantify uncertainty in our multi-class classification task. In addition, we compare the performance of the modified DNN with different dropout techniques to that of the BNN.</p>
</sec>
</sec>
<sec>
<title>3.4 Uncertainty quantification using Monte-Carlo (MC) dropout</title>
<p>We use the MC dropout technique as a regularization method for computing predictions during both the training and inference phases. By averaging multiple predictions, we aim to improve accuracy. As discussed earlier, estimating the posterior distribution poses computational challenges. To address this issue, we leverage MC sampling methods (Asgharnezhad et al., <xref ref-type="bibr" rid="B7">2022</xref>). These methods involve performing multiple stochastic forward passes with dropout during testing, which generates MC samples from the posterior distribution. This approach reduces the computational burden of approximating the output posterior distribution.</p>
<p>In practice, the model&#x00027;s predictive mean resembles the expectation of &#x00177; (the predicted output). As a result, the final prediction for a test sample is obtained by using the predictive mean, denoted as &#x003BC;<sub>pred</sub>, computed over the MC iterations (Ghoshal and Tucker, <xref ref-type="bibr" rid="B13">2020</xref>).</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M11"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where the test input is denoted by <italic>x</italic>. The prediction probability produced from the softmax output is denoted by <italic>P</italic>(<italic>y</italic> &#x0003D; <italic>k</italic> &#x02223; <italic>x</italic>, &#x003C9;). Additionally, &#x003C9; denotes the model&#x00027;s parameters for the <italic>t</italic>th forward pass, and the number of Monte Carlo (MC) iterations, or forward passes, is represented by <italic>T</italic>.</p>
<p>The output prediction for each test sample <inline-formula><mml:math id="M12"><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> is determined by selecting the class with the highest predictive mean, while the variance provides a measure of predictive uncertainty. Validating the model under epistemic uncertainty has traditionally been a complex task. In order to quantify epistemic uncertainty, Ghoshal and Tucker (<xref ref-type="bibr" rid="B13">2020</xref>) suggest utilizing predictive entropy (PE):</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where the summation index <italic>c</italic> runs over the <italic>C</italic> classes. <xref ref-type="disp-formula" rid="E5">Equation 5</xref> provides a measure of the confidence of the model in making predictions in classification tasks. Additionally, <italic>PE</italic> evaluates how much a prediction correlates to each individual class, or how much a prediction differs from its true label. A model is more confident in making predictions when the value of <italic>PE</italic> gets smaller.</p>
</sec>
<sec>
<title>3.5 Uncertainty quantification using Ensemble Monte-Carlo (EMC) dropout</title>
<p>Uncertainty quantification using Ensemble Monte Carlo (EMC) dropout entails employing a technique that combines ensemble methods with Monte-Carlo dropout. It utilizes an ensemble of different DNN architectures. When using the Monte Carlo dropout algorithm, multiple stochastic forward passes are performed to evaluate each network in the ensemble. The resulting posterior probabilities are averaged to estimate a single Gaussian distribution. The calculation of the predictive entropy (PE) metric is identical to that of the ensemble approach, with the only significant difference being the methodology that is used in determining the following posterior distribution:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M14"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mover accent="true"><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mover accent="true"><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x00177;</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>and the predictive entropy (PE) metric is then expressed as follows:</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mover accent="true"><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mover accent="true"><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>C</italic> is the number of classes and &#x003C9; are the model&#x00027;s parameters.</p>
</sec>
<sec>
<title>3.6 Uncertainty quantification using Ensemble Bayesian Neural Network (EBNN)</title>
<p>The Ensemble Bayesian Neural Network (EBNN) is a collection of networks that collaborate to perform a particular task. Each network generates predictive probabilities that are averaged to obtain the final predictive probability. The predictive entropy (PE) (<xref ref-type="disp-formula" rid="E9">Equation 9</xref>) is again used to quantify the uncertainty.</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E9"><label>(9)</label><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>&#x003B8;</italic><sub><italic>i</italic></sub> is the set of parameters of the <italic>i</italic>th network in the ensemble, and <italic>C</italic> represents the number of classes. A smaller value of <italic>PE</italic> indicates similarity of the predictions from all individual networks.</p>
</sec>
<sec>
<title>3.7 Model training</title>
<p>We begin by extracting essential features from the CXR images by pretraining a DenseNet121 model using normal chest X-ray images and subsequently use the extracted features to evaluate whether DNNs have the potential to classify chest X-ray images. This approach, adapted from Asgharnezhad et al. (<xref ref-type="bibr" rid="B7">2022</xref>), is different from the conventional practice of training deep models from scratch in a transfer learning scenario. Instead, the approach fine-tunes the weights of pre-existing deep neural networks that are pretrained on natural image datasets such as ImageNet, which are specifically tailored for medical image analysis. Next, we utilize the methodology suggested by Alarab and Prakoonwit (<xref ref-type="bibr" rid="B5">2023</xref>) to perform One vs. All (OvA) classification of chest X-ray images of pneumonia, COVID-19 and normal images. Here, we transform the multi-class classification task into multiple binary classification problems by assigning temporary labels to the dataset, distinguishing each class from the rest of the data. This step involves training multiple two-class classifiers, each focused on discerning one class from the remaining <italic>C</italic>&#x02212;1 classes. Thereafter, the Deep Neural Network (DNN) model is incorporated into our experimental procedure.</p>
<p>To achieve optimal performance during training, it is important to employ optimal hyper-parameters of the deep learning algorithms, particularly the learning rate (lr). According to Zhang et al. (<xref ref-type="bibr" rid="B41">2020</xref>), a lower lr enhances the reliability of the training phase but can prolong the optimization process due to smaller updates in the loss function. Conversely, a higher lr risks non-convergence or divergence, as it may cause the optimization phase to skip over the optimal value, worsening the loss function. This can lead to unproductive oscillations and poor generalization, as the training weights fail to stabilize at an optimal value. Following the recommendations in Kingma and Ba (<xref ref-type="bibr" rid="B21">2014</xref>), Zhang et al. (<xref ref-type="bibr" rid="B41">2020</xref>), Asgharnezhad et al. (<xref ref-type="bibr" rid="B7">2022</xref>), and Sun et al. (<xref ref-type="bibr" rid="B38">2024</xref>), who obtained the best values of all evaluation metrics when the learning rate was set to 0.001, we used a default learning rate of 0.001 for the Adam algorithm, which is deemed effective for stochastic optimization.</p>
<p>The architecture of the deep neural network (DNN) used in this study is defined by the <monospace>create_model()</monospace> function, which generates the DNN model using TensorFlow (Abadi et al., <xref ref-type="bibr" rid="B1">2015</xref>). The model is designed to include three hidden layers with 128, 64, and 32 neurons, respectively, and utilizes <italic>ReLU</italic> activation functions for non-linearity. Dropout layers with a dropout rate of 0.5 are included after each hidden layer to prevent overfitting. The output layer produces probabilistic predictions using a sigmoid activation function. The model is compiled with a binary cross-entropy loss function and optimized using the Adam optimizer (Kingma and Ba, <xref ref-type="bibr" rid="B21">2014</xref>) with a learning rate of 0.001. This architecture is tailored for binary classification tasks and enables robust model performance through dropout-based regularization.</p>
<p><xref ref-type="table" rid="T1">Table 1</xref> presents the architecture of the DNN and the corresponding number of parameters.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Architecture of the Deep Neural Network (DNN) and number of parameters.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Layer</bold></th>
<th valign="top" align="center"><bold>Number of parameters</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Input layer (Dense)</td>
<td valign="top" align="center">19,328</td>
</tr> <tr>
<td valign="top" align="left">Dropout layer 1</td>
<td valign="top" align="center">0</td>
</tr> <tr>
<td valign="top" align="left">Hidden layer 1</td>
<td valign="top" align="center">8,256</td>
</tr> <tr>
<td valign="top" align="left">Dropout layer 2</td>
<td valign="top" align="center">0</td>
</tr> <tr>
<td valign="top" align="left">Hidden layer 2</td>
<td valign="top" align="center">2,080</td>
</tr> <tr>
<td valign="top" align="left">Dropout layer 3</td>
<td valign="top" align="center">0</td>
</tr> <tr>
<td valign="top" align="left">Output layer (dense)</td>
<td valign="top" align="center">33</td>
</tr>
<tr>
<td valign="top" align="left">Total</td>
<td valign="top" align="center">29,697</td>
</tr></tbody>
</table>
</table-wrap>


<p>The DNN architecture iterates over each class label in the dataset and trains separate models for each binary classification task (e.g., Class 0 vs. All, Class 1 vs. All, etc.). For each class, the DNN model is trained using the training data and evaluated using the test data. The evaluation metrics include accuracy, F1 score, precision, recall, and ROC AUC score, which are computed and stored. This approach ensures a comprehensive assessment of the model&#x00027;s performance across different classification tasks.</p>
<p>The training parameters for the DNN are detailed in <xref ref-type="table" rid="T2">Table 2</xref>. The model was trained over 15 epochs with a batch size of 64. In addition, five different models were trained for Ensemble Monte Carlo (EMC) and Ensemble Bayesian neural network (EBNN) to enhance model robustness.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Training parameters for DNN.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Parameter</bold></th>
<th valign="top" align="center"><bold>Value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Learning rate</td>
<td valign="top" align="center">0.001</td>
</tr> <tr>
<td valign="top" align="left">Epochs</td>
<td valign="top" align="center">15</td>
</tr> <tr>
<td valign="top" align="left">Batch size</td>
<td valign="top" align="center">64</td>
</tr>
<tr>
<td valign="top" align="left">Number of models</td>
<td valign="top" align="center">5</td>
</tr></tbody>
</table>
</table-wrap>


<p>After training and during the inference stage of DNN, we use different uncertainty quantification techniques to assess model performance independently. Monte Carlo Dropout (MC Dropout) involves maintaining dropout layers active during inference, enabling stochastic sampling of predictions by randomly deactivating units and their connections in each forward pass. This allows the model to generate multiple predictions per input, thereby capturing the variance in the outcomes to estimate uncertainty. On the other hand, Ensemble Bayesian Neural Network (EBNN) averages predictions across multiple stochastic passes, thereby smoothing out prediction variability and providing a more stable estimate of uncertainty. In addition, Ensemble Monte Carlo Dropout (EMC) refines uncertainty estimation by aggregating predictions from an ensemble of models, each trained with dropout, to produce a consensus view of prediction uncertainty. These techniques individually yield probabilistic distributions that quantify uncertainty, offering more insights into the confidence level of the model&#x00027;s outputs. Such detailed uncertainty quantification is crucial in applications like medical diagnostics, where understanding prediction confidence supports informed decision-making.</p>
<p>Model training was conducted using Python libraries such as NumPy, PyTorch, and pandas. Each experiment was repeated 100 times with different random seeds to ensure repeatability. <xref ref-type="table" rid="T3">Table 3</xref> shows the training times for DNN with different uncertainty quantification techniques, trained for 15 epochs with a batch size of 64.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Training times for DNN trained using Monte Carlo (MC) dropout, as well as for DNN trained with five different models of the Ensemble Monte Carlo (EMC) and Ensemble Bayesian Neural Network (EBNN) dropout techniques.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Trainable parameters</bold></th>
<th valign="top" align="center"><bold>Number of epochs</bold></th>
<th valign="top" align="center"><bold>CPU elapsed time (min)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">DNN &#x0002B; MC dropout</td>
<td valign="top" align="center">29,697</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">13.618</td>
</tr> <tr>
<td valign="top" align="left">DNN &#x0002B; EMC dropout</td>
<td valign="top" align="center">148,485</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">20.477</td>
</tr>
<tr>
<td valign="top" align="left">DNN &#x0002B; EBNN dropout</td>
<td valign="top" align="center">148,485</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">20.477</td>
</tr></tbody>
</table>
</table-wrap>


<p>The experiments were executed on Google Colab, utilizing its default hardware settings, which include a GPU (1xTesla K80, compute 3.7, 2496 CUDA cores, 12GB GDDR5 VRAM).</p>
<p>The performance of DNN (with different dropout techniques) was compared using model evaluation metrics described in Section 3.8.</p>
</sec>
<sec>
<title>3.8 Model evaluation metrics</title>
<sec>
<title>3.8.1 Traditional evaluation metrics</title>
<p>To assess the performance of DNN without the dropout techniques, we adopted several traditional evaluation metrics, namely sensitivity, specificity, accuracy, and precision, which have been used in previous biomedical papers (Hajian-Tilaki, <xref ref-type="bibr" rid="B14">2013</xref>; Boughorbel et al., <xref ref-type="bibr" rid="B9">2017</xref>; He et al., <xref ref-type="bibr" rid="B16">2021</xref>; Helaly et al., <xref ref-type="bibr" rid="B17">2022</xref>; Le and Xu, <xref ref-type="bibr" rid="B25">2023</xref>; Nguyen Quoc Khanh Le, <xref ref-type="bibr" rid="B24">2023</xref>). These metrics are defined by the following equations. We used the number of true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) to compute these metrics.</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Sensitivity</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E11"><label>(11)</label><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Specificity</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E12"><label>(12)</label><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Accuracy</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E13"><label>(13)</label><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Precision</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where TP represents the true positives, TN represents the true negatives, and FP and FN represent the false positives and false negatives, respectively. Boughorbel et al. (<xref ref-type="bibr" rid="B9">2017</xref>) state that the Matthews correlation coefficient (MCC) lies in the interval [&#x02212;1, 1], with 1 indicating perfect classification and &#x02212;1 indicating perfect misclassification.</p></sec>
<sec>
<title>3.8.2 Performance metrics for the predictive uncertainty estimations</title>
<p>The dropout techniques, including MC dropout, EMC, and EBNN, are incorporated into the deep neural network (DNN) for uncertainty quantification. Subsequently, we utilize the uncertainty confusion matrix developed by Asgharnezhad et al. (<xref ref-type="bibr" rid="B7">2022</xref>), which employs a concept akin to a confusion matrix, to carry out the predictive uncertainty evaluation. In this study, we employ the &#x0201C;One vs. All&#x0201D; multi-class classification strategy, which reduces to binary classification for each class. The performance indicators for predictive uncertainty estimations are quantified by the uncertainty confusion matrix as shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>




<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Confusion matrix for calculating uncertainty quantification metrics (Asgharnezhad et al., <xref ref-type="bibr" rid="B7">2022</xref>).</p></caption>
<table frame="box" rules="all">
<tbody>
<tr>
<td valign="top" align="left"><inline-graphic xlink:href="frai-07-1410841-i0001.tif"/></td>
</tr>
</tbody>
</table>
</table-wrap>


<p>The uncertainty confusion matrix objectively and quantitatively evaluates the predictive uncertainty estimates. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the predictions are categorized into two groups, <italic>correct</italic> and <italic>incorrect</italic>, after being compared to the labels of the ground truth. Additionally, a threshold is employed to evaluate and categorize prediction uncertainty estimates into two groups: <italic>uncertain</italic> and <italic>certain</italic>.</p>
<p>Four different outcomes can result from the combination of correctness and confidence, as shown in <xref ref-type="table" rid="T4">Table 4</xref>, namely: (i) <italic>true certainty (TC)</italic>, which represents a combination of correct and certain predictions; (ii) <italic>true uncertainty (TU)</italic>, which represents a combination of incorrect and uncertain predictions; (iii) <italic>false certainty (FC)</italic>, which denotes predictions that are certain but incorrect; and (iv) <italic>false uncertainty (FU)</italic>, which denotes predictions that are correct but uncertain. The intended results are the diagonal entries <italic>TC</italic> and <italic>TU</italic>. These outcomes correspond to True Negative (TN) and True Positive (TP) in the standard confusion matrix, respectively. The following quantitative performance metrics, which objectively quantify the prediction uncertainty estimations, are produced as a result of these combinations of correctness and confidence groups:</p>
<list list-type="simple">
<list-item><p>(i) Uncertainty sensitivity (<italic>USen</italic>):</p>
<p><disp-formula id="E14"><label>(14)</label><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>U</mml:mi><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p><italic>USen</italic> (or <italic>URec</italic>) corresponds to the sensitivity (<italic>recall</italic>), or true positive (TP) rate, of the classic confusion matrix. <italic>USen</italic> is very important in that it quantifies the model&#x00027;s power to express confidence in incorrectly classified samples.</p></list-item>
<list-item><p>(ii) Uncertainty Specificity (<italic>USpe</italic>):</p>
<p><disp-formula id="E15"><label>(15)</label><mml:math id="M23"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>U</mml:mi><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>C</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>U</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p><italic>USpe</italic> is equivalent to the specificity performance metric derived from the traditional confusion matrix.</p></list-item>
<list-item><p>(iii) Uncertainty precision (<italic>UPre</italic>):</p>
<p><disp-formula id="E16"><label>(16)</label><mml:math id="M24"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>U</mml:mi><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>U</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p><italic>UPre</italic> is equivalent to precision derived from the traditional confusion matrix.</p></list-item>
<list-item><p>(iv) Uncertainty accuracy (UAcc):</p>
<p><disp-formula id="E17"><label>(17)</label><mml:math id="M25"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>U</mml:mi><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>U</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>C</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>U</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p>A model with a high <italic>UAcc</italic> is considered reliable.</p></list-item>
</list>
<p>Other metrics that are used to evaluate the performance of the models include the area under the receiver operating characteristic curve (AUC-ROC), confidence, and the expected calibration error (ECE) described below.</p>
<list list-type="simple">
<list-item><p>(v) AUC-ROC:</p>
<p>The results of the classification task are further cross-validated using the area under the receiver operating characteristic curve (AUC-ROC). The ROC is a probability curve, and the area under it represents a degree or measure of separability. This means that we can measure how well a model can distinguish between classes. The AUC-ROC is a function of sensitivity and specificity.</p></list-item>
<list-item><p>(vi) Expected Calibration Error (ECE) and Brier Score:</p>
<p>Predictions are categorized into <italic>M</italic> bins (based on the value of the maximum softmax output) according to their confidence in order to calculate the ECE. The calibration error in each bin quantifies the discrepancy between the percentage of correctly classified predictions (accuracy) and the probability average (confidence). The calibration errors across all bins are weighted to produce the ECE.</p>
<p><disp-formula id="E18"><label>(18)</label><mml:math id="M26"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>E</mml:mi><mml:mi>C</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:mfrac><mml:mo>|</mml:mo><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
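<p>where <italic>B</italic><sub><italic>m</italic></sub> is the set of predictions whose confidence falls in the <italic>m-th</italic> bin, <italic>n</italic> is the total number of predictions, and <italic>acc</italic>(<italic>B</italic><sub><italic>m</italic></sub>) and <italic>conf</italic>(<italic>B</italic><sub><italic>m</italic></sub>) are the accuracy and confidence in the <italic>m-th</italic> bin, respectively.</p></list-item>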
</list>
<p>Additionally, we examine the Brier score, as described in Brier (<xref ref-type="bibr" rid="B10">1950</xref>). The Brier score is a metric used to evaluate the accuracy of probabilistic predictions made by a model. A lower Brier score indicates better calibration and accuracy of the model&#x00027;s predictions.</p>
</sec></sec>
</sec>
<sec id="s4">
<title>4 Results and discussions</title>
<sec>
<title>4.1 DNN without uncertainty quantification</title>
<p>Features were extracted from chest X-ray (CXR) images in the Pneumonia, Normal, and COVID-19 datasets using two distinct pre-trained DenseNet models. For the Pneumonia and Normal datasets, we employed the DenseNet121 model, trained on various sources, including the RSNA Pneumonia Challenge, CheXpert, and normal CXR images. Each CXR image was resized to a standard dimension of 224 &#x000D7; 224 pixels, converted into an array, and pre-processed to meet the model&#x00027;s input requirements. The images were then processed through the DenseNet121 model, from which features were extracted from the feature layer and subsequently flattened into a one-dimensional vector.</p>
<p>For the COVID-19 dataset, we utilized a DenseNet model with &#x0201C;all&#x0201D; weights. The images were read in grayscale, resized to 224 &#x000D7; 224 pixels, and passed through the PyTorch-implemented DenseNet model. Features were extracted from the feature layer, detached from the computational graph, converted to a NumPy array, and flattened into a one-dimensional vector. These feature vectors, along with their corresponding labels (Normal: &#x0201C;Label&#x0201D;: Class 0, COVID-19: &#x0201C;Label&#x0201D;: Class 1, Pneumonia: &#x0201C;Label&#x0201D;: Class 2) and filenames, were systematically organized in a Pandas DataFrame (Features), which was subsequently used for model training and uncertainty quantification (UQ) using different techniques.</p>
<p>After pretraining a DenseNet121 model using normal chest X-ray images to extract essential features, we trained DNN on Class 0 vs. All, on Class 1 vs. All, and on Class 2 vs. All without uncertainty quantification to give a normal confusion matrix and subsequently evaluate the performance metrics for each class.</p>
<p><xref ref-type="table" rid="T5">Table 5</xref> shows the results of the one vs. all classifications without uncertainty quantification.</p>


<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Results of the &#x0201C;One vs. All&#x0201D; (OvA) multi-class classification of chest X-ray images using DNN without uncertainty quantification.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Class</bold></th>
<th valign="top" align="center"><bold>AUC-ROC</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>F1 score (%)</bold></th>
<th valign="top" align="center"><bold>Sensitivity (%)</bold></th>
<th valign="top" align="center"><bold>Specificity (%)</bold></th>
<th valign="top" align="center"><bold>Precision (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Class 0 vs. all</td>
<td valign="top" align="center">96.5</td>
<td valign="top" align="center">92.8</td>
<td valign="top" align="center">90.0</td>
<td valign="top" align="center">90.4</td>
<td valign="top" align="center">94.2</td>
<td valign="top" align="center">89.7</td>
</tr> <tr>
<td valign="top" align="left">Class 1 vs. all</td>
<td valign="top" align="center">97.3</td>
<td valign="top" align="center">93.4</td>
<td valign="top" align="center">90.6</td>
<td valign="top" align="center">91.7</td>
<td valign="top" align="center">94.3</td>
<td valign="top" align="center">89.6</td>
</tr>
<tr>
<td valign="top" align="left">Class 2 vs. all</td>
<td valign="top" align="center">93.6</td>
<td valign="top" align="center">91.7</td>
<td valign="top" align="center">85.4</td>
<td valign="top" align="center">83.5</td>
<td valign="top" align="center">95.1</td>
<td valign="top" align="center">89.9</td>
</tr></tbody>
</table>
</table-wrap>


<p><xref ref-type="table" rid="T5">Table 5</xref> shows that DNN has high discriminative performance across all classes, with AUC-ROC values of 96.5% for normal images, 97.3% for COVID-19 images, and 93.6% for pneumonia images. The model&#x00027;s high AUC-ROC values indicate excellent effectiveness in discriminating each class from others. The model achieves good overall accuracy, with 92.8% for normal images, 93.4% for COVID-19 images, and 91.7% for pneumonia images, demonstrating effective classification across image types. Sensitivity and specificity measures are important in real-world applications, especially in the medical industry. The DNN accurately detected normal images with a sensitivity of 90.4% and specificity of 94.2%, minimizing false positives. For COVID-19 images, the sensitivity is 91.7% and specificity is 94.3%, underscoring the model&#x00027;s ability to accurately detect COVID-19 cases while minimizing the misclassification of other conditions as COVID-19. In the case of pneumonia images, the sensitivity is slightly lower at 83.5%, but the specificity remains high at 95.1%, demonstrating the model&#x00027;s effectiveness in identifying true pneumonia cases and reducing the likelihood of misclassifying other conditions as pneumonia.</p>
<p>These results have significant implications for real-life healthcare settings. High sensitivity in detecting COVID-19 and pneumonia images is crucial for early diagnosis and treatment, which can prevent the spread of infections and mitigate complications. On the other hand, high specificity across all classes ensures that healthy individuals are not subjected to unnecessary medical interventions, while patients receive appropriate treatments for their conditions. Additionally, high precision for each class indicates that the model reliably identifies true cases, enhancing trust in the diagnostic process and ensuring efficient allocation of medical resources. Overall, the DNN&#x00027;s strong performance metrics suggest its potential as a valuable tool in medical diagnostics, contributing to improved patient care and public health outcomes.</p>
<p>While the results demonstrate that DNN is suitable for &#x0201C;OvA&#x0201D; multi-class classification tasks, it lacks the ability to quantify uncertainty, which can be crucial in certain scenarios, such as identifying chest X-rays of pneumonia patients. To quantify predictive uncertainty, we start by assessing the calibration of the predictions produced by the DNN.</p>
</sec>
<sec>
<title>4.2 Expected calibration error</title>
<p>The expected calibration error (ECE) values for the different multi-class classification tasks are presented in <xref ref-type="table" rid="T6">Table 6</xref>. These ECE values, derived from <xref ref-type="disp-formula" rid="E18">Equation 18</xref>, provide a quantitative evaluation of the alignment between the actual outcomes and the probabilities predicted by the DNN. For Class 0 vs. All, Class 1 vs. All, and Class 2 vs. All, the ECE values are 0.0134, 0.0128, and 0.0130, respectively. These ECE values are low, indicating good calibration by the DNN. This shows that the probabilities predicted by the DNN are closely aligned with the actual outcomes across the three multi-class classification scenarios.</p>


<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Calibration metrics for each class.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Class</bold></th>
<th valign="top" align="center"><bold>ECE</bold></th>
<th valign="top" align="center"><bold>Brier score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Class 0 vs. All</td>
<td valign="top" align="center">0.0134</td>
<td valign="top" align="center">0.123</td>
</tr> <tr>
<td valign="top" align="left">Class 1 vs. All</td>
<td valign="top" align="center">0.0128</td>
<td valign="top" align="center">0.234</td>
</tr>
<tr>
<td valign="top" align="left">Class 2 vs. All</td>
<td valign="top" align="center">0.0130</td>
<td valign="top" align="center">0.345</td>
</tr></tbody>
</table>
</table-wrap>


<p>In addition, <xref ref-type="table" rid="T6">Table 6</xref> presents the Brier scores that evaluate the DNN&#x00027;s calibration and accuracy performances across the multi-class classification scenarios. For Class 0 vs. All, the Brier score is 0.123, indicating that predictions are well-calibrated and accurate. For Class 1 vs. All, the Brier score of 0.234 is slightly higher, showing that the calibration and accuracy are reasonable, with some indication of uncertainty. On the other hand, Class 2 vs. All has the highest Brier score (0.345), suggesting that it may be more challenging to predict this class correctly than the others.</p>
<p>To quantify uncertainty, we employ a specialized type of DNN called the Bayesian neural network (BNN). This BNN incorporates various dropout techniques, including Monte Carlo (MC) dropout, Ensemble Monte Carlo (EMC), and Ensemble Bayesian neural networks (EBNN) to quantify uncertainty.</p>
</sec>
<sec>
<title>4.3 Uncertainty quantification using Bayesian neural networks</title>
<p>The results presented in <xref ref-type="table" rid="T8">Table 8</xref> reveal varying performance metrics across different classes in the &#x0201C;One vs. All&#x0201D; (OvA) multi-class classification of chest X-ray images. For Class 0 vs. All, the model achieves an AUC-ROC of 89.8%, indicating strong discriminative ability in distinguishing normal images from others, supported by high sensitivity (82.2%) and specificity (83.0%). However, the F1 score of 77.4% suggests a moderate balance between precision and recall. In contrast, Class 1 vs. All shows a lower AUC-ROC of 79.2%, reflecting greater difficulty in accurately identifying COVID-19 cases, with sensitivity and precision at 77.1 and 72.1%, respectively. Class 2 vs. All exhibits challenges in balancing precision and recall (F1 score 58.9%), despite a reasonable AUC-ROC of 77.6% and high specificity (84.3%).</p>


<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comparison of the uncertainty-aware evaluation metrics produced by DNN with uncertainty quantification techniques (MC Dropout, EBNN, Ensemble, and EMC Dropout) for Class 0 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>F1 Score</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Spec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>AUC-ROC</bold></th>
<th valign="top" align="center"><bold>Brier score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">81.0</td>
<td valign="top" align="center">75.5</td>
<td valign="top" align="center">70.3</td>
<td valign="top" align="center">81.5</td>
<td valign="top" align="center">80.7</td>
<td valign="top" align="center">87.9</td>
<td valign="top" align="center">0.168</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">92.6</td>
<td valign="top" align="center">90.0</td>
<td valign="top" align="center">86.9</td>
<td valign="top" align="center">93.3</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">95.0</td>
<td valign="top" align="center">0.157</td>
</tr>
<tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">80.7</td>
<td valign="top" align="center">90.7</td>
<td valign="top" align="center">72.6</td>
<td valign="top" align="center">95.9</td>
<td valign="top" align="center">95.1</td>
<td valign="top" align="center">0.178</td>
</tr></tbody>
</table>
</table-wrap>


</sec>
<sec>
<title>4.4 Uncertainty quantification using deep neural networks</title>
<p>Uncertainty quantification was performed for each of Class 0 vs. All, Class 1 vs. All, and Class 2 vs. All. <xref ref-type="table" rid="T7">Table 7</xref> presents a comparison of the uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 0 vs. All.</p>
<sec>
<title>4.4.1 Class 0 vs. all</title>
<p>The results show that EBNN outperforms the other models, achieving the highest <italic>U</italic>Acc of 92.6% and a <italic>U</italic>AUC-ROC value of 95.0%, indicating its superior ability in accurately classifying the OvA instances while quantifying uncertainty. EBNN also demonstrates superiority across other metrics such as Brier Score, with a score of 0.1567, re-affirming its overall effectiveness in predictive uncertainty quantification.</p>
<p><italic>U</italic>F1 Score, <italic>U</italic>Prec, <italic>U</italic>Sens, and <italic>U</italic>Spec were also employed to quantify prediction uncertainty. These metrics are important because they provide insights into a model&#x00027;s ability to make predictions. In this study, we used a threshold of 0.30, following the recommendation of Asgharnezhad et al. (<xref ref-type="bibr" rid="B7">2022</xref>), to calculate these performance metrics. The best-performing model, EBNN, showed outstanding performance across these metrics: <italic>U</italic>F1 Score = 90.0%, <italic>U</italic>Prec = 86.9%, <italic>U</italic>Sens = 93.3%, and <italic>U</italic>Spec = 92.1%. These results emphasize EBNN&#x00027;s reliability and accuracy in estimating uncertainty.</p>
<p>The results in <xref ref-type="table" rid="T8">Table 8</xref> show that the Bayesian Neural Network (BNN) employed for &#x0201C;One vs. All&#x0201D; multi-class classification of chest X-ray images exhibits varying performance across classes, with the highest accuracy (82.7%) achieved by Class 0 vs. All. In addition, Class 0 vs. All&#x00027;s AUC-ROC is 89.8%, indicating better discriminative ability compared to the other classes. However, Class 2 vs. All achieved the lowest F1 score of 58.9%, suggesting it had challenges in correctly identifying the true positives. The Brier Scores ranged from 0.161 to 0.194, reflecting reasonable but not perfect calibration of predicted probabilities across all classes.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Results of the &#x0201C;One vs. All&#x0201D; (OvA) multi-class classification of chest X-ray images using Bayesian Neural Networks (BNN).</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Class</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>F1 Score</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Spec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>AUC-ROC</bold></th>
<th valign="top" align="center"><bold>Brier score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Class 0 vs. All</td>
<td valign="top" align="center">82.7</td>
<td valign="top" align="center">77.4</td>
<td valign="top" align="center">73.0</td>
<td valign="top" align="center">82.2</td>
<td valign="top" align="center">83.0</td>
<td valign="top" align="center">89.8</td>
<td valign="top" align="center">0.161</td>
</tr> <tr>
<td valign="top" align="left">Class 1 vs. All</td>
<td valign="top" align="center">72.9</td>
<td valign="top" align="center">69.0</td>
<td valign="top" align="center">72.1</td>
<td valign="top" align="center">77.1</td>
<td valign="top" align="center">92.2</td>
<td valign="top" align="center">79.2</td>
<td valign="top" align="center">0.194</td>
</tr> <tr>
<td valign="top" align="left">Class 2 vs. All</td>
<td valign="top" align="center">76.6</td>
<td valign="top" align="center">58.9</td>
<td valign="top" align="center">60.0</td>
<td valign="top" align="center">57.8</td>
<td valign="top" align="center">84.3</td>
<td valign="top" align="center">77.6</td>
<td valign="top" align="center">0.194</td>
</tr></tbody>
</table>
</table-wrap>


<p>Notably, <xref ref-type="table" rid="T9">Table 9</xref> shows that the different multi-class classification models perform differently in quantifying the percentage uncertainty in chest X-ray image classification for the Class 0 vs. All case.</p>
<table-wrap position="float" id="T9">
<label>Table 9</label>
<caption><p>Comparison of the percentage uncertainty (%) in the different uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 0 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>ROC-AUC</bold></th>
<th valign="top" align="center"><bold>UPrec (%)</bold></th>
<th valign="top" align="center"><bold>USens (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">11.8</td>
<td valign="top" align="center">8.6</td>
<td valign="top" align="center">19.4</td>
<td valign="top" align="center">8.9</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">0.2</td>
<td valign="top" align="center">1.5</td>
<td valign="top" align="center">2.8</td>
<td valign="top" align="center">2.9</td>
</tr>
<tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">5.3</td>
<td valign="top" align="center">1.4</td>
<td valign="top" align="center">1.0</td>
<td valign="top" align="center">17.8</td>
</tr></tbody>
</table>
</table-wrap>


<p>EBNN emerges as the best-performing model, achieving the lowest uncertainty values across several performance metrics for Class 0 vs. All. Specifically, EBNN achieves the lowest uncertainty in accuracy (<italic>U</italic>Acc = 0.2%), signifying its robustness in making accurate predictions. Moreover, for Class 0 vs. All, EBNN exhibits high discriminatory power with the lowest uncertainty in <italic>U</italic>ROC-AUC = 1.5%. Furthermore, EBNN produces outstanding performance not only in achieving lower uncertainty in overall accuracy but also in achieving the lowest percent uncertainty in precision (<italic>U</italic>Prec = 2.8%). This indicates that the model correctly identifies Class 0 instances among all positive predictions. Additionally, EBNN produces the lowest percent uncertainty in sensitivity (<italic>U</italic>Sens = 2.9%), demonstrating its effectiveness in capturing the true positive instances of Class 0 while reducing false negatives.</p>
<p>The superior performance of EBNN indicates that it provides more reliable uncertainty estimates across various key metrics compared to other models. This enhances its suitability for deployment in critical healthcare applications.</p></sec>
<sec>
<title>4.4.2 Class 1 vs. all</title>
<p><xref ref-type="table" rid="T10">Table 10</xref> presents a comparison of the uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for the Class 1 vs. All scenario.</p>


<table-wrap position="float" id="T10">
<label>Table 10</label>
<caption><p>Comparison of the uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 1 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>F1 Score</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Spec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>AUC-ROC</bold></th>
<th valign="top" align="center"><bold>Brier score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">74.7</td>
<td valign="top" align="center">53.1</td>
<td valign="top" align="center">76.0</td>
<td valign="top" align="center">59.9</td>
<td valign="top" align="center">93.0</td>
<td valign="top" align="center">80.0</td>
<td valign="top" align="center">0.189</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">70.7</td>
<td valign="top" align="center">73.7</td>
<td valign="top" align="center">82.4</td>
<td valign="top" align="center">71.4</td>
<td valign="top" align="center">97.5</td>
<td valign="top" align="center">93.4</td>
<td valign="top" align="center">0.182</td>
</tr>
<tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">83.5</td>
<td valign="top" align="center">72.3</td>
<td valign="top" align="center">88.0</td>
<td valign="top" align="center">61.7</td>
<td valign="top" align="center">95.5</td>
<td valign="top" align="center">95.8</td>
<td valign="top" align="center">0.165</td>
</tr></tbody>
</table>
</table-wrap>


<p><xref ref-type="table" rid="T10">Table 10</xref> shows that the EMC Dropout model is the best-performing model for Class 1 vs. All classification, as indicated by the different performance evaluation metrics. The model achieved a <italic>U</italic>Acc of 83.5% and a <italic>U</italic>F1 Score of 72.3%, indicating its accuracy in correctly classifying Class 1 vs. All instances. In addition, the model has superior <italic>U</italic>Prec (88.0%) and <italic>U</italic>Spec values (95.5%), showing good predictive performance. Although its <italic>U</italic>Sens is relatively lower at 61.7% compared to other metrics, it still demonstrates solid sensitivity. The high <italic>U</italic>AUC-ROC value of 95.8% and a low Brier Score of 0.165 further indicate that the EMC Dropout model performs well in predictive uncertainty estimation.</p>
<p><xref ref-type="table" rid="T11">Table 11</xref> summarizes the percentage uncertainty for different metrics produced by the uncertainty quantification techniques for the Class 1 vs. All scenario.</p>
<table-wrap position="float" id="T11">
<label>Table 11</label>
<caption><p>Comparison of the percent uncertainty (%) in the different uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 1 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>ROC-AUC (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">18.7</td>
<td valign="top" align="center">17.3</td>
<td valign="top" align="center">13.6</td>
<td valign="top" align="center">31.8</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">22.7</td>
<td valign="top" align="center">3.9</td>
<td valign="top" align="center">7.2</td>
<td valign="top" align="center">20.3</td>
</tr>
<tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">9.9</td>
<td valign="top" align="center">1.5</td>
<td valign="top" align="center">1.6</td>
<td valign="top" align="center">30.0</td>
</tr></tbody>
</table>
</table-wrap>


<p>The results show that EMC Dropout emerges as the best-performing model by achieving the lowest uncertainty percentage values across most metrics for Class 1 vs. All. Specifically, EMC Dropout achieves the lowest uncertainty in accuracy (<italic>U</italic>Acc = 9.9%), <italic>U</italic>ROC-AUC (1.5%), precision (<italic>U</italic>Prec = 1.6%), and sensitivity (<italic>U</italic>Sens = 30%). The results indicate that the EMC Dropout model is capable of providing more reliable uncertainty estimates compared to other models.</p></sec>
<sec>
<title>4.4.3 Class 2 vs. all</title>
<p><xref ref-type="table" rid="T12">Table 12</xref> presents a comparison of the uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 2 vs. All. The Ensemble Bayesian Neural Network (EBNN) model outperforms the other models evaluated for Class 2 vs. All classification, showing very good classification performance across various evaluation metrics. EBNN accurately classifies Class 2 and Rest instances, with a <italic>U</italic>Acc of 87.8% and a <italic>U</italic>F1 Score of 75.5%. Also, it has a high <italic>U</italic>Prec of 89.9%, which indicates accurate positive-case predictions, and a <italic>U</italic>Spec of 97.0%, which highlights its accuracy in identifying negative cases. Nevertheless, it is noteworthy that its <italic>U</italic>Sens is just 65.1%, indicating room for improvement in classifying all positive instances. These results show that the EBNN model performs very well in predictive uncertainty estimation, as evidenced by its high <italic>U</italic>AUC-ROC value of 0.91 and comparatively low Brier Score of 0.19. These results confirm the model&#x00027;s reliability and efficiency in handling uncertain predictions for Class 2 vs. All classification tasks. Furthermore, we quantified the percentage uncertainty produced by the different uncertainty quantification techniques across different evaluation metrics, and the results are shown in <xref ref-type="table" rid="T13">Table 13</xref>.</p>
<table-wrap position="float" id="T12">
<label>Table 12</label>
<caption><p>Comparison of the uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 2 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>F1 Score</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Spec</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>AUC-ROC</bold></th>
<th valign="top" align="center"><bold>Brier Score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">74.4</td>
<td valign="top" align="center">53.9</td>
<td valign="top" align="center">56.5</td>
<td valign="top" align="center">51.6</td>
<td valign="top" align="center">83.7</td>
<td valign="top" align="center">75.8</td>
<td valign="top" align="center">0.196</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">87.8</td>
<td valign="top" align="center">75.5</td>
<td valign="top" align="center">87.5</td>
<td valign="top" align="center">65.1</td>
<td valign="top" align="center">97.0</td>
<td valign="top" align="center">90.6</td>
<td valign="top" align="center">0.185</td>
</tr> <tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">71.8</td>
<td valign="top" align="center">65.0</td>
<td valign="top" align="center">77.2</td>
<td valign="top" align="center">62.8</td>
<td valign="top" align="center">91.0</td>
<td valign="top" align="center">84.4</td>
<td valign="top" align="center">0.190</td>
</tr></tbody>
</table>
</table-wrap>



<table-wrap position="float" id="T13">
<label>Table 13</label>
<caption><p>Comparison of the percentage uncertainty (%) in the different uncertainty-aware evaluation metrics produced by the DNN, MC Dropout, EBNN, Ensemble, and EMC Dropout uncertainty quantification techniques for Class 2 vs. All.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Acc (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>ROC-AUC (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Prec (%)</bold></th>
<th valign="top" align="center"><bold><italic>U</italic>Sens (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MC Dropout</td>
<td valign="top" align="center">17.3</td>
<td valign="top" align="center">17.8</td>
<td valign="top" align="center">31.0</td>
<td valign="top" align="center">31.9</td>
</tr> <tr>
<td valign="top" align="left">EBNN</td>
<td valign="top" align="center">3.9</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.4</td>
<td valign="top" align="center">18.4</td>
</tr>
<tr>
<td valign="top" align="left">EMC Dropout</td>
<td valign="top" align="center">19.9</td>
<td valign="top" align="center">9.2</td>
<td valign="top" align="center">12.7</td>
<td valign="top" align="center">20.7</td>
</tr></tbody>
</table>
</table-wrap>



<p>The results in <xref ref-type="table" rid="T13">Table 13</xref> indicate that EBNN has the lowest percentage uncertainty across all the different evaluation metrics (<italic>U</italic>Acc = 3.9%, <italic>U</italic>ROC-AUC = 3.0%, <italic>U</italic>Prec = 2.4%, and <italic>U</italic>Sens = 18.4%).</p></sec></sec></sec>
<sec sec-type="discussion" id="s5">
<title>5 Discussion</title>
<p>This paper presented an in-depth quantification of uncertainty across different evaluation metrics using various uncertainty quantification methods deployed on multi-class classification tasks: Class 0 vs. All, Class 1 vs. All, and Class 2 vs. All. A chest X-ray image dataset with three classes (COVID-19, Pneumonia, and Normal images) was used to perform the multi-class classification tasks. Several uncertainty quantification techniques (MC Dropout, EBNN, Ensemble, and EMC Dropout) were employed for all classifications in chest X-ray analysis. Through the evaluation of these techniques, the percentage uncertainty estimates were obtained for each method across different evaluation metrics. The quantified uncertainty provides insights into the reliability and predictive performance of these quantification techniques.</p>
<p>A comparative analysis of Bayesian Neural Networks (BNN) and Deep Neural Networks with Uncertainty Quantification (DNN with UQ) techniques for multi-class classification of chest X-ray images shows notable differences in performance metrics for Class 0 vs. All, Class 1 vs. All, and Class 2 vs. All scenarios. For Class 0 vs. All, the EBNN method achieved a <italic>U</italic>Acc of 92.6%, <italic>U</italic>AUC-ROC of 95.0%, and a Brier Score of 0.157, significantly outperforming the BNN&#x00027;s Acc of 82.7%, AUC-ROC of 89.8%, and Brier Score of 0.161. Similarly, for Class 1 vs. All, the EMC Dropout technique showed superior results with a <italic>U</italic>Acc of 83.5%, <italic>U</italic>AUC-ROC of 95.8%, and a Brier Score of 0.165, compared to the BNN&#x00027;s Acc of 72.9%, AUC-ROC of 79.2%, and Brier Score of 0.194. In the Class 2 vs. All scenario, the EBNN also excelled with a <italic>U</italic>Acc of 87.8%, <italic>U</italic>AUC-ROC of 90.6%, and a Brier Score of 0.185, versus the BNN&#x00027;s Acc of 76.6%, AUC-ROC of 77.6%, and Brier Score of 0.194.</p>
<p>Across all classes, DNNs with UQ techniques, especially EBNN and EMC Dropout, consistently demonstrated superior performance metrics compared to BNNs. They achieved higher <italic>U</italic>Acc and <italic>U</italic>AUC-ROC values, indicating better classification accuracy and discriminative capability. Additionally, these models reported lower Brier Scores, reflecting more accurate probabilistic predictions. Further analysis of metrics such as F1 Score, Precision, Sensitivity, and Specificity revealed that DNNs with UQ maintained a better balance between precision and recall and exhibited greater robustness in identifying both positive and negative cases. Overall, the advanced DNNs with UQ not only provided enhanced performance but also ensured reliable uncertainty quantification, making them more suitable for critical healthcare applications like chest X-ray image classification.</p></sec>
<sec sec-type="conclusions" id="s6">
<title>6 Conclusion</title>
<p>This study made important contributions to the existing literature by providing novel insights into uncertainty quantification in the classification of COVID-19, particularly by extending the binary COVID-19 classification task to a multi-class classification task. In addition, we have demonstrated that uncertainty-aware estimates of evaluation metrics can effectively be obtained from uncertainty quantification techniques across different multi-class classification scenarios, particularly in the context of medical image analysis. For example, the EBNN demonstrated superior performance in quantifying uncertainty, paving the way for improved model reliability and interpretability.</p>
<p>This study has offered substantial clinical relevance by integrating advanced UQ techniques into deep neural networks (DNNs), thereby significantly enhancing the interpretability and reliability of diagnostic predictions, an essential factor for clinical decision-making. With UQ, clinicians can better gauge the confidence in model predictions, enabling more informed decisions regarding patient referrals and treatment plans. For instance, if a DNN model indicates high uncertainty in classifying a chest X-ray, it prompts clinicians to investigate further or refer the patient to a specialist, potentially leading to earlier diagnosis and intervention.</p>
<p>Accurately estimating the uncertainty associated with predictions helps mitigate the risks of misdiagnosis, especially in critical conditions like COVID-19 and pneumonia, where timely and accurate diagnosis is vital. Our findings demonstrate that models that are equipped with UQ achieve higher accuracy and thus offer probabilistic predictions that may guide clinical actions more effectively than traditional models. Implementing the UQ techniques employed in this study in clinical settings can significantly improve diagnostic outcomes and patient care, underscoring its clinical implications and value in medical practice.</p>
<p>A limitation of this study is that uncertainty quantification relies on a specific chest X-ray image dataset, and the results may not generalize well to other image datasets. For future studies, we will explore the use of different image datasets, model architectures, and training strategies to investigate their impact on uncertainty quantification in the multi-class classification of COVID-19.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found at: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/prashant268/chest-xray-covid19-pneumonia">https://www.kaggle.com/datasets/prashant268/chest-xray-covid19-pneumonia</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>AW: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x02013; original draft. KD: Data curation, Formal analysis, Investigation, Writing &#x02013; original draft. KM: Validation, Visualization, Writing &#x02013; review &#x00026; editing. IO: Project administration, Resources, Supervision, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This project was funded by Sol Plaatje University&#x00027;s Multi/Inter/Trans (MIT) Disciplinary Research Seed Funding.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Abadi</surname> <given-names>M.</given-names></name> <name><surname>Barham</surname> <given-names>P.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Davis</surname> <given-names>A.</given-names></name> <name><surname>Dean</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2015</year>). <source>Tensorflow: Large-scale machine learning on heterogeneous systems</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1605.08695">https://arxiv.org/abs/1605.08695</ext-link> (accessed September 3, 2024).</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abdar</surname> <given-names>M.</given-names></name> <name><surname>Pourpanah</surname> <given-names>F.</given-names></name> <name><surname>Hussain</surname> <given-names>S.</given-names></name> <name><surname>Rezazadegan</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Ghavamzadeh</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2021a</year>). <article-title>A review of uncertainty quantification in deep learning: techniques, applications and challenges</article-title>. <source>Inf. Fusion</source> <volume>76</volume>, <fpage>243</fpage>&#x02013;<lpage>297</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2021.05.008</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abdar</surname> <given-names>M.</given-names></name> <name><surname>Salari</surname> <given-names>S.</given-names></name> <name><surname>Qahremani</surname> <given-names>S.</given-names></name> <name><surname>Lam</surname> <given-names>H.-K.</given-names></name> <name><surname>Karray</surname> <given-names>F.</given-names></name> <name><surname>Hussain</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Uncertaintyfusenet: robust uncertainty-aware hierarchical feature fusion model with ensemble monte carlo dropout for COVID-19 detection</article-title>. <source>Inf. Fusion</source> <volume>90</volume>, <fpage>364</fpage>&#x02013;<lpage>381</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2022.09.023</pub-id><pub-id pub-id-type="pmid">36217534</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abdar</surname> <given-names>M.</given-names></name> <name><surname>Samami</surname> <given-names>M.</given-names></name> <name><surname>Mahmoodabad</surname> <given-names>S. D.</given-names></name> <name><surname>Doan</surname> <given-names>T.</given-names></name> <name><surname>Mazoure</surname> <given-names>B.</given-names></name> <name><surname>Hashemifesharaki</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021b</year>). <article-title>Uncertainty quantification in skin cancer classification using three-way decision-based Bayesian deep learning</article-title>. <source>Comput. Biol. Med</source>. <volume>135</volume>:<fpage>104418</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.104418</pub-id><pub-id pub-id-type="pmid">34052016</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alarab</surname> <given-names>I.</given-names></name> <name><surname>Prakoonwit</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Uncertainty estimation based adversarial attack in multi-class classification</article-title>. <source>Multimed. Tools Appl</source>. <volume>82</volume>, <fpage>1519</fpage>&#x02013;<lpage>1536</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-022-13269-1</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altan</surname> <given-names>G.</given-names></name> <name><surname>Narl&#x00131;</surname> <given-names>S. S.</given-names></name></person-group> (<year>2022</year>). <article-title>Clahe based enhancement to transfer learning in COVID-19 detection</article-title>. <source>Gazi m&#x000FC;hendis. Bilim. Derg</source>. <volume>8</volume>, <fpage>406</fpage>&#x02013;<lpage>416</lpage>. <pub-id pub-id-type="doi">10.30855/gmbd.0705001</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Asgharnezhad</surname> <given-names>H.</given-names></name> <name><surname>Shamsi</surname> <given-names>A.</given-names></name> <name><surname>Alizadehsani</surname> <given-names>R.</given-names></name> <name><surname>Khosravi</surname> <given-names>A.</given-names></name> <name><surname>Nahavandi</surname> <given-names>S.</given-names></name> <name><surname>Sani</surname> <given-names>Z. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Objective evaluation of deep uncertainty predictions for COVID-19 detection</article-title>. <source>Sci. Rep</source>. <volume>12</volume>:<fpage>815</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-05052-x</pub-id><pub-id pub-id-type="pmid">35039620</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bessai-Mechmache</surname> <given-names>F. Z.</given-names></name> <name><surname>Ghaffar</surname> <given-names>M. N.</given-names></name> <name><surname>Laouti</surname> <given-names>R. Y.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Bayesian convolutional neural networks for image classification with uncertainty estimation,&#x0201D;</article-title> in <source>2022 3rd International Conference on Embedded Distributed Systems (EDiS)</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>69</fpage>&#x02013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1109/EDiS57230.2022.9996478</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boughorbel</surname> <given-names>S.</given-names></name> <name><surname>Jarray</surname> <given-names>F.</given-names></name> <name><surname>El-Anbari</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Optimal classifier for imbalanced data using matthews correlation coefficient metric</article-title>. <source>PLoS ONE</source> <volume>12</volume>:<fpage>e0177678</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0177678</pub-id><pub-id pub-id-type="pmid">28574989</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brier</surname> <given-names>G. W.</given-names></name></person-group> (<year>1950</year>). <article-title>Verification of forecasts expressed in terms of probability</article-title>. <source>Mon. Weather Rev</source>. <volume>78</volume>, <fpage>1</fpage>&#x02013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1175/1520-0493(1950)078&lt;0001:VOFEIT&gt;2.0.CO;2</pub-id><pub-id pub-id-type="pmid">33502177</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dillon</surname> <given-names>J. V.</given-names></name> <name><surname>Langmore</surname> <given-names>I.</given-names></name> <name><surname>Tran</surname> <given-names>D.</given-names></name> <name><surname>Brevdo</surname> <given-names>E.</given-names></name> <name><surname>Vasudevan</surname> <given-names>S.</given-names></name> <name><surname>Moore</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Tensorflow distributions</article-title>. <source>arXiv</source> [Preprint]. arXiv:1711.10604. <pub-id pub-id-type="doi">10.48550/arXiv.1711.10604</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gal</surname> <given-names>Y.</given-names></name> <name><surname>Ghahramani</surname> <given-names>Z.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Dropout as a Bayesian approximation: representing model uncertainty in deep learning,&#x0201D;</article-title> in <source>International conference on machine learning</source> (<publisher-name>PMLR</publisher-name>), <fpage>1050</fpage>&#x02013;<lpage>1059</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghoshal</surname> <given-names>B.</given-names></name> <name><surname>Tucker</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Estimating uncertainty and interpretability in deep learning for coronavirus (COVID-19) detection</article-title>. <source>arXiv</source> [Preprint]. arXiv:2003.10769. <pub-id pub-id-type="doi">10.48550/arXiv.2003.10769</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hajian-Tilaki</surname> <given-names>K.</given-names></name></person-group> (<year>2013</year>). <article-title>Receiver operating characteristic (ROC) curve analysis for medical diagnostic test evaluation</article-title>. <source>Caspian J. Intern. Med</source>. <volume>4</volume>, <fpage>627</fpage>&#x02013;<lpage>635</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Harakeh</surname> <given-names>A.</given-names></name> <name><surname>Smart</surname> <given-names>M.</given-names></name> <name><surname>Waslander</surname> <given-names>S. L.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Bayesod: a Bayesian approach for uncertainty estimation in deep object detectors,&#x0201D;</article-title> in <source>2020 IEEE International Conference on Robotics and Automation (ICRA)</source> (<publisher-loc>Paris</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>87</fpage>&#x02013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA40945.2020.9196544</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Yuan</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>Integrating somatic mutations for breast cancer survival prediction using machine learning methods</article-title>. <source>Front. Genet</source>. <volume>11</volume>:<fpage>632901</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2020.632901</pub-id><pub-id pub-id-type="pmid">33537063</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Helaly</surname> <given-names>H. A.</given-names></name> <name><surname>Badawy</surname> <given-names>M.</given-names></name> <name><surname>Haikal</surname> <given-names>A. Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Deep learning approach for early detection of Alzheimer&#x00027;s disease</article-title>. <source>Cogn. Comput</source>. <volume>14</volume>, <fpage>1711</fpage>&#x02013;<lpage>1727</lpage>. <pub-id pub-id-type="doi">10.1007/s12559-021-09946-2</pub-id><pub-id pub-id-type="pmid">34745371</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hern&#x000E1;ndez</surname> <given-names>S.</given-names></name> <name><surname>L&#x000F3;pez</surname> <given-names>J. L.</given-names></name></person-group> (<year>2020</year>). <article-title>Uncertainty quantification for plant disease detection using Bayesian deep learning</article-title>. <source>Appl. Soft Comput</source>. <volume>96</volume>:<fpage>106597</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2020.106597</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kendall</surname> <given-names>A.</given-names></name> <name><surname>Gal</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>What uncertainties do we need in Bayesian deep learning for computer vision?</article-title> <source>Adv. Neural Inf. Process. Syst</source>. <volume>30</volume>, <fpage>489</fpage>&#x02013;<lpage>496</lpage>. <pub-id pub-id-type="doi">10.5555/3295222.3295309</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khairnar</surname> <given-names>P.</given-names></name> <name><surname>Thiagarajan</surname> <given-names>P.</given-names></name> <name><surname>Ghosh</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>A modified Bayesian convolutional neural network for breast histopathology image classification and uncertainty quantification</article-title>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv</source> [Preprint]. arXiv:1412.6980. <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krizhevsky</surname> <given-names>A.</given-names></name> <name><surname>Sutskever</surname> <given-names>I.</given-names></name> <name><surname>Hinton</surname> <given-names>G. E.</given-names></name></person-group> (<year>2012</year>). <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Adv. Neural Inf. Process. Syst</source>. <volume>25</volume>, <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1145/3065386</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kwon</surname> <given-names>Y.</given-names></name> <name><surname>Won</surname> <given-names>J.-H.</given-names></name> <name><surname>Kim</surname> <given-names>B. J.</given-names></name> <name><surname>Paik</surname> <given-names>M. C.</given-names></name></person-group> (<year>2020</year>). <article-title>Uncertainty quantification using Bayesian neural networks in classification: application to biomedical image segmentation</article-title>. <source>Comput. Stat. Data Anal</source>. <volume>142</volume>:<fpage>106816</fpage>. <pub-id pub-id-type="doi">10.1016/j.csda.2019.106816</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Le</surname> <given-names>N. Q. K.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Cao</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Sequence-based prediction model of protein crystallization propensity using machine learning and two-level feature selection</article-title>. <source>Brief. Bioinform</source>. <volume>24</volume>:<fpage>bbad319</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbad319</pub-id><pub-id pub-id-type="pmid">37649385</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Le</surname> <given-names>N. Q. K.</given-names></name> <name><surname>Xu</surname> <given-names>L.</given-names></name></person-group> (<year>2023</year>). <article-title>Optimizing hyperparameter tuning in machine learning to improve the predictive performance of cross-species N6-methyladenosine sites</article-title>. <source>ACS Omega</source> <volume>8</volume>, <fpage>39420</fpage>&#x02013;<lpage>39426</lpage>. <pub-id pub-id-type="doi">10.1021/acsomega.3c05074</pub-id><pub-id pub-id-type="pmid">37901522</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lemay</surname> <given-names>A.</given-names></name> <name><surname>Hoebel</surname> <given-names>K.</given-names></name> <name><surname>Bridge</surname> <given-names>C. P.</given-names></name> <name><surname>Befano</surname> <given-names>B.</given-names></name> <name><surname>De Sanjos&#x000E9;</surname> <given-names>S.</given-names></name> <name><surname>Egemen</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Improving the repeatability of deep learning models with Monte Carlo dropout</article-title>. <source>NPJ Digit. Med</source>. <volume>5</volume>:<fpage>174</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-022-00709-3</pub-id><pub-id pub-id-type="pmid">36400939</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Wen</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>Large-margin softmax loss for convolutional neural networks</article-title>. <source>arXiv</source> [Preprint]. arXiv:1612.02295. <pub-id pub-id-type="doi">10.48550/arXiv.1612.02295</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McDermott</surname> <given-names>P. L.</given-names></name> <name><surname>Wikle</surname> <given-names>C. K.</given-names></name></person-group> (<year>2019</year>). <article-title>Deep echo state networks with uncertainty quantification for spatio-temporal forecasting</article-title>. <source>Environmetrics</source> <volume>30</volume>:<fpage>e2553</fpage>. <pub-id pub-id-type="doi">10.1002/env.2553</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mobiny</surname> <given-names>A.</given-names></name> <name><surname>Yuan</surname> <given-names>P.</given-names></name> <name><surname>Moulik</surname> <given-names>S. K.</given-names></name> <name><surname>Garg</surname> <given-names>N.</given-names></name> <name><surname>Wu</surname> <given-names>C. C.</given-names></name> <name><surname>Van Nguyen</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>DropConnect is effective in modeling uncertainty of Bayesian deep networks</article-title>. <source>Sci. Rep</source>. <volume>11</volume>, <fpage>1</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-84854-x</pub-id><pub-id pub-id-type="pmid">33750847</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Narl&#x00131;</surname> <given-names>S. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Impact of local histogram equalization on deep learning architectures for diagnosis of COVID-19 on chest X-rays</article-title>. <source>Manchester J. Artif. Intell. Appl. Sci</source>. <volume>2</volume>. Available at: <ext-link ext-link-type="uri" xlink:href="https://mjaias.co.uk/mj-en/article/view/15">https://mjaias.co.uk/mj-en/article/view/15</ext-link></citation>
</ref>
<ref id="B31">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Neal</surname> <given-names>R. M.</given-names></name></person-group> (<year>2012</year>). <source>Bayesian Learning for Neural Networks, Volume 118</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer Science &#x00026; Business Media</publisher-name>.</citation>
</ref>
<ref id="B32">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Novakovi&#x00107;</surname> <given-names>J. D.</given-names></name> <name><surname>Veljovi&#x00107;</surname> <given-names>A.</given-names></name> <name><surname>Ili&#x00107;</surname> <given-names>S. S.</given-names></name> <name><surname>Papi&#x00107;</surname> <given-names>&#x0017D;.</given-names></name> <name><surname>Milica</surname> <given-names>T.</given-names></name></person-group> (<year>2017</year>). <article-title>Evaluation of classification models in machine learning</article-title>. <source>Theory Appl. Math. Comput. Sci</source>. <volume>7</volume>, <fpage>39</fpage>&#x02013;<lpage>46</lpage>. Available at: <ext-link ext-link-type="uri" xlink:href="https://typeset.io/pdf/evaluation-of-classification-models-in-machine-learning-1u2pog86m5.pdf">https://typeset.io/pdf/evaluation-of-classification-models-in-machine-learning-1u2pog86m5.pdf</ext-link></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rabiei</surname> <given-names>R.</given-names></name> <name><surname>Ayyoubzadeh</surname> <given-names>S. M.</given-names></name> <name><surname>Sohrabei</surname> <given-names>S.</given-names></name> <name><surname>Esmaeili</surname> <given-names>M.</given-names></name> <name><surname>Atashi</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Prediction of breast cancer using machine learning approaches</article-title>. <source>J. Biomed. Phys. Eng.</source> <volume>12</volume>, <fpage>297</fpage>&#x02013;<lpage>308</lpage>. <pub-id pub-id-type="doi">10.31661/jbpe.v0i0.2109-1403</pub-id><pub-id pub-id-type="pmid">35698545</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Girshick</surname> <given-names>R.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Faster R-CNN: towards real-time object detection with region proposal networks</article-title>. <source>Adv. Neural Inf. Process. Syst</source>. <volume>28</volume>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id><pub-id pub-id-type="pmid">27295650</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Schroff</surname> <given-names>F.</given-names></name> <name><surname>Kalenichenko</surname> <given-names>D.</given-names></name> <name><surname>Philbin</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;FaceNet: a unified embedding for face recognition and clustering,&#x0201D;</article-title> in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source> (<publisher-loc>Boston, MA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>815</fpage>&#x02013;<lpage>823</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2015.7298682</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Spanhol</surname> <given-names>F. A.</given-names></name> <name><surname>Oliveira</surname> <given-names>L. S.</given-names></name> <name><surname>Petitjean</surname> <given-names>C.</given-names></name> <name><surname>Heutte</surname> <given-names>L.</given-names></name></person-group> (<year>2015</year>). <article-title>A dataset for breast cancer histopathological image classification</article-title>. <source>IEEE Trans. Biomed. Eng</source>. <volume>63</volume>, <fpage>1455</fpage>&#x02013;<lpage>1462</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2015.2496264</pub-id><pub-id pub-id-type="pmid">26540668</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Srivastava</surname> <given-names>N.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name> <name><surname>Krizhevsky</surname> <given-names>A.</given-names></name> <name><surname>Sutskever</surname> <given-names>I.</given-names></name> <name><surname>Salakhutdinov</surname> <given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>Dropout: a simple way to prevent neural networks from overfitting</article-title>. <source>J. Mach. Learn. Res</source>. <volume>15</volume>, <fpage>1929</fpage>&#x02013;<lpage>1958</lpage>. Available at: <ext-link ext-link-type="uri" xlink:href="https://jmlr.org/papers/v15/srivastava14a.html">https://jmlr.org/papers/v15/srivastava14a.html</ext-link><pub-id pub-id-type="pmid">33259321</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Shao</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Xing</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>An improved Adam&#x00027;s algorithm for stomach image classification</article-title>. <source>Algorithms</source> <volume>17</volume>:<fpage>272</fpage>. <pub-id pub-id-type="doi">10.3390/a17070272</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thiagarajan</surname> <given-names>P.</given-names></name> <name><surname>Khairnar</surname> <given-names>P.</given-names></name> <name><surname>Ghosh</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Explanation and use of uncertainty quantified by Bayesian neural network classifiers for breast histopathology images</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>41</volume>, <fpage>815</fpage>&#x02013;<lpage>825</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2021.3123300</pub-id><pub-id pub-id-type="pmid">34699354</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>Fevens</surname> <given-names>T.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Uncertainty quantification and estimation in medical image classification,&#x0201D;</article-title> in <source>Artificial Neural Networks and Machine Learning&#x02013;ICANN 2021: 30th International Conference on Artificial Neural Networks, Bratislava, Slovakia, September 14&#x02013;17, 2021, Proceedings, Part III 30</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>671</fpage>&#x02013;<lpage>683</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-86365-4_54</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Qiao</surname> <given-names>S.</given-names></name> <name><surname>Ji</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>DeepSite: bidirectional LSTM and CNN models for predicting DNA-protein binding</article-title>. <source>Int. J. Mach. Learn. Cybern</source>. <volume>11</volume>, <fpage>841</fpage>&#x02013;<lpage>851</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-019-00990-x</pub-id></citation>
</ref>
</ref-list>
</back>
</article>