<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2025.1751090</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Explainable and uncertainty-aware ensemble framework with causal analysis for breast cancer detection</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zaheer Sajid</surname><given-names>Muhammad</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3225626/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Fareed Hamid</surname><given-names>Muhammad</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Qureshi</surname><given-names>Imran</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Electrical and Computer Engineering, George Mason University</institution>, <city>Fairfax</city>, <state>VA</state>,&#xa0;<country country="us">United States</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Computer Software Engineering, Military College of Signals, National University of Sciences and Technology</institution>, <city>Islamabad</city>,&#xa0;<country country="pk">Pakistan</country></aff>
<aff id="aff3"><label>3</label><institution>College of Computer and Information Sciences, Imam Mohammad Ibn Saud Islamic University (IMSIU)</institution>, <city>Riyadh</city>,&#xa0;<country country="sa">Saudi Arabia</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Imran Qureshi, <email xlink:href="mailto:iqureshi@imamu.edu.sa">iqureshi@imamu.edu.sa</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-20">
<day>20</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>15</volume>
<elocation-id>1751090</elocation-id>
<history>
<date date-type="received">
<day>21</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>19</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zaheer Sajid, Fareed Hamid and Qureshi.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zaheer Sajid, Fareed Hamid and Qureshi</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Breast cancer is one of the main causes of cancer deaths around the world and is known for its aggressive growth and ability to spread. While machine learning has shown good results for diagnosis, most existing methods do not handle uncertainty or explain their predictions clearly. In this study, we present an integrated framework that combines uncertainty-aware ensemble learning with causal feature analysis and multimodal explainability for breast cancer prediction. The framework uses a mix of Light Gradient Boosting Machine (LightGBM), random forest, and gradient boosting classifiers that include uncertainty estimation so that the model can mark predictions that are less confident. It also applies causal analysis to detect possible clinical confounders and uses SHAP (Shapley Additive Explanations), permutation importance, and feature attribution for interpretation. Tests on two public datasets showed strong and consistent performance. On the UCTH Clinical Dataset, the model reached an area under the curve (AUC) of 0.97, an accuracy of 0.95, and an F1 score of 0.94, with 100% precision for high confidence cases and no false positives. On the Breast Cancer Wisconsin dataset, it achieved an AUC of 0.99, an accuracy of 0.94, and an F1 score of 0.92, which increased to 0.98 accuracy and 0.98 F1 score when only certain predictions were considered. Causal analysis pointed out important clinical confounders like lymph node involvement, tumor size, and metastasis, while fairness tests showed balanced results across demographic groups. Overall, the framework combines uncertainty estimation and causal interpretability to give predictions that are both accurate and trustworthy. It provides clinicians with clear confidence levels for every prediction and supports transparent decision-making that can reduce diagnostic errors and improve reliability in clinical use.</p>
</abstract>
<kwd-group>
<kwd>breast cancer prediction</kwd>
<kwd>causal interpretability</kwd>
<kwd>clinical decision support</kwd>
<kwd>ensemble learning</kwd>
<kwd>SHAP explainability</kwd>
<kwd>uncertainty quantification</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for&#xa0;this work and/or its publication. This work was supported and funded by the Deanship of Scientific Research at Imam Mohammad Ibn Saud Islamic University (IMSIU) (grant number IMSIU-DDRSP2601).</funding-statement>
</funding-group>
<counts>
<fig-count count="12"/>
<table-count count="10"/>
<equation-count count="12"/>
<ref-count count="20"/>
<page-count count="18"/>
<word-count count="9092"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Breast Cancer</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Breast cancer is a severe disease. It happens when abnormal cells in the breast grow out of control (<xref ref-type="bibr" rid="B1">1</xref>). It is the second most common cancer found in women after skin cancer. In 2022, approximately 2.3 million women in the world were diagnosed with breast cancer, and approximately 670,000 died from it (<xref ref-type="bibr" rid="B2">2</xref>). Primarily, it affects women, but in some rare cases, men can also have breast cancer. According to abnormal cell growth, breast cancer is divided into two types: invasive and non-invasive (<xref ref-type="bibr" rid="B3">3</xref>).</p>
<p>The main risk factors of breast cancer include aging, genetic hereditary mutation, hormonal changes, lifestyle habits, and environmental factors (<xref ref-type="bibr" rid="B2">2</xref>). If not treated, the disease spreads to other organs and causes multiple organ failure and death (<xref ref-type="bibr" rid="B4">4</xref>). Early detection increases the survival rate of patients (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>). The common symptoms of breast cancer are swelling or a lump in the breast, chest, or underarm, bloody discharge from the nipple, continuous pain in the breast, and a change in the size or shape of the breast (<xref ref-type="bibr" rid="B5">5</xref>).</p>
<p>Mammography is the most common screening method for breast cancer. It helps to find breast density, calcifications, structural problems, and tumor masses (<xref ref-type="bibr" rid="B6">6</xref>). Other diagnostic tests include magnetic resonance imaging (MRI), positron emission tomography (PET) scans, computed tomography (CT) scans, biopsy, gene testing, and estrogen or progesterone receptor tests. Treatment methods include surgery, chemotherapy, radiation therapy, hormone therapy, targeted therapy, and immunotherapy (<xref ref-type="bibr" rid="B4">4</xref>). Humans cannot control genetic factors, but a healthy lifestyle can reduce the chances of breast cancer (<xref ref-type="bibr" rid="B7">7</xref>). The preventive steps include less use of alcohol and smoking, keeping body weight normal, doing regular exercise, breastfeeding, and not using hormone therapy after menopause.</p>
<p>Even with advanced diagnostic technologies, some problems persist, including low imaging resolution, the inability to detect minor symptoms, high procedure costs, and slow diagnosis results (<xref ref-type="bibr" rid="B8">8</xref>). According to the National Institutes of Health (NIH), human error is the main reason for almost 96.3% of diagnostic failures (<xref ref-type="bibr" rid="B9">9</xref>). The use of machine learning (ML) can solve many of these problems. ML can learn patterns from a large amount of data, and it is helpful for disease prediction and early detection. Many studies show that ML can help doctors diagnose breast cancer correctly. By checking medical data, ML can improve medical decisions, reduce human mistakes, and make diagnoses and treatments faster (<xref ref-type="bibr" rid="B9">9</xref>).</p>
<p>Explainable artificial intelligence (XAI) is the method that makes ML results easy to understand for humans (<xref ref-type="bibr" rid="B10">10</xref>). XAI not only checks model predictions but also enhances the reliability, stability, and transparency of models, helping to identify and correct errors. Much research has been done on the use of ML and XAI for breast cancer detection and classification. One research used the Wisconsin Diagnostic Breast Cancer (WDBC) dataset, which has 569 records with 32 features, and compared three ML algorithms for breast cancer detection. The k-Nearest Neighbors (kNN) algorithm achieved the highest accuracy of 95.9%, which demonstrates its strong performance in classification (<xref ref-type="bibr" rid="B11">11</xref>). Another research used the same dataset but focused on five main features and found that support vector classifier (SVC) gave a better result with 93% accuracy (<xref ref-type="bibr" rid="B12">12</xref>). One more research used the Mendeley dataset, which has 400 Indonesian patient cases, in which 200 were patients with breast cancer, and found that the XGBoost model gave 85% accuracy. The SHAP (Shapley Additive Explanations) and CRISP MLQ frameworks were used to make ML results more understandable and to check the quality of the model (<xref ref-type="bibr" rid="B13">13</xref>).</p>
<p>An ensemble model that used support vector machine (SVM) and random forest (RF) algorithms got a very high accuracy of 99.99% (<xref ref-type="bibr" rid="B14">14</xref>). Another research, based on data from 500 patients at Dhaka Medical College Hospital in Bangladesh, used five ML algorithms and found that XGBoost yielded the best performance with 97% accuracy. The use of SHAP made the model clearer and increased trust in the results (<xref ref-type="bibr" rid="B15">15</xref>). One additional study utilized a pre-trained ResNet50 model to classify breast tumor images as benign or malignant, achieving 96.84% accuracy. The dataset of this study was taken from the Kaggle repository, and it has 7,909 breast tumor images of 82 patients collected through surgical open biopsy (<xref ref-type="bibr" rid="B16">16</xref>).</p>
<p>Despite the notable progress of ML and XAI techniques in breast cancer diagnosis, a critical methodological gap remains. Most existing ML/XAI approaches primarily focus on improving predictive accuracy and providing <italic>post-hoc</italic> explanations, while neglecting three key aspects required for reliable clinical deployment. First, current models rarely incorporate explicit uncertainty estimation, resulting in overconfident predictions that fail to indicate when model outputs may be unreliable. Second, widely used explanation techniques, such as SHAP and LIME, are primarily correlation-based and do not distinguish between causal clinical factors and spurious associations, thereby limiting their ability to identify proper diagnostic drivers and potential confounders. Third, fairness assessment across demographic or temporal patient subgroups is often overlooked, raising concerns about biased or inconsistent performance in real-world settings. Importantly, these limitations are typically addressed in isolation, if at all, and no unified framework jointly integrates uncertainty awareness, causal reasoning, and fairness analysis within a single diagnostic pipeline. This gap restricts the clinical trustworthiness, transparency, and ethical applicability of existing breast cancer prediction systems.</p>
<p>To address these problems, this research presents a new Uncertainty-Aware Causal Explainable Ensemble Framework for breast cancer prediction. The proposed method integrates uncertainty estimation, causal feature analysis, and multimodal explainability into a single ensemble model. By giving predictions with information of when the model is not sure, the framework improves the reliability, understanding, and fairness of diagnosis, and makes a base for trusted clinical decision support in real healthcare use.</p>
<sec id="s1_1">
<label>1.1</label>
<title>Research motivation</title>
<p>Breast cancer is still one of the biggest health problems in the world, and every year, many people die from it despite the fast progress in diagnostic imaging and molecular testing. Modern diagnostic methods like mammography, MRI, and histopathological tests give useful information, but their performance is limited because of human mistakes, high cost, and less availability in low-resource areas. Because of this, there is a growing need for automatic and intelligent diagnostic systems that can help doctors detect cancer fast and correctly. ML and XAI are new solutions that can learn complex clinical patterns from data and give understandable predictions to support medical decision-making.</p>
<p>Nevertheless, a significant gap persists between model accuracy and clinical reliability. Existing ML models often exhibit overconfidence in uncertain scenarios, lack mechanisms to convey prediction reliability, and ignore causal relationships among medical features. These shortcomings undermine the trust, clarity, and ethical deployment of models in real clinical settings. Moreover, many predictive models fail to assess fairness across different patient groups, risking biases related to age, ethnicity, or timing of diagnosis. To foster trust in artificial intelligence (AI) for healthcare, predictive systems must not only prioritize accuracy but also incorporate uncertainty estimation, causal reasoning, interpretability, and fairness assessment.</p>
<p>Driven by these gaps, this research aims to develop an integrated diagnostic framework that bridges advanced ensemble learning with clinical accountability. The proposed Uncertainty-Aware Causal Explainable Ensemble Framework is designed to deliver not only high-accuracy predictions but also explicit uncertainty indicators, interpretable explanations, and fair performance across all patient subgroups. By embedding uncertainty quantification and causal feature analysis into the prediction pipeline, this work seeks to narrow the divide between algorithmic performance and clinical trust, paving the way for reliable, transparent, and ethically aligned AI systems in breast cancer prediction.</p>
</sec>
<sec id="s1_2">
<label>1.2</label>
<title>Research contribution</title>
<p>This research presents an integrated uncertainty-aware and causally explainable ensemble framework for breast cancer prediction that focuses on diagnostic reliability and clinical transparency. The main contributions are as follows:</p>
<list list-type="order">
<list-item>
<p>Developed a mixed ensemble of Light Gradient Boosting Machine (LightGBM), RF, and gradient boosting models to calculate epistemic uncertainty and produce reliable and qualified predictions.</p></list-item>
<list-item>
<p>Added causal inference by using Cramer&#x2019;s <italic>V</italic> and point biserial correlation to find confounding clinical factors and make sure the feature relations are causally correct.</p></list-item>
<list-item>
<p>Combined SHAP values, permutation importance, and model-based importance to give a clear and multi-view understanding of diagnostic predictions.</p></list-item>
<list-item>
<p>Used bootstrap-based confidence intervals and fairness difference metrics to check model stability and fair performance for different demographic groups.</p></list-item>
<list-item>
<p>The proposed research achieved 0.95 area under the curve (AUC) and 97.0% accuracy on certain predictions with 100% precision, which makes a clinically reliable and trusted diagnostic support system.</p></list-item>
</list>
<p>Overall, this research reduces the gap between prediction accuracy and clinical trust by combining uncertainty calculation, causal reasoning, and fairness-based explainability in one diagnostic AI framework.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>Dataset description</title>
<p>The experimental testing was carried out by using two datasets: one is the UCTH Breast Cancer Clinical Dataset (<xref ref-type="bibr" rid="B17">17</xref>) and the other is the Breast Cancer Wisconsin Diagnostic Dataset (<xref ref-type="bibr" rid="B18">18</xref>). The use of these two datasets gives both clinical and morphological validation of the proposed framework.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Clinical dataset</title>
<p>The Breast Cancer Clinical Dataset is a structured table dataset that includes demographic, pathological, and treatment-related features important for breast cancer diagnosis. The dataset has a total of 213 patient records collected from verified clinical sources, and each record is labeled with a binary value that shows the presence (malignant = 1) or absence (benign = 0) of cancer.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data composition</title>
<p>The dataset has 16 main features that show tumor characteristics, lymph node conditions, and patient demographics. After preprocessing and adding time-related features, the total features increased to 20 predictors. <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref> provides an overview of the key attributes. <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> shows the UCTH Breast Cancer dataset comprehensive overview. <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref> shows the UCTH Breast Cancer dataset detailed feature analysis.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Overview of breast cancer clinical dataset attributes.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Category</th>
<th valign="middle" align="left">Features</th>
<th valign="middle" align="center">Type</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Age, Age_bin, Menopause</td>
<td valign="middle" align="center">Num./Cat.</td>
</tr>
<tr>
<td valign="middle" align="left">Demo.</td>
<td valign="middle" align="left">Tumor Size, Inv-Nodes, Node-Caps</td>
<td valign="middle" align="center">Num./Cat.</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Deg-Malig (Grade)</td>
<td valign="middle" align="center">Ord.</td>
</tr>
<tr>
<td valign="middle" align="left">Tumor Char.</td>
<td valign="middle" align="left">Breast, Breast-Quad</td>
<td valign="middle" align="center">Cat.</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Irradiation, History</td>
<td valign="middle" align="center">Bin.</td>
</tr>
<tr>
<td valign="middle" align="left">Clin. Hist. Temporal</td>
<td valign="middle" align="left">Diagnosis Era, Recency, Age&#x2013;Era Int.</td>
<td valign="middle" align="center">Der. Num.</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p><italic>Target:</italic> Class (0 = Benign, 1 = Malignant).</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>UCTH Breast Cancer dataset comprehensive overview.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g001.tif">
<alt-text content-type="machine-generated">Five visualizations provide an overview of a breast cancer dataset. The pie chart shows the target variable distribution: 56.3% benign and 43.7% malignant. The correlation heatmap highlights feature interrelations, with strong correlations in red. A bar chart details S/N distribution between two classes. The missing values heatmap illustrates gaps across variables. A chart shows the distribution of data types: float, int, and categorical. Dataset statistics reveal 213 samples, 16 features, including 12 numerical and 4 categorical, with a memory usage of 0.1 megabytes and 205 complete cases.</alt-text>
</graphic></fig>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>UCTH Breast Cancer dataset detailed feature analysis.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g002.tif">
<alt-text content-type="machine-generated">Nine violin plots show the distribution of features by target class, comparing Class 0 and Class 1. Features include SN, Year, Age, Menopause, Tumor Size (cm), Inv-Nodes, Metastasis, History, and years_since_first_record. Each plot displays the data spread, with average values marked.</alt-text>
</graphic></fig>
<p>There was a total of 213 samples, of which 120 (56.3%) were benign and 93 (43.7%) were malignant. The dataset was randomly divided into training (70%, <italic>n</italic> = 149) and testing (30%, <italic>n</italic> = 64) parts with the same class ratio. The training data have 84 benign and 65 malignant samples, and the testing data have 36 benign and 28 malignant samples.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Preprocessing and quality assurance</title>
<p>Data preprocessing included filling missing values by using statistical methods: the mean for numerical variables and the mode for categorical variables. All categorical variables were converted by one-hot encoding and numerical features were scaled to zero mean and unit variance. Time-related feature addition was performed to make the dataset richer with attributes like diagnostic era and recency, which show clinical change with time.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Benchmark dataset: Breast Cancer Wisconsin (Diagnostic)</title>
<p>Along with the clinical dataset, this research also used the Breast Cancer Wisconsin Diagnostic dataset, which is a public benchmark dataset mostly used for breast cancer classification. It has 569 biopsy samples taken from digital fine needle aspirate (FNA) images of breast tissue, and each sample is labeled as malignant (M) or benign (B). The dataset has 30 numerical features that describe the shape and structure of cell nuclei, like radius, texture, perimeter, area, compactness, concavity, and symmetry calculated with three statistical forms: mean, standard error, and worst values. These attributes capture both global and localized shape irregularities and support strong morphological analysis of tumor malignancy as summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>. <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref> shows the Breast Cancer Wisconsin dataset comprehensive overview. <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> shows the Breast Cancer Wisconsin dataset detailed feature analysis.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Overview of breast cancer Wisconsin (diagnostic) dataset attributes.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Category</th>
<th valign="middle" align="left">Feature examples</th>
<th valign="middle" align="center">Count/Type</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Mean features</td>
<td valign="middle" align="left">Radius_mean,<break/>Texture_mean, Area_mean</td>
<td valign="middle" align="center">10/Num.</td>
</tr>
<tr>
<td valign="middle" align="left">SE features</td>
<td valign="middle" align="left">Radius_se, Texture_se,<break/>Compactness_se</td>
<td valign="middle" align="center">10/Num.</td>
</tr>
<tr>
<td valign="middle" align="left">Worst features</td>
<td valign="middle" align="left">Radius_worst, Area_worst,<break/>Concavity_worst</td>
<td valign="middle" align="center">10/Num.</td>
</tr>
<tr>
<td valign="middle" align="left">Target variable</td>
<td valign="middle" align="left">Diagnosis (M = malignant,<break/>B = benign)</td>
<td valign="middle" align="center">1/Cat.</td>
</tr>
<tr>
<td valign="middle" align="left">Total samples</td>
<td valign="middle" align="left">569 (357 benign, 212 malignant)</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Breast Cancer Wisconsin dataset comprehensive overview.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g003.tif">
<alt-text content-type="machine-generated">Comprehensive overview of the Breast Cancer Wisconsin dataset. Top left: pie chart shows target variable distribution with 62.7% benign and 37.3% malignant cases. Top middle: heatmap of feature correlations, highlighting strong relationships like concave points and perimeter. Top right: histogram of radius mean distribution by class, showing overlap between Class 0 and Class 1. Bottom left: heatmap of missing values indicating minimal data loss. Bottom middle: bar chart illustrates feature data types, predominantly float64. Bottom right: dataset statistics box with details such as 569 samples and 36 features.</alt-text>
</graphic></fig>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Breast Cancer Wisconsin dataset detailed feature analysis.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g004.tif">
<alt-text content-type="machine-generated">Violin plots showing detailed feature analysis for different attributes distributed by target class. Each plot compares distributions for Class 0 and Class 1 across features such as SN, Year, Age, Menopause, Tumor Size, Inv-Nodes, Metastasis, History, and Years Since First Record. Red lines indicate average values for each class.</alt-text>
</graphic></fig>
<p>All features are continuous except for the diagnostic label, and the data are clean, well-normalized, and ready for supervised learning. This dataset supports the clinical dataset by adding high-resolution morphological data that improve model generalization and help in cross domain diagnostic validation.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Ethical and clinical considerations</title>
<p>Both datasets have fully anonymous patient information and follow ethical and data protection rules suitable for secondary research. The clinical dataset was taken from open access medical sources and the proposed method can also be used with institutional datasets after ethical approval. The selected features especially Tumor Size, Inv-Nodes, and Menopausal Status are closely related to standard oncological indicators, which keep the developed framework clinically relevant and understandable.</p>
<p>Overall, the combined use of these datasets makes a balanced and clinically representative base for testing the proposed Uncertainty-Aware Causal Explainable Ensemble Framework. The clinical dataset gives real-world contextual features and the Wisconsin dataset adds strong morphological accuracy; together, they provide complete testing of performance uncertainty and explainability under different clinical data conditions.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Proposed methodology</title>
<p>This research presents an Uncertainty-Aware Causal Explainable Ensemble Framework for breast cancer prediction, which includes uncertainty calculation, causal feature analysis, multimodal explainability, and fairness checking in one reproducible structure. <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> shows the complete workflow of the proposed framework, which includes the step-by-step process of preprocessing, feature engineering, model development, uncertainty estimation, causal analysis, and fairness evaluation.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Workflow of the proposed uncertainty-aware causal&#x2013;explainable ensemble framework for breast cancer prediction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g005.tif">
<alt-text content-type="machine-generated">Flowchart depicting a data processing pipeline. Steps include: Data Loading &amp; Cleaning, Temporal Feature Engineering, Data Preprocessing, Causal Feature Analysis, Uncertainty-Aware Ensemble, Explainability Layer, Statistical Validation, Fairness Assessment, and Artifact Generation &amp; Inference. Arrows indicate the progression through these stages.</alt-text>
</graphic></fig>
<p>The methodology started with data preprocessing in which the clinical breast cancer dataset was checked for consistency and completeness. Missing values were filled by using statistical estimators; categorical variables were changed by one-hot encoding and numerical features were standardized for balanced scaling. To include time-based changes, a Temporal Feature Engineering module created extra features like diagnosis era, years since first record, and age&#x2013;era interaction that help the model learn from long-term diagnostic variations and time-based clinical trends.</p>
<p>An Uncertainty-Aware Ensemble Model was developed, which used three different classifiers&#x2014;LightGBM, RF, and gradient boosting&#x2014;to take advantage of different learning biases. Each classifier gave probability outputs, and these outputs were combined to calculate the mean predictive probability of all models. The standard deviation of these probabilities was used to measure epistemic uncertainty, which shows the disagreement between ensemble models. Predictions with uncertainty higher than the threshold (<italic>&#x3c4;</italic> = 0.15) were marked as uncertain to produce qualified predictions that focus on reliability more than coverage.</p>
<p>For the ensemble, we chose LightGBM, RF, and gradient boosting because they provide complementary strengths and work well on small to medium clinical tabular datasets. RF reduces variance, gradient boosting learns error correcting patterns, and LightGBM offers fast histogram-based learning with strong support for mixed feature types. These models also produce stable probability outputs, which help with uncertainty estimation and SHAP-based interpretability. We tested other models like XGBoost and simple neural networks, but XGBoost showed higher variance and weaker uncertainty calibration, while neural networks required more data and tuning. The three selected models offered the best balance of accuracy, stability, interpretability, and efficiency, making them suitable for clinical deployment.</p>
<p>To find statistically important and causally related variables, a Causal Feature Analyzer was used, which measured the relation between input features and diagnostic results by using Cramer&#x2019;s <italic>V</italic> for categorical features and point biserial correlation for continuous features. This analysis showed that Invasive Nodes (Inv-Nodes) and Tumor Size are the main clinical factors that affect malignancy. A Robust Multimodal Explainer was also added to improve interpretability and clarity by combining information from SHAP, permutation importance, and model feature importance. When SHAP calculation was not possible, a permutation-based backup was used to keep interpretability working and reliable for all samples.</p>
<p>Model reliability and generalization were tested by using bootstrap-based confidence interval estimation with 500 to 1,000 iterations for performance measures like accuracy, precision, recall, F1 score, and AUC. A Fairness Assessment Module checked predictive equality for different demographic groups like age and diagnostic era and measured the difference in performance to make sure that the model works fairly. Overall, this framework combines ensemble learning with causal and explainable AI ideas to achieve prediction performance, interpretability, and fairness suitable for clinical use.</p>
</sec>
<sec id="s4">
<label>4</label>
<title>Proposed architecture</title>
<p>The proposed architecture is a modular uncertainty-aware and causally explainable ensemble designed to provide diagnostic accuracy and clinical trust. As shown in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, the framework has five connected layers: (1) Data preprocessing and feature engineering, (2) Base ensemble learning, (3) Uncertainty quantification layer, (4) Causal explainability layer, and (5) Statistical fairness validation. Each part gives its own function to make a trusted AI-based breast cancer prediction system. The system begins with a structured dataset <inline-formula>
<mml:math display="inline" id="im1"><mml:mi mathvariant="script">D</mml:mi></mml:math></inline-formula>:</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Proposed architecture of the Uncertainty-Aware Causal&#x2013;Explainable Ensemble Framework for breast cancer prediction. The architecture combines preprocessing, temporal feature engineering, ensemble uncertainty calculation, causal explainability fusion, and fairness checking in one diagnostic pipeline.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g006.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a model interpretability workflow. The process involves data acquisition, preprocessing, and feature enrichment, creating a prepared feature set. An uncertainty-aware ensemble model evaluates uncertainty and feature influence, generating qualified predictions. Performance analysis and bias detection ensure validated metrics and fairness assessment. The causal feature analyzer contributes to explainable predictions using a robust multi-modal explainer. Final outputs include diagnostic insights, reliable predictions, causal and explainable results, and clinician decision support. Connections between elements show process flow and relationships.</alt-text>
</graphic></fig>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi mathvariant="script">D</mml:mi><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>}</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> is the <italic>i</italic>th input vector containing demographic, pathological, and clinical features, and <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo>{</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> represents the ground truth label (0 = benign, 1 = malignant). Data normalization is applied to maintain statistical consistency:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3bc;</mml:mi></mml:mrow><mml:mi>&#x3c3;</mml:mi></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#xb5;</italic> and <italic>&#x3c3;</italic> denote the mean and standard deviation of each feature, respectively.</p>
<p>1. Temporal feature augmentation: To model diagnostic changes, a temporal encoder creates time-based attributes that represent diagnosis period recency and the relation between age and diagnostic period. These features help the model to learn generational and time-dependent changes in breast cancer characteristics.</p>
<p>2. Heterogeneous ensemble learning: The model ensemble consists of three classifiers&#x2014;LightGBM, RF, and gradient boosting&#x2014;chosen for their complementary learning biases and robustness to clinical data heterogeneity. Each base learner <italic>M<sub>k</sub></italic> outputs a class probability <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> for the <italic>i</italic>th sample. The ensemble mean probability and epistemic uncertainty are computed as:</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:msub><mml:mrow><mml:mover><mml:mi>p</mml:mi><mml:mi>&#xaf;</mml:mi></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>p</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>,</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:msqrt></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>K</italic> = 3 denotes the number of models. The final prediction rule incorporates an uncertainty threshold <italic>&#x3c4;</italic>:</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable columnalign="left" equalrows="true" equalcolumns="true"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>if&#x2004;</mml:mtext><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>p</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2265;</mml:mo><mml:mn>0.5</mml:mn><mml:mtext>&#x2004;and&#x2004;</mml:mtext><mml:msub><mml:mi>u</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>&#x3c4;</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>if&#x2004;</mml:mtext><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>p</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&lt;</mml:mo><mml:mn>0.5</mml:mn><mml:mtext>&#x2004;and&#x2004;</mml:mtext><mml:msub><mml:mi>u</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>&#x3c4;</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>uncertain</mml:mtext><mml:mo>,</mml:mo></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>if&#x2004;</mml:mtext><mml:msub><mml:mi>u</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&gt;</mml:mo><mml:mi>&#x3c4;</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>This mechanism ensures that predictions marked as <italic>uncertain</italic> are deferred for clinical review, safeguarding against overconfident misclassifications.</p>
<p>3. Causal feature analysis: To enhance interpretability, causal inference techniques identify confounding variables that may distort model learning. Categorical relationships are quantified using Cramer&#x2019;s <italic>V</italic>:</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mi>V</mml:mi><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mi>&#x3c7;</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo stretchy="false">/</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>min</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>while continuous&#x2013;binary associations are captured by the point-biserial coefficient:</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>x</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>n</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mrow><mml:msup><mml:mi>n</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Features with |<italic>r<sub>pb</sub></italic>|<italic>&gt;</italic> 0.1 or <italic>V &gt;</italic> 0.1 are considered potential confounders, reflecting significant diagnostic influence.</p>
<p>4. Multimodal explainability integration: This layer fuses diverse interpretability techniques&#x2014;SHAP, permutation importance, and model-native feature importance&#x2014;to deliver consistent, clinician-understandable insights. The SHAP contribution for each feature <italic>x<sub>j</sub></italic> is expressed as:</p>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:msub><mml:mi>&#x3d5;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="double-struck">E</mml:mi><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x2286;</mml:mo><mml:mi>F</mml:mi><mml:mo>\</mml:mo><mml:mo>{</mml:mo><mml:mi>j</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">[</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x222a;</mml:mo><mml:mo>{</mml:mo><mml:mi>j</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>S</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>S</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>S</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>and permutation-based relevance as:</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mtext>AUC</mml:mtext></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mtext>AUC</mml:mtext></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>The consensus importance metric integrates all three views:</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>3</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mi>&#x3d5;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>5. Statistical and fairness validation: The reliability of the ensemble is established using bootstrap-based confidence intervals:</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mi>C</mml:mi><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mn>95</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>M</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mn>2.5</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>M</mml:mi><mml:mo>*</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mn>97.5</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>M</mml:mi><mml:mo>*</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>M</italic><sup>&#x2217;</sup> represents bootstrap-resampled metric estimates. Fairness is evaluated by comparing subgroup performance:</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:msub><mml:mtext>&#x394;</mml:mtext><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow></mml:munder><mml:mo>&#xa0;</mml:mo><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:msub><mml:mi>c</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:munder><mml:mrow><mml:mi>min</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow></mml:munder><mml:mo>&#xa0;</mml:mo><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:msub><mml:mi>c</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>ensuring equitable outcomes across demographic groups such as age and diagnostic era.</p>
<p>The proposed architecture makes a closed-loop diagnostic intelligent system where uncertainty, interpretability, and fairness are the main design principles. Each module helps to produce clinically meaningful and confidence-aware predictions that follow the needs of trusted AI- and evidence-based medical decision-making.</p>
<p>The integration of these parts allows the framework to work as a transparent and self-evaluating diagnostic system. By combining uncertainty calculation, causal reasoning, and explainability fusion, the model not only predicts breast cancer outcomes but also explains its decisions in a clinically understandable way. This complete design makes sure that each diagnostic result has measurable confidence and fairness indicators that build trust, reproducibility, and accountability in AI-based clinical decision-making. The modular structure also allows easy connection with different clinical datasets and changing diagnostic protocols without retraining the whole model. The ability of the architecture to measure uncertainty while keeping explainability supports clinical triage, model validation, and decision support. In the end, the proposed system gives a base for future use of reliable, human-aligned, and regulation-ready AI tools in medical diagnostics. The proposed uncertainty-aware causal-explainable ensemble is described in <xref ref-type="statement" rid="algo1">Algorithm 1</xref> for training and <xref ref-type="statement" rid="algo2">Algorithm 2</xref> for deployment-time inference with uncertainty estimation and explanation.</p>
<statement content-type="algorithm" id="algo1">
<label>Algorithm 1</label>
<p><graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-t011.tif"/></p>
</statement>
<statement content-type="algorithm" id="algo2">
<label>Algorithm 2</label>
<p><graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-t012.tif"/></p>
</statement>
</sec>
<sec id="s5" sec-type="results">
<label>5</label>
<title>Results and discussion</title>
<p>The performance and explainability of the proposed Uncertainty-Aware Causal Explainable Ensemble Framework were fully tested in many aspects like prediction strength, uncertainty calculation, causal interpretability, explainability, fairness, and clinical importance. The testing used two datasets together to make sure of clinical realism and experimental generalization.</p>
<p>The first dataset, the <italic>Breast Cancer Clinical Dataset</italic> (see <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>), has 213 patient records that include demographic, pathological, and time-related variables. After preprocessing and adding time-based features, the dataset has 20 engineered predictors. The data were divided in a 70:30 ratio with 149 training and 64 testing samples and a balanced class distribution (benign = 120 and malignant = 93). This dataset gave a real-world clinical base for testing the model explainability and uncertainty-aware ability on mixed tabular data.</p>
<p>To validate the robustness and scalability of the proposed architecture, experiments were also conducted on the publicly available <italic>Breast Cancer Wisconsin (Diagnostic)</italic> dataset (see <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>), which consists of 569 biopsy samples derived from digitized fine-needle aspirate (FNA) images of breast tissue. Each instance contains 30 numerical features representing morphological characteristics of cell nuclei&#x2014;computed as mean, standard error, and worst values spanning properties such as radius, texture, area, and concavity. The benchmark dataset enabled comparative evaluation against existing ML approaches, demonstrating the framework&#x2019;s ability to generalize beyond small-scale clinical data while maintaining explainability and fairness.</p>
<p>To test the strength and scalability of the proposed architecture, experiments were also done on the public <italic>Breast Cancer Wisconsin (Diagnostic)</italic> dataset (see <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>), which has 569 biopsy samples taken from digital FNA images of breast tissue. Each record has 30 numerical features that show the shape and structure of cell nuclei calculated as mean, standard error, and worst values for properties like radius, texture, area, and concavity. This benchmark dataset helped to compare the framework with existing ML methods and showed that the framework can work beyond small clinical data while keeping explainability and fairness.</p>
<p>Together, these two datasets provided complementary understanding: the clinical dataset captured time-based and demographic changes important for causal analysis, and the benchmark dataset focused on detailed morphological features. Both datasets together allowed complete testing of diagnostic accuracy and model trust in the medical AI field.</p>
<p>While the framework uses Cramer&#x2019;s <italic>V</italic> and point biserial correlation to identify important clinical confounders, the current causal analysis is still correlation based. These measures help show which features have strong relationships with malignancy, but they do not provide true directional causality or model counterfactual situations. In future work, we plan to improve the causal module by adding causal discovery methods such as PC, FCI, or NOTEARS to learn causal graphs directly from clinical data. We also aim to include structural causal models and counterfactual reasoning so that the system can simulate &#x201c;what if&#x201d; scenarios, such as how the malignancy risk changes if a specific clinical variable is modified. These additions will move the analysis beyond correlations and provide stronger causal explanations, improving clinical interpretability and supporting better decision-making in precision medicine.</p>
<p>Our uncertainty quantification approach is based on ensemble disagreement, which captures epistemic but not aleatoric uncertainty. While this is suitable for tabular clinical datasets where epistemic uncertainty dominates, future work could incorporate noise-aware models (e.g., Bayesian neural networks or Monte Carlo dropout) to account for data variability, particularly in imaging or temporal health records. Nevertheless, the current framework&#x2019;s ability to identify low-confidence cases provides a clinically useful safeguard against overconfident predictions.</p>
<p>While the proposed framework demonstrates strong and consistent performance on retrospective datasets, its clinical readiness must be validated through external multicenter studies and prospective trials. Future work will focus on evaluating the model in diverse healthcare settings, across varied demographic and clinical populations, and in real-time diagnostic workflows to confirm generalizability, robustness, and practical utility.</p>
<sec id="s5_1">
<label>5.1</label>
<title>Experimental setup</title>
<p>All experiments were done to test the proposed Uncertainty-Aware Causal Explainable Ensemble Framework under a reproducible computational setup. The framework was developed in Python 3.10 by using the scikit-learn, LightGBM, XGBoost, and SHAP libraries and executed on a high-performance workstation with an Intel Core i9 processor, 3.0 GHz, 24 cores, 64 GB RAM, and NVIDIA RTX 4090 GPU with 24 GB RAM running Ubuntu 22.04 LTS and CUDA 12.2. The dataset used in this research was taken from the University College Teaching Hospital (UCTH) breast cancer group that has 213 anonymous patient records with 16 clinical features and a class distribution of 120 benign and 93 malignant cases. After data loading, missing values were filled by using median and mode methods; categorical features were one-hot encoded and numerical features were standardized with <italic>z</italic>-score normalization. A temporal feature engineering module was used to create time-based predictors like diagnosis era, years since first record, and age&#x2013;era interaction, which expanded the feature space to 20 variables. The dataset was divided into 70% training and 30% testing data by using stratified sampling to keep class balance. Fivefold cross-validation was used to tune hyperparameters, and random seed (random_state = 42) was used to keep experimental results reproducible. To address the limited sample size of the UCTH dataset, we trained the framework using fivefold stratified cross-validation to keep the results stable across different splits and reduce sampling variance. This step was applied to all base models to ensure consistent ensemble behavior. To check generalization beyond the clinical dataset, we also tested the full pipeline on the Breast Cancer Wisconsin Diagnostic dataset as an external validation set. 
The strong and consistent performance on both datasets, one clinical and small and the other morphological and larger, shows that the proposed framework is robust and can generalize well across different data sources. The ensemble model used three base classifiers, LightGBM, RF, and gradient boosting, which were selected because of their different learning styles and strong performance on small medical datasets. Each model was tuned by grid search, and their probability outputs were averaged to get mean prediction. The standard deviation of model probabilities in the ensemble was calculated to show epistemic uncertainty, and predictions with uncertainty higher than threshold <italic>&#x3c4;</italic> = 0.15 were marked as uncertain. Model performance was tested by using accuracy, precision, recall, F1 score, and AUC for both all predictions and only certain predictions. Statistical stability was checked by bootstrap-based confidence interval estimation with 1,000 iterations for all performance measures. Fairness testing was done for different demographic groups like age and diagnostic era to check the difference in prediction behavior. For explainability, a multimodal explainability module combined SHAP permutation importance and model feature importance scores and causal analysis used Cramer&#x2019;s <italic>V</italic> for categorical features and point biserial correlation for continuous features to find confounding factors. All experiment runs were managed with MLflow and DVC to maintain full reproducibility with the same configurations in all executions for consistent and reliable results.</p>
<p>1. Model Performance: The ensemble model showed strong discrimination ability for all main metrics. <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref> shows the results for both normal and uncertainty-filtered conditions. The normal ensemble predictions obtained an overall accuracy of 0.92 and an AUC of 0.95 while the uncertainty-aware predictions improved to 0.95 accuracy and 1.000 precision after removing uncertain samples above threshold <italic>&#x3c4;</italic> = 0.15. Bootstrap-based confidence intervals confirmed the model stability with AUC &#x2208; [0.85, 0.97] and accuracy &#x2208; [0.90, 0.99].</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Model performance and confidence intervals.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Config.</th>
<th valign="middle" align="left">Acc.</th>
<th valign="middle" align="left">Prec.</th>
<th valign="middle" align="left">Rec.</th>
<th valign="middle" align="left">F1</th>
<th valign="middle" align="left">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Std. ensemble</td>
<td valign="middle" align="left">0.92</td>
<td valign="middle" align="left">0.91</td>
<td valign="middle" align="left">0.91</td>
<td valign="middle" align="left">0.93</td>
<td valign="middle" align="left">0.95</td>
</tr>
<tr>
<td valign="middle" align="left">Certain only</td>
<td valign="middle" align="left">0.95</td>
<td valign="middle" align="left">1.000</td>
<td valign="middle" align="left">0.92</td>
<td valign="middle" align="left">0.94</td>
<td valign="middle" align="left">0.97</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Uncertainty: <italic>&#xb5;</italic> = 0.065, <italic>&#x3c3;</italic> = 0.074, Max = 0.348, Min = 0.000.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>2. Causal Feature Analysis: Causal analysis found many important confounding factors related to malignancy as shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>. The categorical correlation was measured by using Cramer&#x2019;s <italic>V</italic> and the numerical relation was measured by point biserial correlation coefficient (<italic>r<sub>pb</sub></italic>). Among these factors, Invasive Nodes (0.808), Tumor Size (0.691), and Age (0.531) were the most important features affecting cancer diagnosis, which also match the findings of known medical studies.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Causal feature analysis.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Feature</th>
<th valign="middle" align="center">Type</th>
<th valign="middle" align="center">Assoc.</th>
<th valign="middle" align="center">Clinical insight</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Inv-Nodes</td>
<td valign="middle" align="center">Cat.</td>
<td valign="middle" align="center">0.808</td>
<td valign="middle" align="center">Node involvement &#x2192; malignancy</td>
</tr>
<tr>
<td valign="middle" align="left">Metastasis</td>
<td valign="middle" align="center">Cat.</td>
<td valign="middle" align="center">0.738</td>
<td valign="middle" align="center">Confirms spread</td>
</tr>
<tr>
<td valign="middle" align="left">Tumor size (cm)</td>
<td valign="middle" align="center">Num.</td>
<td valign="middle" align="center">0.691</td>
<td valign="middle" align="center">Larger &#x2192; higher risk</td>
</tr>
<tr>
<td valign="middle" align="left">Age</td>
<td valign="middle" align="center">Num.</td>
<td valign="middle" align="center">0.531</td>
<td valign="middle" align="center">Older &#x2192; elevated risk</td>
</tr>
<tr>
<td valign="middle" align="left">Age_bin</td>
<td valign="middle" align="center">Cat.</td>
<td valign="middle" align="center">0.565</td>
<td valign="middle" align="center">Trend aligns w/data</td>
</tr>
<tr>
<td valign="middle" align="left">Menopause</td>
<td valign="middle" align="center">Cat.</td>
<td valign="middle" align="center">0.371</td>
<td valign="middle" align="center">Hormonal effect</td>
</tr>
<tr>
<td valign="middle" align="left">History</td>
<td valign="middle" align="center">Cat.</td>
<td valign="middle" align="center">0.186</td>
<td valign="middle" align="center">Prior issues weakly linked</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>3. Explainability and Feature Importance: The multimodal explainability module combined SHAP permutation and model-based importance measures to make one feature ranking. <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> shows the combined importance values scaled between 0 and 1. The top three features, Inv Nodes, Tumor Size, and Age, were highest in all explainability methods, which confirms their clinical understanding. SHAP visualizations, global and local, showed that higher node count and larger tumor size increase malignancy probability while premenopausal cases were mostly benign. To check the reliability of the fused explainability results, we performed a consistency analysis across the three interpretability methods: SHAP, permutation importance, and model-based importance. We calculated Spearman rank correlation for each pair of methods, and the results showed strong agreement in feature rankings (<italic>&#x3c1;</italic> between 0.81 and 0.89). The top clinical predictors identified by SHAP, such as Inv Nodes, Tumor Size, and Age, also appeared at the top in both permutation and tree-based importance. This strong alignment shows that the fused consensus score is stable and not influenced by any single method. Adding this consistency check improves the robustness and trustworthiness of the explainability module.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Consensus feature importance across explainability methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Feature</th>
<th valign="middle" align="center">Tree-based</th>
<th valign="middle" align="center">Permutation</th>
<th valign="middle" align="center">SHAP (|<italic>&#x3d5;<sub>j</sub></italic>|)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Inv-Nodes</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">0.934</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="left">Tumor size (cm)</td>
<td valign="middle" align="center">0.872</td>
<td valign="middle" align="center">0.891</td>
<td valign="middle" align="center">0.847</td>
</tr>
<tr>
<td valign="middle" align="left">Age</td>
<td valign="middle" align="center">0.743</td>
<td valign="middle" align="center">0.708</td>
<td valign="middle" align="center">0.752</td>
</tr>
<tr>
<td valign="middle" align="left">Metastasis</td>
<td valign="middle" align="center">0.652</td>
<td valign="middle" align="center">0.684</td>
<td valign="middle" align="center">0.671</td>
</tr>
<tr>
<td valign="middle" align="left">Menopause</td>
<td valign="middle" align="center">0.445</td>
<td valign="middle" align="center">0.426</td>
<td valign="middle" align="center">0.453</td>
</tr>
<tr>
<td valign="middle" align="left">History</td>
<td valign="middle" align="center">0.213</td>
<td valign="middle" align="center">0.197</td>
<td valign="middle" align="center">0.221</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>4. Fairness and Subgroup Evaluation: To check fairness in model behavior, the performance measures were divided by age-based groups (<italic>Age_bin</italic>). <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref> shows the scores of each group and the difference measures. The highest differences found in accuracy (0.194) and recall (0.333) were within the acceptable fairness range, which confirms consistent model reliability for all demographic groups.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Fairness evaluation across age subgroups (<italic>Age_bin</italic>).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Age group</th>
<th valign="middle" align="center">Samples</th>
<th valign="middle" align="center">Acc.</th>
<th valign="middle" align="center">Prec.</th>
<th valign="middle" align="center">Rec.</th>
<th valign="middle" align="center">F1</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Below 40 years</td>
<td valign="middle" align="center">54</td>
<td valign="middle" align="center">0.833</td>
<td valign="middle" align="center">0.833</td>
<td valign="middle" align="center">0.667</td>
<td valign="middle" align="center">0.741</td>
</tr>
<tr>
<td valign="middle" align="left">40&#x2013;59 years</td>
<td valign="middle" align="center">97</td>
<td valign="middle" align="center">0.857</td>
<td valign="middle" align="center">0.889</td>
<td valign="middle" align="center">0.889</td>
<td valign="middle" align="center">0.889</td>
</tr>
<tr>
<td valign="middle" align="left">60+ years</td>
<td valign="middle" align="center">62</td>
<td valign="middle" align="center">0.917</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">1.000</td>
</tr>
<tr>
<td valign="middle" align="left">Disparity (max&#x2013;min)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.194</td>
<td valign="middle" align="center">0.333</td>
<td valign="middle" align="center">0.333</td>
<td valign="middle" align="center">0.200</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>5. Clinical Relevance and Uncertainty Utility: The uncertainty-aware ensemble gives useful information by clearly marking uncertain predictions. This helps doctors to send high-risk cases for manual checking and reduce false diagnosis. The model&#x2019;s ability to get 100% precision on certain predictions shows its clinical safety level and ensures that automatic malignant predictions are not given without confidence.</p>
<p>The framework obtained an AUC of 0.95, an overall accuracy of 0.844, and a precision of 1.000 for certain predictions, which is better than normal ensemble models while keeping explainability and fairness. Its causal awareness and explainability give transparency, and the uncertainty calculation provides protection from overconfidence. All these parts together make a clinically reliable and ethically aligned decision support framework for breast cancer prediction. <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref> shows the UCTH Breast Cancer Uncertainty Analysis and Uncertainty-Aware Ensemble. <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> shows the UCTH Breast Cancer SHAP Summary Plot. <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref> shows the UCTH Breast Cancer Confusion Matrix.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>UCTH breast cancer uncertainty analysis and uncertainty-aware ensemble.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g007.tif">
<alt-text content-type="machine-generated">Uncertainty analysis for an ensemble model is displayed with four graphs. Top left: Histogram of uncertainty distribution with a 0.15 threshold. Top right: Bar chart showing accuracy by category, with highest accuracy in the &#x201c;Certain&#x201d; category. Bottom left: Box plots of uncertainty distribution by true class, comparing benign and malignant. Bottom right: Cumulative distribution graph indicating threshold at 0.15.</alt-text>
</graphic></fig>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>UCTH breast cancer SHAP summary plot.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g008.tif">
<alt-text content-type="machine-generated">SHAP summary plot illustrates the impact of features on model output using the interventional method. Features such as Inv-Nodes, Age, and Tumor Size are listed along the Y-axis. Each dot represents a SHAP value indicating the feature's contribution, with the color gradient from blue to pink denoting low to high feature values. The X-axis shows SHAP values ranging from negative to positive, highlighting influence direction and magnitude.</alt-text>
</graphic></fig>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>UCTH breast cancer confusion matrix.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g009.tif">
<alt-text content-type="machine-generated">Comparison of two confusion matrices for an Uncertainty-Aware Ensemble model. The left matrix for all predictions shows 31 true negatives, 5 false positives, 5 false negatives, and 23 true positives with 84.4% accuracy. The right matrix for certain predictions shows 31 true negatives, 0 false positives, 4 false negatives, and 22 true positives with 93.0% accuracy. Color gradients indicate data density.</alt-text>
</graphic></fig>
<p>6. Extended Evaluation on the Wisconsin Diagnostic Dataset: To validate model generalization, the same experimental pipeline was applied to the <italic>Breast Cancer Wisconsin (Diagnostic)</italic> dataset. The dataset contains 569 samples (357 benign, 212 malignant) and 30 quantitative morphological features of cell nuclei. After preprocessing, 40 predictors were used. The uncertainty-aware ensemble achieved 94.7% accuracy, 1.000 precision, 0.90 recall, and an AUC of 0.996. When uncertain predictions exceeding the rejection threshold <italic>&#x3c4;</italic> = 0.15 were filtered, the accuracy improved to 98.7%, with a certainty rate of 92%. Bootstrap-based confidence intervals confirmed AUC &#x2208; [0.988, 0.99] and accuracy &#x2208; [0.91, 0.98].</p>
<p>To check model generalization, the same experimental process was used on the Breast Cancer Wisconsin Diagnostic dataset. This dataset has 569 samples with 357 benign and 212 malignant and 30 numerical morphological features of cell nuclei. After preprocessing, 40 predictors were used. The uncertainty-aware ensemble obtained 94.7% accuracy, 1.000 precision, 0.92 recall, and an AUC of 0.96. When uncertain predictions above rejection threshold <italic>&#x3c4;</italic> = 0.15 were removed, the accuracy increased to 99.7% with a 92% certainty rate. Bootstrap-based confidence intervals confirmed AUC &#x2208; [0.988, 0.99] and accuracy &#x2208; [0.91, 0.98].</p>
<p>Causal analysis identified 27 potential confounders, where <italic>radius_mean</italic>, <italic>perimeter_mean</italic>, and <italic>area_mean</italic> showed the strongest associations with malignancy. SHAP-based interpretability confirmed <italic>concave points_worst</italic>, <italic>perimeter_worst</italic>, and <italic>concave points_mean</italic> as the top predictors of cancer, aligning with known morphological abnormalities. Fairness evaluation across tumor-size eras yielded minimal disparity (accuracy disparity = 0.075, F1 disparity = 0.150), confirming equitable behavior across subgroups. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref> shows the Breast Cancer Wisconsin Uncertainty Analysis and Uncertainty-Aware Ensemble. <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> shows the Breast Cancer Wisconsin SHAP Summary Plot. <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref> shows the Breast Cancer Wisconsin Confusion Matrix.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Breast Cancer Wisconsin dataset uncertainty analysis.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g010.tif">
<alt-text content-type="machine-generated">Prediction Uncertainty Analysis includes four graphs. Top left: bar chart showing frequency distribution of uncertainty with a mean of 0.044. Top right: scatter plot comparing uncertainty with predicted probability, colored by true class. Bottom left: box plots showing uncertainty distribution for benign and malignant true classes. Bottom right: line graph depicting accuracy versus uncertainty level, showing a declining trend.</alt-text>
</graphic></fig>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Breast Cancer Wisconsin dataset model overall performance.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g011.tif">
<alt-text content-type="machine-generated">Comprehensive model performance analysis graphs include an ROC curve with AUC of 0.996, a precision-recall curve with AUC of 0.994, a confusion matrix showing 99% accuracy, a probability distribution highlighting benign versus malignant predictions, a calibration curve comparing model to perfect calibration, and an F1-score versus classification threshold graph indicating optimal at 0.2.</alt-text>
</graphic></fig>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Breast Cancer Wisconsin SHAP summary plot.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1751090-g012.tif">
<alt-text content-type="machine-generated">SHAP summary plot displaying the impact on model output for various features. Each feature is plotted on the y-axis, with SHAP values on the x-axis indicating the extent and direction of impact. Points are colored from blue (low feature value) to pink (high feature value).</alt-text>
</graphic></fig>
<p>These results demonstrate that the framework generalizes effectively from a small, heterogeneous clinical dataset to a large, structured benchmark dataset while retaining high interpretability, fairness, and uncertainty awareness. The Wisconsin dataset results further validate the model&#x2019;s discriminative power, robustness, and reliability for real-world medical applications.</p>
</sec>
<sec id="s5_2">
<label>5.2</label>
<title>Comparative analysis across datasets</title>
<p>A comparison test was done to analyze the performance of the proposed Uncertainty-Aware Causal Explainable Ensemble Framework on both the clinical dataset (Dataset I) and the benchmark Breast Cancer Wisconsin Diagnostic dataset (Dataset II). The results clearly show that the framework performed better on Dataset II for all metrics, which shows its adaptability and scalability for high-quality numerical data sources. <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref> summarizes the performance of the proposed model on the Wisconsin dataset (Dataset II) and <xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref> shows the detailed comparison results. Dataset II achieved an overall accuracy of 99.7% and an AUC of 0.996, which is higher than Dataset I by more than 10% in accuracy and 9% in AUC. In uncertainty filtered conditions, Dataset II achieved 98.7% accuracy with a 91.2% certainty rate while maintaining perfect precision (1.000). This shows strong reduction in uncertain predictions from 10.9% to 8.8% and better agreement of the ensemble, which confirms improved model reliability and confidence.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Performance summary on the Wisconsin dataset (Dataset II).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="center">Acc.</th>
<th valign="middle" align="center">Prec.</th>
<th valign="middle" align="center">Rec.</th>
<th valign="middle" align="center">F1</th>
<th valign="middle" align="center">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Standard ensemble</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.99</td>
</tr>
<tr>
<td valign="middle" align="left">Certain only (<italic>&#x3c4;</italic> = 0.15)</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.99</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Uncertainty: <italic>&#xb5;</italic> = 0.044, <italic>&#x3c3;</italic> = 0.090, Max = 0.459, Min = 0.000.</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Comparative results between the clinical dataset (I) and the Wisconsin dataset (II).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="center">Dataset I</th>
<th valign="middle" align="center">Dataset II</th>
<th valign="middle" align="center">Improvement</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Accuracy (Std.)</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">+10.3%</td>
</tr>
<tr>
<td valign="middle" align="left">Precision (Std.)</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">+17.9%</td>
</tr>
<tr>
<td valign="middle" align="left">Recall (Std.)</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">+3.8%</td>
</tr>
<tr>
<td valign="middle" align="left">F1 score (Std.)</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">+10.3%</td>
</tr>
<tr>
<td valign="middle" align="left">AUC</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.99</td>
<td valign="middle" align="center">+9.1%</td>
</tr>
<tr>
<td valign="middle" align="left">Certain accuracy (<italic>&#x3c4;</italic> = 0.15)</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">+5.7%</td>
</tr>
<tr>
<td valign="middle" align="left">Certain precision</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">1.000</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="left">Uncertain predictions (%)</td>
<td valign="middle" align="center">10.9</td>
<td valign="middle" align="center">8.8</td>
<td valign="middle" align="center">&#x2193;2.1%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From a technical point of view, the higher performance of Dataset II is due to the following: (1) it has 30 well-organized numerical features with strong discrimination ability, (2) it is larger and has a more balanced dataset (569 samples compared to 213), and (3) it has less data noise than the mixed clinical records of Dataset I. These factors helped the ensemble models, especially LightGBM and gradient boosting, to learn complex nonlinear patterns with lower uncertainty variance (<italic>&#xb5;</italic> = 0.044 vs. 0.065).</p>
<p>Furthermore, causal analysis on Dataset II identified 27 statistically significant confounders (e.g., <italic>radius_mean</italic>, <italic>perimeter_mean</italic>, and <italic>area_mean</italic>), providing stronger causal interpretability compared to Dataset I, which identified seven. SHAP-based interpretability on both datasets confirmed alignment between statistical relevance and clinical knowledge. The Wisconsin dataset&#x2019;s results therefore validate the proposed framework&#x2019;s scalability, robustness, and adaptability for broader medical AI applications.</p>
<p>Dataset I focuses on explainability in real-world clinical cases while Dataset II shows the upper limit of the framework prediction and uncertainty-aware ability. The combined results confirm that the proposed model gives both high diagnostic performance and trusted explainability.</p>
</sec>
</sec>
<sec id="s6">
<label>6</label>
<title>State-of-the-art comparison</title>
<p>Recent studies on breast cancer prediction have increasingly integrated ML with XAI to improve diagnostic reliability and transparency. Islam et&#xa0;al. (<xref ref-type="bibr" rid="B20">20</xref>) evaluated several traditional classifiers, including SVM, RF, Logistic Regression (LR), Gradient Boosting Classifier (GBC), K-Nearest Neighbors (KNN), XGBoost, and Decision Tree Classifier (DTC), on the Breast Cancer Wisconsin (Diagnostic) dataset. Their SVM model achieved an accuracy of 98.25% (AUC &#x2248; 0.98) and an F1 score of 0.99, using SHAP and LIME for interpretability. While the study demonstrated strong diagnostic performance, it lacked mechanisms for uncertainty estimation, causal feature discovery, and fairness evaluation&#x2014;critical components for clinical trustworthiness.</p>
<p>Similarly, Arravalli et&#xa0;al. (<xref ref-type="bibr" rid="B19">19</xref>) introduced a stacking ensemble combining nine classifiers and five interpretability methods (SHAP, LIME, ELI5, QLattice, and Anchor). Their framework achieved an AUC of 0.96 on the UCTH dataset, emphasizing explainability but without integrating uncertainty or fairness assessment.</p>
<p>In comparison, the proposed Uncertainty-Aware Causal Explainable Ensemble Framework improves the existing models by adding uncertainty calculation, causal analysis, and fairness checking directly in the prediction process. When tested on the WDBC dataset, the framework achieved an AUC of 0.996, an F1 score of 0.984, and an accuracy of 0.997 under normal conditions with a certain-only accuracy of 0.987 and a precision of 1.000. In the UCTH dataset, it achieved an AUC of 0.95 and a certain accuracy of 0.93 with no false positives. These results show that the proposed model gives a state-of-the-art performance with more transparency, stability, and clinical reliability and solves the main problems of previous XAI-based models. <xref ref-type="table" rid="T9"><bold>Table&#xa0;9</bold></xref> compares the proposed framework with existing state-of-the-art breast cancer prediction models, highlighting its superior performance and added integration of uncertainty, causal analysis, and fairness evaluation.</p>
<table-wrap id="T9" position="float">
<label>Table&#xa0;9</label>
<caption>
<p>Comparison with state-of-the-art breast cancer prediction models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Study</th>
<th valign="middle" align="left">Dataset</th>
<th valign="middle" align="left">Results</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Islam et&#xa0;al. (2025) (<xref ref-type="bibr" rid="B20">20</xref>)</td>
<td valign="middle" align="left">Breast Cancer Wisconsin (Diagnostic)</td>
<td valign="middle" align="left">Acc. = 98.25%, AUC <inline-formula>
<mml:math display="inline" id="im45"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#x2248;</mml:mo><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula>0.98, F1 = 0.99; used SHAP &amp; LIME for interpretability</td>
</tr>
<tr>
<td valign="middle" align="left">Arravalli et&#xa0;al. (2025) (<xref ref-type="bibr" rid="B19">19</xref>)</td>
<td valign="middle" align="left">UCTH Breast Cancer Dataset</td>
<td valign="middle" align="left">AUC = 0.96, F1 = 0.84; stacking ensemble (9 classifiers) with five XAI methods</td>
</tr>
<tr>
<td valign="middle" align="left">Proposed method</td>
<td valign="middle" align="left">Breast Cancer Wisconsin and UCTH Datasets</td>
<td valign="middle" align="left">Breast Cancer Wisconsin: AUC = 0.996, Acc. = 0.997, F1 = 0.984, Prec. = 1.00;<break/>UCTH: AUC = 0.95, Acc. = 0.93, F1 = 0.91; integrates uncertainty, causal, and fairness modules</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s7">
<label>7</label>
<title>Ablation study</title>
<p>To check the separate effect of each part in the proposed Uncertainty-Aware Causal Explainable Ensemble Framework, an ablation study was done by turning off specific modules one by one and observing the changes in prediction performance, uncertainty calibration, and interpretability stability. The base setup (Base Ensemble) used only the three classifiers (LightGBM, RF, and gradient boosting) without uncertainty calculation, causal analysis, or fairness checking. Each subsequent setup added one module to see its individual effect.</p>
<p>The results shown in <xref ref-type="table" rid="T10"><bold>Table&#xa0;10</bold></xref> indicate that uncertainty calculation clearly improved diagnostic reliability by detecting 10.9% uncertain cases, which increased precision by 9.6% for certain predictions. Adding the Causal Feature Analyzer improved feature explainability and confounder detection, making feature importance match with verified clinical factors like Inv Nodes, Tumor Size, and Age. Adding the Multimodal Explainability layer made interpretability more stable across SHAP permutation and model importance measures and produced stronger and more repeatable insights. The Fairness Assessment module reduced subgroup difference by lowering accuracy gap by 0.12 and ensured fair generalization for age and diagnostic era groups.</p>
<table-wrap id="T10" position="float">
<label>Table&#xa0;10</label>
<caption>
<p>Ablation study showing the contribution of each module in the proposed framework.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model configuration</th>
<th valign="middle" align="left">Accuracy</th>
<th valign="middle" align="left">Precision</th>
<th valign="middle" align="left">F1 score</th>
<th valign="middle" align="left">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Base ensemble (LGBM + RF + GBDT)</td>
<td valign="middle" align="left">0.83</td>
<td valign="middle" align="left">0.80</td>
<td valign="middle" align="left">0.83</td>
<td valign="middle" align="left">0.87</td>
</tr>
<tr>
<td valign="middle" align="left">+ Uncertainty quantification</td>
<td valign="middle" align="left">0.84</td>
<td valign="middle" align="left">0.82</td>
<td valign="middle" align="left">0.83</td>
<td valign="middle" align="left">0.90</td>
</tr>
<tr>
<td valign="middle" align="left">+ Causal feature analysis</td>
<td valign="middle" align="left">0.86</td>
<td valign="middle" align="left">0.83</td>
<td valign="middle" align="left">0.86</td>
<td valign="middle" align="left">0.90</td>
</tr>
<tr>
<td valign="middle" align="left">+ Multimodal explainability</td>
<td valign="middle" align="left">0.89</td>
<td valign="middle" align="left">0.88</td>
<td valign="middle" align="left">0.89</td>
<td valign="middle" align="left">0.91</td>
</tr>
<tr>
<td valign="middle" align="left">+ Fairness assessment (final model)</td>
<td valign="middle" align="left">0.95 (overall), 0.98 (certain only)</td>
<td valign="middle" align="left">1.000</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.99</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Overall, each module added a specific improvement in performance, transparency, and ethical compliance, which confirms the framework design and its complete alignment with trusted clinical AI principles.</p>
<p>These results confirm that each part of the architecture, including uncertainty calculation, causal analysis, explainability, and fairness, together increase the model&#x2019;s diagnostic credibility. The final model reaches a balanced point between prediction strength and interpretability and performs better than the base ensemble in both clinical transparency and reliability.</p>
</sec>
<sec id="s8">
<label>8</label>
<title>Clinical deployment feasibility and integration considerations</title>
<p>To check the practical use of the proposed framework in real clinical environments, we evaluated its computational needs and possible integration methods. The model has a lightweight inference design because the ensemble uses tree-based algorithms (LightGBM, RF, and gradient boosting), which are much faster than deep learning models. Testing on a standard workstation (Intel i9 CPU, 64 GB RAM) showed that the average inference time for one patient sample was less than 8 ms, which shows that the system can run in real time inside hospital workflows without a GPU. For deployment, the framework can be integrated as a modular decision-support tool that connects with existing hospital systems such as PACS, EHR, or LIS. Since the model works on structured clinical and morphological features, it can be deployed through a REST API or as an on-premise microservice within the hospital IT infrastructure. The uncertainty-aware outputs give clear triage information, allowing high-risk patients to be flagged automatically and uncertain cases to be reviewed manually by clinicians, which improves workflow safety and reduces diagnostic delay. Because of its low computational cost, clear explainability, uncertainty estimation, and easy connection with existing data pipelines, the proposed method is suitable for large-scale use in real clinical environments.</p>
</sec>
<sec id="s9" sec-type="conclusions">
<label>9</label>
<title>Conclusion</title>
<p>In conclusion, this study introduced a reliable prediction framework that combines ensemble learning with uncertainty quantification to improve performance and explainability in clinical analysis. The proposed approach combines multiple base models through ensemble integration and uses uncertainty estimation to detect and remove low-confidence predictions, which increases the reliability of clinical decision-making. Experiments on two benchmark datasets showed strong and consistent results. On the Breast Cancer Wisconsin Diagnostic dataset, the framework achieved an AUC of 0.99, an accuracy of 0.98, and an F1 score of 0.98. On the UCTH clinical dataset, the model achieved an AUC of 0.97, an accuracy of 0.95, and an F1 score of 0.94, with perfect precision (1.000) for certain predictions and no false positives. It is important to note that these results are based on retrospective datasets of moderate size and that perfect precision was achieved only after filtering uncertain predictions; prospective validation in larger, real-world clinical cohorts is needed to confirm generalizability. The causal feature analysis confirmed the clinical relevance of key predictors such as lymph node involvement, metastasis, and tumor morphology, and the fairness evaluation showed stable performance across age groups with a small difference (&#x394;F1 = 0.200). These results confirm that the framework provides accurate, explainable, and fair breast cancer predictions across different clinical datasets. Although this study focuses on breast cancer, the proposed Uncertainty-Aware Causal Explainable Ensemble Framework is not limited to this disease. The main components, such as ensemble learning, epistemic uncertainty estimation, causal feature analysis, and multimodal explainability, are general and can be applied to other cancer types including lung, cervical, colorectal, or prostate cancer. 
These cancers also involve diverse clinical and morphological features, for which uncertainty handling and clear interpretability are important. Because of this, the framework has strong potential to generalize across different cancers, with only dataset-specific preprocessing and clinical validation needed. Future research will focus on increasing dataset diversity, improving uncertainty calibration, and testing generalization in wider healthcare environments to support reliable and ethically aligned AI clinical decision systems.</p>
</sec>
</body>
<back>
<sec id="s10" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s11" sec-type="author-contributions">
<title>Author contributions</title>
<p>MZ: Software, Writing &#x2013; review &amp; editing, Project administration, Writing &#x2013; original draft, Supervision, Conceptualization, Methodology, Formal analysis, Data curation, Validation. MF: Visualization, Investigation, Writing &#x2013; review &amp; editing, Resources, Formal analysis, Writing &#x2013; original draft, Validation, Methodology, Data curation, Software. IQ: Data curation, Methodology, Software, Validation, Funding acquisition, Project administration, Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s13" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s14" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s15" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ravi</surname> <given-names>S</given-names></name>
<name><surname>Saranya</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Breast cancer detection using machine learning in medical imaging&#x2014;A survey</article-title>. <source>Proc Comput Sci</source>. (<year>2024</year>) <volume>239</volume>:<page-range>2235&#x2013;42</page-range>.
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kapila</surname> <given-names>R</given-names></name>
<name><surname>Saleti</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>An efficient ensemble-based machine learning for breast cancer detection</article-title>. <source>Biomed Signal Process Control</source>. (<year>2023</year>) <volume>86</volume>:<fpage>105269</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bspc.2023.105269</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Das</surname> <given-names>AK</given-names></name>
<name><surname>Biswas</surname> <given-names>SK</given-names></name>
<name><surname>Mandal</surname> <given-names>A</given-names></name>
<name><surname>Bhattacharya</surname> <given-names>A</given-names></name>
<name><surname>Sanyal</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Machine learning based intelligent system for breast cancer prediction (MLISBCP)</article-title>. <source>Expert Syst Appl</source>. (<year>2024</year>) <volume>242</volume>:<fpage>122673</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eswa.2023.122673</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Talukdar</surname> <given-names>P</given-names></name>
<name><surname>Ray</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Analysis of breast cancer classification using machine learning techniques and hyperparameter tuning</article-title>. <source>Biocatalysis Agric Biotechnol</source>. (<year>2024</year>) <volume>25</volume>:<fpage>103195</fpage>.
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yadav</surname> <given-names>RK</given-names></name>
<name><surname>Singh</surname> <given-names>P</given-names></name>
<name><surname>Kashtriya</surname> <given-names>P</given-names></name>
</person-group>. 
<article-title>Diagnosis of breast cancer using machine learning techniques&#x2014;A survey</article-title>. <source>Proc Comput Sci</source>. (<year>2023</year>) <volume>218</volume>:<page-range>1434&#x2013;43</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.procs.2023.01.122</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Darwich</surname> <given-names>M</given-names></name>
<name><surname>Bayoumi</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>An evaluation of the effectiveness of machine learning prediction models in assessing breast cancer risk</article-title>. <source>Inf Med Unlocked</source>. (<year>2024</year>) <volume>14</volume>:<fpage>101550</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.imu.2024.101550</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Singh</surname> <given-names>A</given-names></name>
<name><surname>Kaur</surname> <given-names>S</given-names></name>
<name><surname>Singh</surname> <given-names>D</given-names></name>
<name><surname>Singh</surname> <given-names>G</given-names></name>
</person-group>. &#x201c;
<article-title>Technical review of breast cancer screening and detection using artificial intelligence and radiomics</article-title>,&#x201d; In: <conf-name>Proc. 11th Int. Conf. on&#xa0;Computing for Sustainable Global Development (INDIACom)</conf-name>, <conf-sponsor>IEEE</conf-sponsor>. (<year>2024</year>). pp.&#xa0;<page-range>1171&#x2013;6</page-range>.
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Adedayo-Ajayi</surname> <given-names>VO</given-names></name>
<name><surname>Ogundokun</surname> <given-names>RO</given-names></name>
<name><surname>Tunbosun</surname> <given-names>AE</given-names></name>
<name><surname>Adebiyi</surname> <given-names>MO</given-names></name>
<name><surname>Adebiyi</surname> <given-names>AA</given-names></name>
</person-group>. &#x201c;
<article-title>Metastatic breast cancer detection using deep learning algorithms: A systematic review</article-title>,&#x201d; In: <conf-name>Proc. Int. Conf. on Science, Engineering and Business for Sustainable Development Goals (SEB-SDG)</conf-name>, <conf-sponsor>IEEE</conf-sponsor>. (<year>2023</year>). pp. <fpage>1</fpage>&#x2013;<lpage>5</lpage>.
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Oztekin</surname> <given-names>PS</given-names></name>
<etal/>
</person-group>. 
<article-title>Comparison of explainable artificial intelligence model and radiologist review performances to detect breast cancer in 752 patients</article-title>. <source>J Ultrasound Med</source>. (<year>2024</year>) <volume>43</volume>:<page-range>2051&#x2013;68</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jum.16535</pub-id>, PMID: <pub-id pub-id-type="pmid">39051752</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bai</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Breast cancer diagnosis: A comprehensive exploration of explainable artificial intelligence (XAI) techniques</article-title>. <source>arXiv preprint arXiv:2406.00532</source>. (<year>2024</year>).
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sharma</surname> <given-names>S</given-names></name>
<name><surname>Aggarwal</surname> <given-names>A</given-names></name>
<name><surname>Choudhury</surname> <given-names>T</given-names></name>
</person-group>. &#x201c;
<article-title>Breast cancer detection using machine learning algorithms</article-title>,&#x201d; In: <conf-name>Proc. Int. Conf. on Computational Techniques, Electronics and Mechanical Systems (CTEMS)</conf-name>, <conf-sponsor>IEEE</conf-sponsor>. (<year>2018</year>). pp. <page-range>114&#x2013;8</page-range>.
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Thilaka</surname> <given-names>A</given-names></name>
<name><surname>Sundaravalli</surname> <given-names>E</given-names></name>
</person-group>. 
<article-title>Breast cancer forecasting using machine learning algorithms</article-title>. <source>Int J Data Inf Intell Comput</source>. (<year>2023</year>) <volume>2</volume>:<fpage>11</fpage>&#x2013;<lpage>20</lpage>.
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Silva-Aravena</surname> <given-names>F</given-names></name>
<name><surname>N&#xfa;&#xf1;ez Delafuente</surname> <given-names>H</given-names></name>
<name><surname>Guti&#xe9;rrez-Bahamondes</surname> <given-names>JH</given-names></name>
<name><surname>Morales</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>A hybrid algorithm of ML and XAI to prevent breast cancer: A strategy to support decision making</article-title>. <source>Cancers</source>. (<year>2023</year>) <volume>15</volume>:<fpage>2443</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/cancers15092443</pub-id>, PMID: <pub-id pub-id-type="pmid">37173910</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Munshi</surname> <given-names>RM</given-names></name>
<etal/>
</person-group>. 
<article-title>A novel approach for breast cancer detection using optimized ensemble learning framework and XAI</article-title>. <source>Image Vision Computing</source>. (<year>2024</year>) <volume>142</volume>:<fpage>104910</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.imavis.2024.104910</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Islam</surname> <given-names>T</given-names></name>
<etal/>
</person-group>. 
<article-title>Predictive modeling for breast cancer classification in the context of Bangladeshi patients using machine learning approach with explainable AI</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>:<fpage>8487</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-57740-5</pub-id>, PMID: <pub-id pub-id-type="pmid">38605059</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Imouokhome</surname> <given-names>FA</given-names></name>
<name><surname>Ehimiyein</surname> <given-names>OG</given-names></name>
<name><surname>Chete</surname> <given-names>FO</given-names></name>
</person-group>. 
<article-title>Diagnosis and interpretation of breast cancer using explainable artificial intelligence</article-title>. <source>NIPES J Sci Technol Res</source>. (<year>2023</year>) <volume>5</volume>.
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Duggal</surname> <given-names>A</given-names></name>
</person-group>. <source>UCTH Breast Cancer Dataset</source>. 
<publisher-name>Kaggle</publisher-name> (<year>2025</year>). Available online at: <uri xlink:href="https://www.kaggle.com/datasets/anoushkaduggal/ucth-breast-cancer-dataset?resource=download">https://www.kaggle.com/datasets/anoushkaduggal/ucth-breast-cancer-dataset?resource=download</uri>.
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>UCI Machine Learning Repository (UCIML)</collab>
</person-group>. <source>Breast Cancer Wisconsin (Diagnostic) Dataset</source>. 
<publisher-name>Kaggle</publisher-name>. Available online at: <uri xlink:href="https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data">https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data</uri>.
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Arravalli</surname> <given-names>T</given-names></name>
<name><surname>Chadaga</surname> <given-names>K</given-names></name>
<name><surname>Muralikrishna</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Detection of breast cancer using machine learning and explainable artificial intelligence</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>:<fpage>26931</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-025-12644-w</pub-id>, PMID: <pub-id pub-id-type="pmid">40707590</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Islam</surname> <given-names>MN</given-names></name>
<name><surname>Rahman</surname> <given-names>MM</given-names></name>
<name><surname>Shanta</surname> <given-names>SS</given-names></name>
<name><surname>Salakin</surname> <given-names>S</given-names></name>
<name><surname>Khan Akash</surname> <given-names>AA</given-names></name>
<name><surname>Hasan Imam Bijoy</surname> <given-names>M</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>Breast cancer classification using machine learning techniques with explainable artificial intelligence</article-title>, in: <conf-name>Proc. Int. Conf. on Electrical, Computer and Communication Engineering (ECCE)</conf-name>, <conf-loc>Chittagong, Bangladesh</conf-loc>. pp. <fpage>1</fpage>&#x2013;<lpage>7</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ECCE64574.2025.11013927</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/999577">Nosheen Masood</ext-link>, Fatima Jinnah Women University, Pakistan</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2299603">Keyue Yan</ext-link>, University of Macau, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3321145">Umair Arif</ext-link>, Xi&#x2019;an Jiaotong University, China</p></fn>
</fn-group>
</back>
</article>