<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2025.1594450</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Reducing misdiagnosis in AI-driven medical diagnostics: a multidimensional framework for technical, ethical, and policy solutions</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Yue</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3004294/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yi</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fu</surname>
<given-names>Jia</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Yujing</given-names>
</name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Duan</surname>
<given-names>ChuJie</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Humanities and Social Sciences, Shanxi Medical University</institution>, <addr-line>Jinzhong</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Ideological and Political Education, Shanxi University of Medicine, Medical Humanities Program, Fenyang</institution>, <addr-line>Shanxi Province</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>School of Management, Shanxi Medical University</institution>, <addr-line>Jinzhong</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Radiation Therapy, Shanxi Cancer Hospital</institution>, <addr-line>Taiyuan</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Nursing, Shanxi Medical University</institution>, <addr-line>Fenyang</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1549474/overview">Filippo Gibelli</ext-link>, University of Camerino, Italy</p>
</fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/987032/overview">Rudra P. Saha</ext-link>, Adamas University, India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2305680/overview">Sanyam Gandhi</ext-link>, Takeda Development Centers Americas, United States</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Yue Li, <email>sxjkyxyly@163.com</email>; Jun Wang, <email>wangjylyh@foxmail.com</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>31</day>
<month>10</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1594450</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>03</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Li, Yi, Fu, Yang, Duan and Wang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Li, Yi, Fu, Yang, Duan and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Purpose</title>
<p>This study aims to systematically identify and address key barriers to misdiagnosis in AI-driven medical diagnostics. The main research question is how technical limitations, ethical concerns, and unclear accountability hinder safe and equitable use of AI in real-world clinical practice, and what integrated solutions can minimize errors and promote trust.</p>
</sec>
<sec id="sec2">
<title>Methods</title>
<p>We conducted a literature review and case analysis across major medical fields, evaluating failure modes such as data pathology, algorithmic bias, and human-AI interaction. Based on these findings, we propose a multidimensional framework combining technical strategies&#x2014;such as dynamic data auditing and explainability engines&#x2014;with ethical and policy interventions, including federated learning for bias mitigation and blockchain-based accountability.</p>
</sec>
<sec id="sec3">
<title>Results</title>
<p>Our analysis shows that misdiagnosis often results from data bias, lack of model transparency, and ambiguous responsibility. When applied to published case examples and comparative evaluations from the literature, elements of our framework are associated with improvements in diagnostic accuracy, transparency, and equity. Key recommendations include bias monitoring, real-time interpretability dashboards, and legal frameworks for shared accountability.</p>
</sec>
<sec id="sec4">
<title>Conclusion</title>
<p>A coordinated, multidimensional approach is essential to reduce the risk of misdiagnosis in AI-supported diagnostics. By integrating robust technical controls, clear ethical guidelines, and defined accountability, our framework provides a practical roadmap for responsible, transparent, and equitable AI adoption in healthcare&#x2014;improving patient safety, clinician trust, and health equity.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence (AI) diagnostics</kwd>
<kwd>misdiagnosis risk</kwd>
<kwd>AI policy and regulation</kwd>
<kwd>patient safety and trust</kwd>
<kwd>ethical responsibility</kwd>
</kwd-group>
<counts>
<fig-count count="1"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="53"/>
<page-count count="9"/>
<word-count count="7035"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Regulatory Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<label>1</label>
<title>Introduction</title>
<p>The integration of artificial intelligence (AI) into healthcare is transforming diagnostic workflows. Machine-learning models now deliver faster and more accurate image interpretation than traditional methods across oncology, cardiology, and radiology (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>). Deep-learning systems such as convolutional neural networks (CNNs) can achieve expert-level performance in controlled settings&#x2014;for example, melanoma detection AUCs exceeding 0.94 (<xref ref-type="bibr" rid="ref3">3</xref>)&#x2014;and they show promise for expanding early cancer diagnosis in resource-limited settings (<xref ref-type="bibr" rid="ref4">4</xref>). Yet these technical achievements do not translate seamlessly to everyday clinical care. Despite benchmark accuracies as high as 94.5% (<xref ref-type="bibr" rid="ref5">5</xref>), real-world deployments often reveal performance drops of 15&#x2013;30% due to population shifts and integration barriers (<xref ref-type="bibr" rid="ref6">6</xref>).</p>
<p>The adoption of AI in diagnostics introduces systemic risks that current governance frameworks are ill-equipped to manage. The World Health Organization defines misdiagnosis as the failure to accurately identify or communicate a patient&#x2019;s condition (<xref ref-type="bibr" rid="ref7">7</xref>). Algorithmic opacity and bias further compound this risk. For instance, underrepresentation of rural populations in training datasets has been linked to a 23% higher false-negative rate for pneumonia detection, while melanoma detection errors are more prevalent among dark-skinned patients due to dataset imbalances (<xref ref-type="bibr" rid="ref8">8</xref>). Additionally, overfitting and spurious correlations can lead to clinically significant false positives, as observed in breast cancer screening (<xref ref-type="bibr" rid="ref9">9</xref>). Two factors exacerbate these challenges: (1) the &#x201C;black-box&#x201D; nature of many AI models, which limits error traceability and undermines clinician trust (<xref ref-type="bibr" rid="ref10">10</xref>), and (2) blurred lines of accountability among developers, clinicians, and healthcare institutions. We categorize these issues into three failure modes&#x2014;data pathology, algorithmic bias, and human&#x2013;AI interaction&#x2014;outlined in <xref ref-type="table" rid="tab1">Table 1</xref>, which links technical root causes to their clinical consequences.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Failure modes and root causes of AI misdiagnosis: a technical-clinical analysis.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Failure mode</th>
<th align="left" valign="top">Technical root cause</th>
<th align="left" valign="top">Clinical manifestation</th>
<th align="left" valign="top">Empirical evidence</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Data pathology</td>
<td align="left" valign="top">Sampling bias in training data</td>
<td align="left" valign="top">Underdiagnosis in underrepresented subgroups</td>
<td align="left" valign="top">28% higher FN rates for dark-skinned melanoma cases (<xref ref-type="bibr" rid="ref13">13</xref>)</td>
</tr>
<tr>
<td align="left" valign="top">Algorithmic bias</td>
<td align="left" valign="top">Overfitting to spurious correlations</td>
<td align="left" valign="top">Overdiagnosis of benign nodules as malignant</td>
<td align="left" valign="top">22% FP increase in lung CT analysis (<xref ref-type="bibr" rid="ref14">14</xref>)</td>
</tr>
<tr>
<td align="left" valign="top">Human-AI interaction</td>
<td align="left" valign="top">Automation complacency among clinicians</td>
<td align="left" valign="top">Delayed correction of AI errors</td>
<td align="left" valign="top">41% slower error identification vs. human-only workflows (<xref ref-type="bibr" rid="ref15">15</xref>)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Implementing real-time bias monitoring and interpretability dashboards is crucial to mitigating these issues, but the feasibility and infrastructure requirements must be carefully considered. While these tools could enhance transparency and trust, their deployment in resource-limited settings may face challenges related to cost, data infrastructure, and technical expertise. For hospitals in low-resource regions, the implementation of such technologies could require significant investments in both hardware and training. Therefore, policy recommendations must account for the scalability of these tools, with phased rollouts and tailored strategies to ensure accessibility and effectiveness across various healthcare settings. As noted by Smith and Fotheringham, current liability frameworks inadequately address this tripartite accountability gap, potentially exacerbating health disparities. In line with this, a 2023 study in JAMA found that AI misdiagnosis rates for minority patients were 31% higher than for majority patients in critical care settings (<xref ref-type="bibr" rid="ref11">11</xref>, <xref ref-type="bibr" rid="ref12">12</xref>).</p>
<p>This study addresses these gaps by presenting an integrated framework to reduce AI-related misdiagnosis in real-world care. The framework couples (i) bias-aware data curation; (ii) a hybrid explainability engine that combines gradient-based saliency (e.g., Grad-CAM, Integrated Gradients) with a structural causal model (SCM), aligns the top-k% salient regions with SCM variables, and runs counterfactual/ablation queries with faithfulness checks (deletion/insertion) to yield concise, clinician-facing rationales; (iii) dynamic data auditing via federated learning, whereby each site computes subgroup-stratified metrics (AUC, sensitivity/specificity, ECE, FPR/FNR) locally and shares privacy-preserving aggregates to monitor drift (PSI, KL) and fairness (&#x0394;FNR), with threshold-based alerts and returned reweighting/sampling quotas to mitigate representation disparities; and (iv) accountability-by-design instruments, including versioned model fact sheets and on-chain hashing of artifacts with pointers to off-chain logs for auditor verification. A schematic overview appears in <xref rid="SM1" ref-type="supplementary-material">Supplementary Figure S1</xref> (S1A, hybrid explainability; S1B, blockchain-anchored accountability and data flows; S1C, federated learning&#x2013;based dynamic auditing). Because the work involves no patient intervention or prospective enrollment, clinical trial registration is not applicable.</p>
</sec>
<sec id="sec6">
<label>2</label>
<title>Failure modes and risk analysis in AI-based medical diagnosis</title>
<p>Scope of evidence. This is a narrative synthesis and framework paper based on peer-reviewed studies and case analyses; no primary multi-center trial was performed by the authors. Quantitative values cited (e.g., error gaps) reflect external sources explicitly referenced in the text.</p>
<sec id="sec7">
<label>2.1</label>
<title>Three interdependent failure modes</title>
<p>AI diagnostic errors can be traced to three interdependent failure modes, each demanding targeted mitigation. First, data pathology&#x2014;driven by sampling biases&#x2014;leads to systematic underdiagnosis in minority or underrepresented groups, as seen in elevated false-negative rates among dark-skinned patients (<xref ref-type="bibr" rid="ref13">13</xref>). Second, algorithmic bias&#x2014;often caused by overfitting to spurious patterns in training data&#x2014;results in clinically significant false positives, such as unnecessary treatment for benign findings (<xref ref-type="bibr" rid="ref14">14</xref>). Third, human-AI interaction issues, such as automation complacency or overreliance, can slow down error detection and correction, as demonstrated by delays in clinical workflows when AI is blindly trusted or ignored (<xref ref-type="bibr" rid="ref15">15</xref>).</p>
<p>Although advanced models such as Vision Transformers can achieve impressive accuracy&#x2014;for example, an AUC of 0.97 in retinal disease detection (<xref ref-type="bibr" rid="ref16">16</xref>)&#x2014;their lack of interpretability remains a major barrier. Clinicians require 2.3 times longer to audit deep neural network (DNN) decisions compared to traditional rule-based systems (<xref ref-type="bibr" rid="ref17">17</xref>), and 34% of radiologists report overriding correct AI recommendations due to distrust in opaque outputs (<xref ref-type="bibr" rid="ref18">18</xref>). This underutilization and propagation of errors highlight a critical paradox: as AI models become more powerful, the risks of misdiagnosis, inequity, and accountability gaps can actually increase if transparency and trust are not systematically addressed.</p>
<p>As depicted in <xref ref-type="fig" rid="fig1">Figure 1</xref>, the end-to-end AI diagnostic workflow&#x2014;from data collection and model training to clinical application and iterative feedback&#x2014;includes several points where technical flaws and systemic biases can be introduced and amplified. Each stage represents a potential vulnerability, capable of propagating errors throughout the entire diagnostic process. These interconnected risks underscore the urgent need for solutions that not only enhance technical performance, but also explicitly address the ethical, legal, and operational challenges unique to AI in healthcare.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Key workflow of the AI diagnostic system, highlighting critical stages from data collection and model development to clinical deployment and feedback optimization, where technical and ethical vulnerabilities may arise.</p>
</caption>
<graphic xlink:href="fmed-12-1594450-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart titled "Workflow of AI in Medical Diagnostics" showing four main stages: Data Collection, Model Training, Diagnostic Application, and Feedback Optimization. Data Collection involves gathering electronic health records, medical imaging, genomic, and clinical trial data, followed by cleaning and preprocessing. Model Training includes data splitting, selection, optimization, and iterative training. Diagnostic Application covers model validation, testing, deployment, and real-time diagnostics. Feedback Optimization involves collecting clinician feedback, data logging, and retraining to improve model accuracy and reliability. The process forms a continuous loop with updates and retraining.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec8">
<label>2.2</label>
<title>Data quality, diversity, and accountability in AI diagnostics</title>
<p>The reliability and fairness of AI diagnostics rest on three pillars: data quality and diversity, algorithmic interpretability, and rigorous validation. High-quality, representative data are crucial to avoid systematic disadvantages for minorities. Complex models boost accuracy but may obscure reasoning, limiting clinicians&#x2019; ability to verify diagnoses. Rigorous testing, including cross-validation on diverse datasets and real-world clinical trials, is essential to confirm safety and build trust. <xref ref-type="table" rid="tab2">Table 2</xref> summarizes performance and persistent challenges across key medical fields, providing context for targeted improvements.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparison of AI diagnostic performance across different medical fields.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Diagnostic field</th>
<th align="left" valign="top">Application</th>
<th align="center" valign="top">Diagnostic accuracy</th>
<th align="left" valign="top">Speed</th>
<th align="left" valign="top">Strengths</th>
<th align="left" valign="top">Challenges</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Dermatology</td>
<td align="left" valign="top">Skin cancer detection</td>
<td align="center" valign="top">90&#x2013;95%</td>
<td align="left" valign="top">Significantly faster than biopsy</td>
<td align="left" valign="top">High accuracy for melanoma; valuable for early detection</td>
<td align="left" valign="top">Struggles with atypical cases and non-Caucasian skin due to data bias (<xref ref-type="bibr" rid="ref41">41</xref>, <xref ref-type="bibr" rid="ref33">33</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Radiology</td>
<td align="left" valign="top">Lung cancer detection</td>
<td align="center" valign="top">85&#x2013;95%</td>
<td align="left" valign="top">&#x003C;1&#x202F;min per image</td>
<td align="left" valign="top">Sensitive to small nodules; reduces radiologist workload</td>
<td align="left" valign="top">Needs high-quality images; susceptible to motion artifacts (<xref ref-type="bibr" rid="ref14">14</xref>, <xref ref-type="bibr" rid="ref42">42</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Ophthalmology</td>
<td align="left" valign="top">Diabetic retinopathy screening</td>
<td align="center" valign="top">90&#x2013;98%</td>
<td align="left" valign="top">Immediate (seconds)</td>
<td align="left" valign="top">Enables mass screening; accurate in staging progression</td>
<td align="left" valign="top">May miss atypical cases; limited by dataset diversity (<xref ref-type="bibr" rid="ref43">43</xref>, <xref ref-type="bibr" rid="ref44">44</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Cardiology</td>
<td align="left" valign="top">ECG interpretation for arrhythmias</td>
<td align="center" valign="top">85&#x2013;92%</td>
<td align="left" valign="top">Real-time analysis</td>
<td align="left" valign="top">Supports continuous monitoring; aids early detection</td>
<td align="left" valign="top">Prone to errors in complex or mixed arrhythmias (<xref ref-type="bibr" rid="ref45">45</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Pathology</td>
<td align="left" valign="top">Histopathology for cancer diagnosis</td>
<td align="center" valign="top">90&#x2013;97%</td>
<td align="left" valign="top">Faster than human review</td>
<td align="left" valign="top">High sensitivity; helps prioritize critical cases</td>
<td align="left" valign="top">Limited interpretability; risk of over-reliance (<xref ref-type="bibr" rid="ref46 ref47 ref48">46&#x2013;48</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Pulmonology</td>
<td align="left" valign="top">Pneumonia diagnosis via chest X-ray</td>
<td align="center" valign="top">85&#x2013;93%</td>
<td align="left" valign="top">Immediate (seconds)</td>
<td align="left" valign="top">Effective for rapid triage in emergencies</td>
<td align="left" valign="top">Challenged by overlapping symptoms; sensitive to image quality (<xref ref-type="bibr" rid="ref49">49</xref>, <xref ref-type="bibr" rid="ref50">50</xref>)</td>
</tr>
<tr>
<td align="left" valign="middle">Neurology</td>
<td align="left" valign="top">Stroke detection on MRI/CT</td>
<td align="center" valign="top">88&#x2013;94%</td>
<td align="left" valign="top">Rapid pre-processing</td>
<td align="left" valign="top">High accuracy for ischemic/hemorrhagic stroke; time-sensitive</td>
<td align="left" valign="top">Limited diverse datasets; interpretability issues (<xref ref-type="bibr" rid="ref51 ref52 ref53">51&#x2013;53</xref>)</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="sec9">
<label>2.2.1</label>
<title>Data quality and diversity</title>
<p>High-quality, diverse datasets are essential for robust AI performance. If training data are noisy, incomplete, or lack representation from certain racial, age, or geographic groups, models may perform well on some patients but poorly on others, systematically disadvantaging marginalized populations (<xref ref-type="bibr" rid="ref19 ref20 ref21">19&#x2013;21</xref>). For example, suboptimal medical imaging data, including artifacts or poor resolution, can mislead AI systems, leading to diagnostic errors (<xref ref-type="bibr" rid="ref22">22</xref>, <xref ref-type="bibr" rid="ref23">23</xref>). Inadequate data can lead to diagnostic errors, reduce generalizability, and worsen health inequities.</p>
</sec>
<sec id="sec10">
<label>2.2.2</label>
<title>Algorithmic complexity and interpretability</title>
<p>While advanced deep learning models can surpass human experts in detecting subtle clinical patterns, their complexity often comes at the expense of interpretability. Overfitting to spurious details in training data can cause unreliable predictions in new populations (<xref ref-type="bibr" rid="ref24 ref25 ref26 ref27">24&#x2013;27</xref>). The &#x201C;black-box&#x201D; nature of many models makes it difficult for clinicians to understand, verify, or challenge AI-generated diagnoses, eroding trust and increasing the risk of undetected errors (<xref ref-type="bibr" rid="ref28">28</xref>, <xref ref-type="bibr" rid="ref29">29</xref>). Techniques such as LIME and SHAP improve transparency, but typically offer only partial insights.</p>
</sec>
<sec id="sec11">
<label>2.2.3</label>
<title>Model testing and validation</title>
<p>Thorough external validation, including cross-validation across subgroups and prospective real-world clinical trials, is critical for ensuring AI safety and reliability. Using specialized metrics&#x2014;such as sensitivity, specificity, and precision-recall&#x2014;helps confirm performance in clinically relevant terms (<xref ref-type="bibr" rid="ref30">30</xref>, <xref ref-type="bibr" rid="ref31">31</xref>). Following these best practices builds trust among both clinicians and patients.</p>
<p>In summary, progress in these technical domains&#x2014;data curation, interpretability, and robust validation&#x2014;is essential to minimize misdiagnosis risk (<xref ref-type="bibr" rid="ref28">28</xref>). However, technical safeguards alone are not enough. Without clear ethical and legal frameworks, ambiguity in responsibility and accountability can persist, leaving patients vulnerable. The next section addresses these broader challenges, focusing on how responsibility should be allocated and safeguarded in AI-powered healthcare.</p>
</sec>
</sec>
</sec>
<sec id="sec12">
<label>3</label>
<title>Ethical and legal responsibility allocation in AI diagnostic errors</title>
<p>Technical safeguards alone are insufficient. Ethical and legal responsibility must be clearly defined to protect patients and ensure accountability in AI-assisted medicine. Ensuring responsible and equitable use of AI in diagnostics is not only a technical challenge, but also a profound ethical and legal issue. This section addresses three critical areas: patient safety and equity, accountability gaps among stakeholders, and the evolving standards for patient rights and informed consent.</p>
<sec id="sec13">
<label>3.1</label>
<title>Patient safety and equity: the ethical stakes of AI misdiagnosis</title>
<p>As AI becomes deeply embedded in clinical diagnostics, misdiagnosis is no longer just a technical failure&#x2014;it raises fundamental ethical concerns about patient safety and health equity. Diagnostic errors can result in delayed, inappropriate, or unnecessary treatment, directly harming patients. The consequences are often worst for marginalized groups: when AI systems trained on unbalanced datasets underperform for underrepresented populations, existing health disparities are not just maintained&#x2014;they are made worse (<xref ref-type="bibr" rid="ref32">32</xref>, <xref ref-type="bibr" rid="ref33">33</xref>). Thus, ensuring justice and fairness in AI-supported diagnosis is both an ethical imperative and a technical challenge.</p>
</sec>
<sec id="sec14">
<label>3.2</label>
<title>Accountability gaps: roles of developers, institutions, and clinicians</title>
<p>Responsibility for AI errors in healthcare remains ill-defined. Developers are tasked with designing transparent, reliable, and validated systems, yet they rarely interact with patients or clinical realities. Healthcare institutions choose and deploy AI tools, integrate them into clinical workflows, and train staff&#x2014;but few have established procedures for monitoring, post-market surveillance, or incident response. Clinicians make final care decisions, but may not fully understand or be able to challenge &#x201C;black-box&#x201D; model outputs, yet still bear legal and ethical liability. Without clear regulatory frameworks, these overlapping roles lead to confusion, inconsistency, and increased patient safety risks. Practical, shared accountability frameworks tailored to the unique risks of AI-driven medicine are urgently needed.</p>
</sec>
<sec id="sec15">
<label>3.3</label>
<title>Patient rights and informed consent in the age of AI</title>
<p>AI-assisted diagnosis introduces new complexities to informed consent. Patients should be told how AI informs their care, its benefits and limitations, and any risks&#x2014;especially those stemming from model bias or limited explainability. Communicating the workings of opaque models to non-experts is difficult but essential to maintain trust and protect autonomy. In some settings, AI may be the only diagnostic tool available, further reducing patient choice. Ongoing data use by AI systems also raises privacy concerns, making clear, accessible communication about data use and patient rights crucial. Informed consent procedures must be updated to reflect these realities, safeguarding patient interests as AI becomes more prevalent in healthcare.</p>
<p>Practical strategies (&#x2248;60&#x2013;90&#x202F;s). We adopt a layered, risk-tiered consent approach that fits typical visit time constraints: (i) a one-sentence disclosure (&#x201C;An AI system will assist your clinician; a human remains responsible for your care.&#x201D;); (ii) a 30-s &#x201C;AI Fact Label&#x201D; in plain language summarizing intended use, key limitations, and any subgroup caveats (e.g., performance may differ in patients &#x003E;75&#x202F;years); and (iii) an optional deep-dive explanation accessible via QR/EHR link. Understanding is checked with a brief teach-back (&#x201C;In your own words, what does the AI add and what are its limits?&#x201D;). Patients are offered a clear opt-out/human-only review path without penalty. The consent artifact records data use/retention policies and model name/version, and is stored in the EHR. Materials are translated where needed and designed for low health-literacy; in emergencies, deferred consent is documented and completed at the earliest opportunity.</p>
<p>Patient and stakeholder input. To incorporate patient perspectives, we propose a brief, clinic-compatible engagement loop: (i) a 3-item comprehension check after consent (e.g., role of AI, key limits, human-override) and a 5-point trust/clarity rating; (ii) optional focus groups (30&#x2013;45&#x202F;min, purposive sampling across age, education, and rurality) to surface concerns and language preferences; and (iii) an auditable EHR record of consent outcomes (accept, opt-out, request human-only review), model/version, and timestamp. Aggregate indicators (e.g., comprehension &#x2265;80%, median trust &#x2265;4/5, opt-out and human-only rates) are reported at the service line and site level to guide content and UI refinements. Materials target &#x2264;8th-grade reading level and are translated as needed. (No new patient data are presented here; future implementations will seek local IRB approval or exemption as appropriate.)</p>
</sec>
</sec>
<sec id="sec16">
<label>4</label>
<title>The role of transparency and explainability in reducing AI misdiagnosis</title>
<sec id="sec17">
<label>4.1</label>
<title>Why transparency matters</title>
<p>Building on 2.1&#x2013;2.2&#x2014;which detail how data pathology and model opacity contribute to diagnostic error&#x2014;this section focuses on practice-facing safeguards. Transparency is essential for trustworthy AI in medical diagnostics: clinicians who understand how recommendations are generated can validate and act on them more reliably. Providing clear explanations enables secondary review, helping detect hidden errors and improving patient outcomes (<xref ref-type="bibr" rid="ref19">19</xref>, <xref ref-type="bibr" rid="ref24">24</xref>, <xref ref-type="bibr" rid="ref25">25</xref>, <xref ref-type="bibr" rid="ref34">34</xref>). To avoid the twin pitfalls of undue skepticism and blind trust that can arise with opaque &#x201C;black-box&#x201D; models (<xref ref-type="bibr" rid="ref35">35</xref>), explanations should be concise and point-of-care (e.g., a non-blocking saliency overlay plus a one-sentence causal rationale), paired with explicit statements of system limits and subgroup caveats, and an auditable record of model/version and rationale in the EHR. Such transparency anchors accountability and clarifies when and how AI should be used in practice.</p>
</sec>
<sec id="sec18">
<label>4.2</label>
<title>Explainability techniques in practice</title>
<p>Explainability techniques like LIME and SHAP have shown real-world utility in clinical AI workflows. In a retinoblastoma detection study using an InceptionV3 model on balanced cohorts (400 tumorous / 400 normal fundus images), both methods effectively revealed model logic: LIME highlighted tumor regions in individual cases, while SHAP provided feature importance scores across the dataset. This dual insight improved transparency and boosted clinician trust (<xref ref-type="bibr" rid="ref36">36</xref>).</p>
<p>Similarly, in acute stroke modeling based on random forest or XGBoost, SHAP waterfall plots identified risk contributors such as elevated blood glucose, age, and cerebral blood flow; LIME, meanwhile, localized CT regions that most influenced individual predictions (<xref ref-type="bibr" rid="ref37">37</xref>). These cases highlight how layered explanations can both guide clinicians and validate AI models.</p>
<p>However, LIME may over-simplify by approximating only locally, and SHAP is often computationally heavy and struggles with feature collinearity&#x2014;making it less suitable for time-sensitive scenarios (<xref ref-type="bibr" rid="ref38">38</xref>). Both methods may also miss high-dimensional feature interactions intrinsic to deep neural networks. To address these gaps, we operationalize a hybrid engine that couples gradient-based saliency with an SCM-based causal layer supporting counterfactual queries and ROI ablations; faithfulness and sparsity are monitored to ensure explanations remain clinically actionable (see <xref rid="SM1" ref-type="supplementary-material">Supplementary Figure S1A</xref>).</p>
<p>Limitations and safeguards. Gradient-based saliency can be sensitive to noise, preprocessing, and ROI thresholds; the SCM layer introduces assumption dependence, and counterfactuals are model-based rather than interventional. We therefore log deletion/insertion faithfulness scores, enforce sparsity, flag saliency&#x2013;SCM discordance for review, and present explanations as non-blocking overlays to avoid workflow disruption.</p>
<p>Trade-offs and model choice. Where an intrinsically interpretable model (e.g., sparse linear/rule-based or GAM-style) attains performance within a small tolerance of a complex model (e.g., &#x0394;AUC &#x2264; 0.01&#x2013;0.02 with comparable calibration/fairness), we prioritize the interpretable model for primary use. When a black-box delivers material performance gains, we retain it with guardrails&#x2014;pre-deployment faithfulness/stability checks and time budgets, real-time rationale overlays, and prospective monitoring of accuracy, calibration, fairness gaps, and decision latency&#x2014;while documenting the accuracy&#x2013;interpretability trade-off in the model&#x2019;s fact sheet and patient-facing materials.</p>
</sec>
<sec id="sec19">
<label>4.3</label>
<title>Patient communication and ethical integration</title>
<p>Transparency in AI is incomplete unless clinicians can translate model reasoning into understandable dialog with patients. This includes clearly explaining AI&#x2019;s role in the diagnostic process, its capabilities, and its limitations&#x2014;particularly when performance disparities exist across age groups or demographic segments. For example, saying &#x201C;This AI system achieves 97% accuracy overall, but it may be less reliable for patients over 75&#x202F;years old&#x201D; helps contextualize results, supports informed consent, and reinforces patient autonomy (<xref ref-type="bibr" rid="ref39">39</xref>). However, explanations must fit clinical workflow constraints. Under pressure, clinicians may lack time to tailor messages; without concise summaries&#x2014;such as visual markers, standard interpretability labels, or dashboards&#x2014;technical details risk becoming noise rather than enhancing trust.</p>
<p>Consent-in-practice protocol. At the point of care, clinicians: (1) give the one-sentence disclosure and the AI Fact Label; (2) present a concise rationale from the explainability view (e.g., a saliency overlay plus a one-sentence causal path); (3) perform a teach-back confirmation; and (4) record consent in the EHR, including model/version, date/time, and whether the patient requested human-only review. Explanations are delivered as non-blocking overlays to avoid workflow disruption; language access tools and templated scripts support consistency. In summary, transparency and explainability are not just technical enhancements&#x2014;they are prerequisites for trust, accountability, and equity in AI-enabled care, and they can be operationalized with brief, standardized communication steps.</p>
<p>Feedback loop and continuous improvement. Patient-reported metrics (comprehension, trust/clarity, perceived usefulness of explanations) and operational signals (time burden, opt-out/human-only rates, teach-back success) are summarized on the communication dashboard and reviewed in monthly huddles with a patient advisory panel. Iterations prioritize brevity and clarity (&#x2264;90&#x202F;s), accessibility (language and format), and equity checks (stratified by age, education, and rurality). Changes to the consent script or UI are versioned and time-stamped to maintain an auditable trail.</p>
</sec>
</sec>
<sec id="sec20">
<label>5</label>
<title>Recommendations and future directions for improving AI diagnostic systems</title>
<sec id="sec21">
<label>5.1</label>
<title>Technical and ethical strategies to reduce misdiagnosis</title>
<p>Reducing misdiagnosis in AI diagnostics requires both robust technical controls and clear ethical guidelines. First, AI models should be trained on large, diverse datasets that reflect differences in age, ethnicity, and geography, to minimize bias and ensure generalizability. Rigorous validation&#x2014;using cross-validation, independent test sets, and real-world clinical trials&#x2014;is critical for uncovering hidden errors and establishing reliability. Furthermore, explainability and transparency must be integrated at every stage of model development. Tools like LIME and SHAP enable clinicians to better understand and trust AI recommendations, making it easier to detect and correct mistakes (<xref ref-type="bibr" rid="ref40">40</xref>). Combining technical rigor with interpretability is essential for safe and effective clinical use of AI.</p>
<sec id="sec22">
<label>5.1.1</label>
<title>Scaling solutions in low-resource settings</title>
<p>Implementing solutions such as blockchain contracts and federated learning audits in diverse healthcare systems, especially those with limited resources, requires careful consideration of feasibility and cost. In low-resource settings, the adoption of these technologies can be challenging due to the required infrastructure, technical expertise, and financial investment. Blockchain-anchored accountability systems, for instance, can introduce costs related to storage, key management, and throughput. We propose a phased implementation approach to scale these tools effectively, starting with pilot projects to assess their viability before broader deployment. By leveraging lightweight blockchain models that store only hashes and timestamps on-chain, we can reduce the data storage requirements, keeping detailed records off-chain and thus minimizing infrastructure costs.</p>
<p>For federated learning audits, which allow healthcare sites to collaborate while preserving data privacy, we recommend starting with local data audits. Each site computes subgroup-stratified metrics and shares privacy-preserving aggregates, which minimizes the need for large-scale computational resources while still enabling essential monitoring functions such as bias detection and data drift monitoring. This approach is particularly suited for resource-constrained settings, where large infrastructure investments are not feasible. We also recommend secure aggregation protocols to mitigate the risks and costs associated with federated learning by minimizing the volume of data transmitted and reducing network overhead. As these audits are scaled, cloud-based solutions could be considered for integrating data from multiple sites without compromising privacy.</p>
</sec>
<sec id="sec23">
<label>5.1.2</label>
<title>Model choice and governance (complexity&#x2013;interpretability trade-offs)</title>
<p>The preference should be for the simplest adequate model that meets clinical targets, especially in resource-limited settings where computational power and infrastructure are constrained. When an intrinsically interpretable model (e.g., sparse linear/rule-based, GAM-style) performs similarly to a more complex alternative (e.g., &#x0394;AUC &#x2264; 0.01&#x2013;0.02 with comparable calibration/fairness), prioritizing the interpretable model helps preserve transparency and reduce resource demands. If a complex, black-box model is necessary for significant performance gains, it is crucial to document the trade-off between accuracy and interpretability in the model fact sheet, specifying clinician-facing explanations and response-time budgets.</p>
<p>Moreover, to ensure that hospitals are ready for deployment, we suggest implementing training programs for clinicians on using blockchain contracts and federated learning systems. Hospitals should focus on educating their clinical staff about the basics of blockchain technology and its use in verifying AI model outputs. Training should include practical demonstrations of how to access blockchain contract logs and use federated learning data audits effectively. This training can be integrated into existing educational programs and can be delivered through workshops or online tutorials. Ensuring that clinicians are familiar with these technologies will promote their adoption and reduce resistance to using these advanced tools in day-to-day workflows.</p>
<p>Prospective monitoring of model performance, including accuracy, calibration, fairness gaps, and decision latency, should be implemented, with human-override options in place if necessary. Periodic reassessment of the model&#x2019;s performance can help guide decisions about potential simplification to preserve transparency and workflow efficiency. This ensures that the AI system remains effective, interpretable, and scalable in diverse healthcare environments, especially in low-resource settings.</p>
</sec>
</sec>
<sec id="sec24">
<label>5.2</label>
<title>Clarifying responsibility and evolving legal standards</title>
<p>A clear and shared framework for responsibility is urgently needed as AI becomes central to medical diagnostics. Developers must be accountable for model reliability, transparency, and communicating known risks or limitations. Healthcare institutions should evaluate AI tools before deployment, provide clinician training, and monitor ongoing performance, intervening when safety issues arise. Clinicians, while ultimately responsible for patient care, should not be held solely liable for errors that originate from opaque AI models. Regulators must update legal standards and create practical guidelines that distribute accountability fairly and reflect the complexities of AI-assisted medicine.</p>
</sec>
<sec id="sec25">
<label>5.3</label>
<title>Advancing ethical standards and policy implementation</title>
<p>Creating a fair and effective AI diagnostic ecosystem requires ongoing collaboration among developers, healthcare providers, policymakers, and ethicists. Ethical standards should mandate fairness, transparency, and respect for patient rights, building on principles such as justice and beneficence. Policies should require data transparency, regular audits for bias, and public disclosure of system limitations. Continuous regulatory oversight is necessary to prevent health disparities and to ensure that technical progress is matched by ethical responsibility. <xref ref-type="table" rid="tab3">Table 3</xref> provides a consolidated summary of strategic recommendations for enhancing AI diagnostic systems. It outlines technical improvements, ethical considerations, and policy initiatives to guide stakeholders toward a safer, more transparent, and equitable diagnostic framework.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Summary of strategic recommendations for enhancing AI diagnostic systems.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Category</th>
<th align="left" valign="top">Issue</th>
<th align="left" valign="top">Strategy/recommendation</th>
<th align="left" valign="top">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="8">Technical improvements</td>
<td align="left" valign="middle" rowspan="3">Data quality &#x0026; diversity</td>
<td align="left" valign="middle">Data augmentation</td>
<td align="left" valign="middle">Use methods like image rotation, noise addition, and synthetic data to improve diversity.</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset expansion</td>
<td align="left" valign="middle">Include a broad range of demographics, disease types, and medical contexts.</td>
</tr>
<tr>
<td align="left" valign="middle">Data standardization</td>
<td align="left" valign="middle">Standardize labeling and preprocessing to reduce noise and boost accuracy.</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">Model complexity</td>
<td align="left" valign="middle">Algorithm optimization</td>
<td align="left" valign="middle">Apply regularization to prevent overfitting and improve generalizability.</td>
</tr>
<tr>
<td align="left" valign="middle">Explainability tools</td>
<td align="left" valign="middle">Integrate SHAP and LIME for better model interpretability.</td>
</tr>
<tr>
<td align="left" valign="middle">Ensemble modeling</td>
<td align="left" valign="middle">Combine multiple models to increase robustness and reduce errors.</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Validation</td>
<td align="left" valign="middle">Cross-validation with diverse data</td>
<td align="left" valign="middle">Validate models on data from different sources and demographics.</td>
</tr>
<tr>
<td align="left" valign="middle">Real-world clinical testing</td>
<td align="left" valign="middle">Deploy models in pilot studies to detect practical limitations early.</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="4">Ethical suggestions</td>
<td align="left" valign="middle" rowspan="2">Transparency &#x0026; trust</td>
<td align="left" valign="middle">Data transparency</td>
<td align="left" valign="middle">Disclose data sources, limitations, and processing steps to users.</td>
</tr>
<tr>
<td align="left" valign="middle">Bias monitoring</td>
<td align="left" valign="middle">Regularly check for and correct bias against underrepresented groups.</td>
</tr>
<tr>
<td align="left" valign="middle">Patient consent</td>
<td align="left" valign="middle">Informed consent enhancements</td>
<td align="left" valign="middle">Ensure patients understand AI&#x2019;s role, limitations, and risks.</td>
</tr>
<tr>
<td align="left" valign="middle">Equity in diagnosis</td>
<td align="left" valign="middle">Inclusive dataset representation</td>
<td align="left" valign="middle">Prioritize diverse data collection to improve fairness.</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="5">Policy actions</td>
<td align="left" valign="middle" rowspan="3">Responsibility allocation</td>
<td align="left" valign="middle">Accountability framework</td>
<td align="left" valign="middle">Clearly define roles for developers, institutions, and clinicians.</td>
</tr>
<tr>
<td align="left" valign="middle">Guidelines for AI deployment</td>
<td align="left" valign="middle">Set standards for safe AI integration, training, and support.</td>
</tr>
<tr>
<td align="left" valign="middle">Regular audits</td>
<td align="left" valign="middle">Periodically assess AI performance and address bias or risk.</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Patient safety</td>
<td align="left" valign="middle">AI performance standards</td>
<td align="left" valign="middle">Establish accuracy, sensitivity, and specificity benchmarks.</td>
</tr>
<tr>
<td align="left" valign="middle">Ethics and compliance training</td>
<td align="left" valign="middle">Train staff in AI ethics, safety, and compliance.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Fostering collaboration throughout the AI development lifecycle is crucial for building diagnostic systems that truly serve diverse patient needs. Open-source platforms&#x2014;such as those pioneered by the Hugging Face community&#x2014;improve transparency and accountability by making AI models and datasets available for broader review and improvement. Policymakers should also support the adoption of Explainable AI (XAI) frameworks, which make model logic visible and actionable for clinicians and patients alike, directly addressing the &#x201C;black box&#x201D; problem and enabling safer, more equitable diagnostic care.</p>
</sec>
<sec id="sec26">
<label>5.4</label>
<title>Framework validation roadmap</title>
<p>Validation will proceed in three steps: (i) Feasibility/shadow-mode pilots (1&#x2013;3 sites) to test non-blocking explainability, bias monitoring, and governance under predefined time budgets; endpoints include calibration (ECE/Brier), discrimination (AUROC), fairness gaps (&#x0394;FNR/&#x0394;AUC), alert precision/recall, and clinician verification time. (ii) Retrospective offline replay with de-identified EHR/imaging streams to stress-test drift detectors (PSI/KL), subgroup metrics, and ledger throughput; report false-alert rate, time-to-detection, and triage effort. (iii) Prospective pragmatic evaluation (cluster A/B or stepped-wedge) comparing standard care versus framework-augmented workflows; primary outcome: misdiagnosis composite; secondary outcomes: decision latency, override rates, calibration/fairness, and patient comprehension. All studies will be pre-registered, include privacy-impact and cost/infrastructure logs, and&#x2014;where resources are limited&#x2014;use lightweight deployments (local audits, secure aggregation, hash-only ledger anchoring).</p>
<p>This study has several limitations. First, it presents a conceptual framework supported by a narrative synthesis and secondary sources; it does not include original data collection or prospective clinical trials. Second, reliance on published reports and case descriptions introduces risks of citation and publication bias. Third, the framework&#x2019;s components&#x2014;bias-aware curation, hybrid explainability, federated audits, and blockchain-anchored accountability&#x2014;are not empirically validated here; their performance, costs, and workflow impact may vary across settings. Finally, generalizability is uncertain, especially in low-resource environments with heterogeneous infrastructure and policies. These limitations motivate the validation roadmap outlined above.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec27">
<label>6</label>
<title>Conclusion</title>
<p>The integration of AI into medical diagnostics holds great promise for improving accuracy, efficiency, and personalized care, but it also introduces risks of misdiagnosis driven by technical limits, model opacity, and diffuse responsibility. This study identifies three core barriers&#x2014;data bias, lack of transparency, and ambiguous accountability&#x2014;and advances a coordinated response across technical, ethical, and policy domains. Technically, we call for diverse, representative datasets, rigorous external validation, and explainability that is usable at the point of care (e.g., non-blocking overlays with concise rationales), while explicitly managing the complexity&#x2013;interpretability trade-off by preferring the simplest adequate model and documenting guardrails when black-box models are used. Ethically, roles are clarified&#x2014;developers for model quality, institutions for safe deployment and oversight, clinicians for patient care&#x2014;supported by layered, risk-tiered consent, teach-back, and human-override options. From a policy perspective, we advocate standards that require transparency audits, continuous post-deployment monitoring (calibration, fairness, and decision latency), and context-aware reporting across demographic groups and sites. Aligning these pillars enables stakeholders to harness AI&#x2019;s benefits while reducing its risks, strengthening patient safety, clinical trust, and health equity.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec28">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref rid="SM1" ref-type="supplementary-material">Supplementary material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec29">
<title>Author contributions</title>
<p>YL: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Funding acquisition, Data curation, Conceptualization. XY: Conceptualization, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. JF: Conceptualization, Investigation, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. YY: Conceptualization, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Data curation, Funding acquisition, Resources. CD: Writing &#x2013; review &#x0026; editing, Conceptualization, Writing &#x2013; original draft. JW: Project administration, Resources, Supervision, Validation, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec30">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by the 2025 Shanxi Provincial Higher Education Science and Technology Innovation Program Projects (2025W089).</p>
</sec>
<sec sec-type="COI-statement" id="sec31">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec32">
<title>Generative AI statement</title>
<p>The authors declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec33">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec34">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fmed.2025.1594450/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fmed.2025.1594450/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKinney</surname><given-names>SM</given-names></name> <name><surname>Sieniek</surname><given-names>M</given-names></name> <name><surname>Godbole</surname><given-names>V</given-names></name> <name><surname>Godwin</surname><given-names>J</given-names></name> <name><surname>Antropova</surname><given-names>N</given-names></name> <name><surname>Ashrafian</surname><given-names>H</given-names></name> <etal/></person-group>. <article-title>International evaluation of an AI system for breast cancer screening</article-title>. <source>Nature</source>. (<year>2020</year>) <volume>577</volume>:<fpage>89</fpage>&#x2013;<lpage>94</lpage>. PMID: <pub-id pub-id-type="pmid">31894144</pub-id></citation></ref>
<ref id="ref2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>X</given-names></name> <name><surname>Faes</surname><given-names>L</given-names></name> <name><surname>Kale</surname><given-names>AU</given-names></name> <name><surname>Wagner</surname><given-names>SK</given-names></name> <name><surname>Fu</surname><given-names>DJ</given-names></name> <name><surname>Bruynseels</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis</article-title>. <source>Lancet Digit Health</source>. (<year>2019</year>) <volume>1</volume>:<fpage>e271</fpage>&#x2013;<lpage>97</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30123-2</pub-id>, PMID: <pub-id pub-id-type="pmid">33323251</pub-id></citation></ref>
<ref id="ref3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tschandl</surname><given-names>P</given-names></name> <name><surname>Rinner</surname><given-names>C</given-names></name> <name><surname>Apalla</surname><given-names>Z</given-names></name> <name><surname>Argenziano</surname><given-names>G</given-names></name> <name><surname>Codella</surname><given-names>N</given-names></name> <name><surname>Halpern</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Human&#x2013;computer collaboration for skin cancer recognition</article-title>. <source>Nat Med</source>. (<year>2020</year>) <volume>26</volume>:<fpage>1229</fpage>&#x2013;<lpage>34</lpage>. PMID: <pub-id pub-id-type="pmid">32572267</pub-id></citation></ref>
<ref id="ref4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gulshan</surname><given-names>V</given-names></name> <name><surname>Rajan</surname><given-names>RP</given-names></name> <name><surname>Widner</surname><given-names>K</given-names></name> <name><surname>Wu</surname><given-names>D</given-names></name> <name><surname>Wubbels</surname><given-names>P</given-names></name> <name><surname>Rhodes</surname><given-names>T</given-names></name> <etal/></person-group>. <article-title>Performance of a deep-learning algorithm vs manual grading for detecting diabetic retinopathy in India</article-title>. <source>JAMA Ophthalmol</source>. (<year>2019</year>) <volume>137</volume>:<fpage>987</fpage>&#x2013;<lpage>93</lpage>. PMID: <pub-id pub-id-type="pmid">31194246</pub-id></citation></ref>
<ref id="ref5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Esteva</surname><given-names>A</given-names></name> <name><surname>Chou</surname><given-names>K</given-names></name> <name><surname>Yeung</surname><given-names>S</given-names></name> <name><surname>Naik</surname><given-names>N</given-names></name> <name><surname>Madani</surname><given-names>A</given-names></name> <name><surname>Mottaghi</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Deep learning-enabled medical computer vision</article-title>. <source>NPJ Digit Med</source>. (<year>2021</year>) <volume>4</volume>:<fpage>5</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41746-020-00376-2</pub-id>, PMID: <pub-id pub-id-type="pmid">33420381</pub-id></citation></ref>
<ref id="ref6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kelly</surname><given-names>CJ</given-names></name> <name><surname>Karthikesalingam</surname><given-names>A</given-names></name> <name><surname>Suleyman</surname><given-names>M</given-names></name> <name><surname>Corrado</surname><given-names>G</given-names></name> <name><surname>King</surname><given-names>D</given-names></name></person-group>. <article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>. <source>BMC Med</source>. (<year>2019</year>) <volume>17</volume>:<fpage>195</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12916-019-1426-2</pub-id></citation></ref>
<ref id="ref7"><label>7.</label><citation citation-type="book"><person-group person-group-type="author"><collab id="coll1">World Health Organization</collab></person-group>. <source>Diagnostic error: technical series on safer primary care</source>. <publisher-loc>Geneva</publisher-loc>: <publisher-name>WHO Press</publisher-name> (<year>2016</year>).</citation></ref>
<ref id="ref8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zech</surname><given-names>JR</given-names></name> <name><surname>Badgeley</surname><given-names>MA</given-names></name> <name><surname>Liu</surname><given-names>M</given-names></name> <name><surname>Costa</surname><given-names>AB</given-names></name> <name><surname>Titano</surname><given-names>JJ</given-names></name> <name><surname>Oermann</surname><given-names>EK</given-names></name></person-group>. <article-title>Variable generalization performance of a deep learning model to detect pneumonia in chest radiographs: a cross-sectional study</article-title>. <source>PLoS Med</source>. (<year>2018</year>) <volume>15</volume>:<fpage>e1002683</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pmed.1002683</pub-id>, PMID: <pub-id pub-id-type="pmid">30399157</pub-id></citation></ref>
<ref id="ref9"><label>9.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Oakden-Rayner</surname><given-names>L.</given-names></name> <name><surname>Dunnmon</surname><given-names>J.</given-names></name> <name><surname>Carneiro</surname><given-names>G.</given-names></name> <name><surname>R&#x00E9;</surname><given-names>C.</given-names></name></person-group> <article-title>Hidden stratification causes clinically meaningful failures in machine learning for medical imaging</article-title>. <conf-name>Proceedings of the ACM conference on health, inference, and learning</conf-name>. (<year>2020</year>): <fpage>151</fpage>&#x2013;<lpage>159</lpage>.</citation></ref>
<ref id="ref10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Newman-Toker</surname><given-names>DE</given-names></name> <name><surname>Schaffer</surname><given-names>AC</given-names></name> <name><surname>Yu-Moe</surname><given-names>CW</given-names></name> <name><surname>Nassery</surname><given-names>N</given-names></name> <name><surname>Saber Tehrani</surname><given-names>AS</given-names></name> <name><surname>Clemens</surname><given-names>GD</given-names></name> <etal/></person-group>. <article-title>Serious misdiagnosis-related harms in malpractice claims: the &#x201C;big three&#x201D;&#x2013;vascular events, infections, and cancers</article-title>. <source>Diagnosis</source>. (<year>2019</year>) <volume>6</volume>:<fpage>227</fpage>&#x2013;<lpage>40</lpage>. doi: <pub-id pub-id-type="doi">10.1515/dx-2019-0019</pub-id>, PMID: <pub-id pub-id-type="pmid">31535832</pub-id></citation></ref>
<ref id="ref11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname><given-names>H</given-names></name> <name><surname>Fotheringham</surname><given-names>K</given-names></name></person-group>. <article-title>Artificial intelligence in clinical decision-making: rethinking liability</article-title>. <source>Med Law Int</source>. (<year>2020</year>) <volume>20</volume>:<fpage>131</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0968533220945766</pub-id></citation></ref>
<ref id="ref12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Obermeyer</surname><given-names>Z</given-names></name> <name><surname>Powers</surname><given-names>B</given-names></name> <name><surname>Vogeli</surname><given-names>C</given-names></name> <name><surname>Mullainathan</surname><given-names>S</given-names></name></person-group>. <article-title>Dissecting racial bias in an algorithm used to manage the health of populations</article-title>. <source>Science</source>. (<year>2019</year>) <volume>366</volume>:<fpage>447</fpage>&#x2013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.aax2342</pub-id>, PMID: <pub-id pub-id-type="pmid">31649194</pub-id></citation></ref>
<ref id="ref13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adamson</surname><given-names>AS</given-names></name> <name><surname>Smith</surname><given-names>A</given-names></name></person-group>. <article-title>Machine learning and health care disparities in dermatology</article-title>. <source>JAMA Dermatol</source>. (<year>2018</year>) <volume>154</volume>:<fpage>1247</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jamadermatol.2018.2348</pub-id>, PMID: <pub-id pub-id-type="pmid">30073260</pub-id></citation></ref>
<ref id="ref14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ardila</surname><given-names>D</given-names></name> <name><surname>Kiraly</surname><given-names>AP</given-names></name> <name><surname>Bharadwaj</surname><given-names>S</given-names></name> <name><surname>Choi</surname><given-names>B</given-names></name> <name><surname>Reicher</surname><given-names>JJ</given-names></name> <name><surname>Peng</surname><given-names>L</given-names></name> <etal/></person-group>. <article-title>End-to-end lung cancer screening with three-dimensional deep learning on low-dose chest computed tomography</article-title>. <source>Nat Med</source>. (<year>2019</year>) <volume>25</volume>:<fpage>954</fpage>&#x2013;<lpage>61</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41591-019-0447-x</pub-id></citation></ref>
<ref id="ref15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lyell</surname><given-names>D</given-names></name> <name><surname>Coiera</surname><given-names>E</given-names></name></person-group>. <article-title>Automation bias and verification complexity: a systematic review</article-title>. <source>J Am Med Inform Assoc</source>. (<year>2017</year>) <volume>24</volume>:<fpage>423</fpage>&#x2013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1093/jamia/ocw105</pub-id>, PMID: <pub-id pub-id-type="pmid">27516495</pub-id></citation></ref>
<ref id="ref16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Fauw</surname><given-names>J</given-names></name> <name><surname>Ledsam</surname><given-names>JR</given-names></name> <name><surname>Romera-Paredes</surname><given-names>B</given-names></name> <name><surname>Nikolov</surname><given-names>S</given-names></name> <name><surname>Tomasev</surname><given-names>N</given-names></name> <name><surname>Blackwell</surname><given-names>S</given-names></name> <etal/></person-group>. <article-title>Clinically applicable deep learning for diagnosis and referral in retinal disease</article-title>. <source>Nat Med</source>. (<year>2018</year>) <volume>24</volume>:<fpage>1342</fpage>&#x2013;<lpage>50</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41591-018-0107-6</pub-id></citation></ref>
<ref id="ref17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rudin</surname><given-names>C</given-names></name></person-group>. <article-title>Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead</article-title>. <source>Nat Mach Intell</source>. (<year>2019</year>) <volume>1</volume>:<fpage>206</fpage>&#x2013;<lpage>15</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42256-019-0048-x</pub-id>, PMID: <pub-id pub-id-type="pmid">35603010</pub-id></citation></ref>
<ref id="ref18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sendak</surname><given-names>MP</given-names></name> <name><surname>D&#x2019;Arcy</surname><given-names>J</given-names></name> <name><surname>Kashyap</surname><given-names>S</given-names></name> <name><surname>Gao</surname><given-names>M</given-names></name> <name><surname>Nichols</surname><given-names>M</given-names></name> <name><surname>Corey</surname><given-names>K</given-names></name> <etal/></person-group>. <article-title>A path for translation of machine learning products into healthcare delivery</article-title>. <source>EMJ Innov</source>. (<year>2020</year>) <volume>10</volume>:<fpage>19-00172</fpage>. doi: <pub-id pub-id-type="doi">10.1002/ems3.1234</pub-id></citation></ref>
<ref id="ref19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Albahri</surname><given-names>AS</given-names></name> <name><surname>Duhaim</surname><given-names>AM</given-names></name> <name><surname>Fadhel</surname><given-names>MA</given-names></name> <name><surname>Alnoor</surname><given-names>A</given-names></name> <name><surname>Baqer</surname><given-names>NS</given-names></name> <name><surname>Alzubaidi</surname><given-names>L</given-names></name> <etal/></person-group>. <article-title>A systematic review of trustworthy and explainable artificial intelligence in healthcare: assessment of quality, bias risk, and data fusion</article-title>. <source>Inf Fusion</source>. (<year>2023</year>) <volume>96</volume>:<fpage>156</fpage>&#x2013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2023.03.008</pub-id></citation></ref>
<ref id="ref20"><label>20.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Luz</surname><given-names>A</given-names></name> <name><surname>Ray</surname><given-names>D</given-names></name></person-group>. <source>AI-powered disease diagnosis: evaluating the effectiveness of machine learning algorithms</source>. <publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier</publisher-name>. (<year>2024</year>).</citation></ref>
<ref id="ref21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Elemento</surname><given-names>O</given-names></name> <name><surname>Leslie</surname><given-names>C</given-names></name> <name><surname>Lundin</surname><given-names>J</given-names></name> <name><surname>Tourassi</surname><given-names>G</given-names></name></person-group>. <article-title>Artificial intelligence in cancer research, diagnosis and therapy</article-title>. <source>Nat Rev Cancer</source>. (<year>2021</year>) <volume>21</volume>:<fpage>747</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41568-021-00399-1</pub-id></citation></ref>
<ref id="ref22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname><given-names>XV</given-names></name> <name><surname>Oztek</surname><given-names>MA</given-names></name> <name><surname>Nelakurti</surname><given-names>DD</given-names></name> <name><surname>Brunnquell</surname><given-names>CL</given-names></name> <name><surname>Mossa-Basha</surname><given-names>M</given-names></name> <name><surname>Haynor</surname><given-names>DR</given-names></name> <etal/></person-group>. <article-title>Applying artificial intelligence to mitigate effects of patient motion or other complicating factors on image quality</article-title>. <source>Top Magn Reson Imaging</source>. (<year>2020</year>) <volume>29</volume>:<fpage>175</fpage>&#x2013;<lpage>80</lpage>. doi: <pub-id pub-id-type="doi">10.1097/RMR.0000000000000249</pub-id>, PMID: <pub-id pub-id-type="pmid">32511198</pub-id></citation></ref>
<ref id="ref23"><label>23.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Makanjee</surname><given-names>CR</given-names></name></person-group>. <article-title>Diagnostic medical imaging services with myriads of ethical dilemmas in a contemporary healthcare context: is artificial intelligence the solution?</article-title> In: <source>Medical imaging methods</source>. eds. Liu, J., Hines, D. and Zheng, Y. <publisher-loc>Advances in Diagnostic Imaging. Boca Raton, Florida, USA</publisher-loc>: <publisher-name>CRC Press</publisher-name> (<year>2021</year>). <fpage>1</fpage>&#x2013;<lpage>44</lpage>.</citation></ref>
<ref id="ref24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bashir</surname><given-names>A</given-names></name></person-group>. <article-title>AI-driven platforms for improving diagnostic accuracy in rare diseases: utilizing machine learning to identify and diagnose Underrecognized medical conditions</article-title>. <source>Hong Kong J AI Med</source>. (<year>2023</year>) <volume>3</volume>:<fpage>32</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00330-020-06672-5</pub-id></citation></ref>
<ref id="ref25"><label>25.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adler-Milstein</surname><given-names>J</given-names></name> <name><surname>Aggarwal</surname><given-names>N</given-names></name> <name><surname>Ahmed</surname><given-names>M</given-names></name> <name><surname>Castner</surname><given-names>J</given-names></name> <name><surname>Evans</surname><given-names>BJ</given-names></name> <name><surname>Gonzalez</surname><given-names>AA</given-names></name> <etal/></person-group>. <article-title>Meeting the moment: addressing barriers and facilitating clinical adoption of artificial intelligence in medical diagnosis</article-title>. <source>NAM Perspect</source>. (<year>2022</year>) <volume>2022</volume>. doi: <pub-id pub-id-type="doi">10.1001/jamaophthalmol.2019.2004</pub-id></citation></ref>
<ref id="ref26"><label>26.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brundage</surname><given-names>M</given-names></name> <name><surname>Avin</surname><given-names>S</given-names></name> <name><surname>Wang</surname><given-names>J</given-names></name> <name><surname>Belfield</surname><given-names>H</given-names></name> <name><surname>Krueger</surname><given-names>G</given-names></name> <name><surname>Hadfield</surname><given-names>G</given-names></name> <etal/></person-group>. <article-title>Toward trustworthy AI development: mechanisms for supporting verifiable claims</article-title>. <source>arXiv</source>. (<year>2020</year>). doi: <pub-id pub-id-type="doi">10.1038/s41591-020-0942-0</pub-id></citation></ref>
<ref id="ref27"><label>27.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Dignum</surname><given-names>V</given-names></name></person-group>. <article-title>Responsibility and artificial intelligence</article-title> In: <source>The oxford handbook of ethics of AI</source>, eds. Moor, J., Binns, R., and Dignum, V. vol. <volume>4698</volume> <publisher-loc>Oxford, United Kingdom</publisher-loc>: <publisher-name>Oxford University Press</publisher-name> (<year>2020</year>). <fpage>215</fpage>.</citation></ref>
<ref id="ref28"><label>28.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chinta</surname><given-names>SV</given-names></name> <name><surname>Wang</surname><given-names>Z</given-names></name> <name><surname>Zhang</surname><given-names>X</given-names></name> <name><surname>Viet</surname><given-names>TD</given-names></name> <name><surname>Kashif</surname><given-names>A</given-names></name> <name><surname>Smith</surname><given-names>MA</given-names></name> <etal/></person-group>. <article-title>Ai-driven healthcare: a survey on ensuring fairness and mitigating bias</article-title>. <source>arXiv</source>. (<year>2024</year>).</citation></ref>
<ref id="ref29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Berber</surname><given-names>A</given-names></name> <name><surname>Sre&#x0107;kovi&#x0107;</surname><given-names>S</given-names></name></person-group>. <article-title>When something goes wrong: who is responsible for errors in ML decision-making?</article-title> <source>AI &#x0026; Soc</source>. (<year>2024</year>) <volume>39</volume>:<fpage>1891</fpage>&#x2013;<lpage>903</lpage>.</citation></ref>
<ref id="ref30"><label>30.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Naik</surname><given-names>N</given-names></name> <name><surname>Hameed</surname><given-names>BMZ</given-names></name> <name><surname>Shetty</surname><given-names>DK</given-names></name> <name><surname>Swain</surname><given-names>D</given-names></name> <name><surname>Shah</surname><given-names>M</given-names></name> <name><surname>Paul</surname><given-names>R</given-names></name> <etal/></person-group>. <article-title>Legal and ethical consideration in artificial intelligence in healthcare: who takes responsibility?</article-title> <source>Front Surg</source>. (<year>2022</year>) <volume>9</volume>:<fpage>862322</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fsurg.2022.862322</pub-id>, PMID: <pub-id pub-id-type="pmid">35360424</pub-id></citation></ref>
<ref id="ref31"><label>31.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Habli</surname><given-names>I</given-names></name> <name><surname>Lawton</surname><given-names>T</given-names></name> <name><surname>Porter</surname><given-names>Z</given-names></name></person-group>. <article-title>Artificial intelligence in health care: accountability and safety</article-title>. <source>Bull World Health Organ</source>. (<year>2020</year>) <volume>98</volume>:<fpage>251</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.2471/BLT.19.237487</pub-id>, PMID: <pub-id pub-id-type="pmid">32284648</pub-id></citation></ref>
<ref id="ref32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takshi</surname><given-names>S</given-names></name></person-group>. <article-title>Unexpected inequality: disparate-impact from artificial intelligence in healthcare decisions</article-title>. <source>JL &#x0026; Health</source>. (<year>2020</year>) <volume>34</volume>:<fpage>215</fpage>. doi: <pub-id pub-id-type="doi">10.1001/jama.2019.21237</pub-id></citation></ref>
<ref id="ref33"><label>33.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Timmons</surname><given-names>AC</given-names></name> <name><surname>Duong</surname><given-names>JB</given-names></name> <name><surname>Simo Fiallo</surname><given-names>N</given-names></name> <name><surname>Lee</surname><given-names>T</given-names></name> <name><surname>Vo</surname><given-names>HPQ</given-names></name> <name><surname>Ahle</surname><given-names>MW</given-names></name> <etal/></person-group>. <article-title>A call to action on assessing and mitigating bias in artificial intelligence applications for mental health</article-title>. <source>Perspect Psychol Sci</source>. (<year>2023</year>) <volume>18</volume>:<fpage>1062</fpage>&#x2013;<lpage>96</lpage>. doi: <pub-id pub-id-type="doi">10.1177/17456916221134490</pub-id>, PMID: <pub-id pub-id-type="pmid">36490369</pub-id></citation></ref>
<ref id="ref34"><label>34.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Tsai</surname><given-names>C.H.</given-names></name> <name><surname>You</surname><given-names>Y.</given-names></name> <name><surname>Gui</surname><given-names>X.</given-names></name> <name><surname>Kou</surname><given-names>Y.</given-names></name> <name><surname>Carroll</surname><given-names>J.M.</given-names></name></person-group> <article-title>Exploring and promoting diagnostic transparency and explainability in online symptom checkers</article-title> <conf-name>Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems</conf-name> <volume>2021</volume>: <fpage>1</fpage>&#x2013;<lpage>17</lpage>.</citation></ref>
<ref id="ref35"><label>35.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Recht</surname><given-names>MP</given-names></name> <name><surname>Dewey</surname><given-names>M</given-names></name> <name><surname>Dreyer</surname><given-names>K</given-names></name> <name><surname>Langlotz</surname><given-names>C</given-names></name> <name><surname>Niessen</surname><given-names>W</given-names></name> <name><surname>Prainsack</surname><given-names>B</given-names></name> <etal/></person-group>. <article-title>Integrating artificial intelligence into the clinical practice of radiology: challenges and recommendations</article-title>. <source>Eur Radiol</source>. (<year>2020</year>) <volume>30</volume>:<fpage>3576</fpage>&#x2013;<lpage>84</lpage>., PMID: <pub-id pub-id-type="pmid">32064565</pub-id></citation></ref>
<ref id="ref36"><label>36.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aldughayfiq</surname><given-names>B</given-names></name> <name><surname>Ashfaq</surname><given-names>F</given-names></name> <name><surname>Jhanjhi</surname><given-names>NZ</given-names></name> <name><surname>Humayun</surname><given-names>M</given-names></name></person-group>. <article-title>Explainable AI for retinoblastoma diagnosis: interpreting deep learning models with LIME and SHAP</article-title>. <source>Diagnostics</source>. (<year>2023</year>) <volume>13</volume>:<fpage>1932</fpage>. doi: <pub-id pub-id-type="doi">10.3390/diagnostics13111932</pub-id>, PMID: <pub-id pub-id-type="pmid">37296784</pub-id></citation></ref>
<ref id="ref37"><label>37.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vimbi</surname><given-names>V</given-names></name> <name><surname>Shaffi</surname><given-names>N</given-names></name> <name><surname>Mahmud</surname><given-names>M</given-names></name></person-group>. <article-title>Interpreting artificial intelligence models: a systematic review on the application of LIME and SHAP in Alzheimer&#x2019;s disease detection</article-title>. <source>Brain Inform</source>. (<year>2024</year>) <volume>11</volume>:<fpage>10</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40708-024-00222-1</pub-id>, PMID: <pub-id pub-id-type="pmid">38578524</pub-id></citation></ref>
<ref id="ref38"><label>38.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salih</surname><given-names>AM</given-names></name> <name><surname>Raisi-Estabragh</surname><given-names>Z</given-names></name> <name><surname>Galazzo</surname><given-names>IB</given-names></name> <name><surname>Radeva</surname><given-names>P</given-names></name> <name><surname>Petersen</surname><given-names>SE</given-names></name> <name><surname>Lekadir</surname><given-names>K</given-names></name> <etal/></person-group>. <article-title>A perspective on explainable artificial intelligence methods: SHAP and LIME</article-title>. <source>Adv Intell Syst</source>. (<year>2025</year>) <volume>7</volume>:<fpage>2400304</fpage>. doi: <pub-id pub-id-type="doi">10.1002/aisy.202400304</pub-id></citation></ref>
<ref id="ref39"><label>39.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chaddad</surname><given-names>A</given-names></name> <name><surname>Peng</surname><given-names>J</given-names></name> <name><surname>Xu</surname><given-names>J</given-names></name> <name><surname>Bouridane</surname><given-names>A</given-names></name></person-group>. <article-title>Survey of explainable AI techniques in healthcare</article-title>. <source>Sensors (Basel)</source>. (<year>2023</year>) <volume>23</volume>:<fpage>634</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s23020634</pub-id>, PMID: <pub-id pub-id-type="pmid">36679430</pub-id></citation></ref>
<ref id="ref40"><label>40.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Okada</surname><given-names>Y</given-names></name> <name><surname>Ning</surname><given-names>Y</given-names></name> <name><surname>Ong</surname><given-names>MEH</given-names></name></person-group>. <article-title>Explainable artificial intelligence in emergency medicine: an overview</article-title>. <source>Clin Exp Emerg Med</source>. (<year>2023</year>) <volume>10</volume>:<fpage>354</fpage>&#x2013;<lpage>62</lpage>. doi: <pub-id pub-id-type="doi">10.15441/ceem.23.145</pub-id>, PMID: <pub-id pub-id-type="pmid">38012816</pub-id></citation></ref>
<ref id="ref41"><label>41.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Esteva</surname><given-names>A</given-names></name> <name><surname>Kuprel</surname><given-names>B</given-names></name> <name><surname>Novoa</surname><given-names>RA</given-names></name> <name><surname>Ko</surname><given-names>J</given-names></name> <name><surname>Swetter</surname><given-names>SM</given-names></name> <name><surname>Blau</surname><given-names>HM</given-names></name> <etal/></person-group>. <article-title>Dermatologist-level classification of skin cancer with deep neural networks</article-title>. <source>Nature</source>. (<year>2017</year>) <volume>542</volume>:<fpage>115</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature21056</pub-id>, PMID: <pub-id pub-id-type="pmid">28117445</pub-id></citation></ref>
<ref id="ref42"><label>42.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hosny</surname><given-names>A</given-names></name> <name><surname>Parmar</surname><given-names>C</given-names></name> <name><surname>Quackenbush</surname><given-names>J</given-names></name> <name><surname>Schwartz</surname><given-names>LH</given-names></name> <name><surname>Aerts</surname><given-names>HJWL</given-names></name></person-group>. <article-title>Artificial intelligence in radiology</article-title>. <source>Nat Rev Cancer</source>. (<year>2018</year>) <volume>18</volume>:<fpage>500</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41568-018-0016-5</pub-id>, PMID: <pub-id pub-id-type="pmid">29777175</pub-id></citation></ref>
<ref id="ref43"><label>43.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ting</surname><given-names>DS</given-names></name> <name><surname>Ting</surname><given-names>DSW</given-names></name> <name><surname>Cheung</surname><given-names>CY</given-names></name> <name><surname>Lim</surname><given-names>G</given-names></name> <name><surname>Tan</surname><given-names>GSW</given-names></name> <name><surname>Quang</surname><given-names>ND</given-names></name> <etal/></person-group>. <article-title>Development and validation of a deep learning system for diabetic retinopathy and related eye diseases using retinal images from multiethnic populations with diabetes</article-title>. <source>JAMA</source>. (<year>2017</year>) <volume>318</volume>:<fpage>2211</fpage>&#x2013;<lpage>23</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jama.2017.18152</pub-id>, PMID: <pub-id pub-id-type="pmid">29234807</pub-id></citation></ref>
<ref id="ref44"><label>44.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gulshan</surname><given-names>V</given-names></name> <name><surname>Peng</surname><given-names>L</given-names></name> <name><surname>Coram</surname><given-names>M</given-names></name> <name><surname>Stumpe</surname><given-names>MC</given-names></name> <name><surname>Wu</surname><given-names>D</given-names></name> <name><surname>Narayanaswamy</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Development and validation of a deep learning algorithm for detection of diabetic retinopathy in retinal fundus photographs</article-title>. <source>JAMA</source>. (<year>2016</year>) <volume>316</volume>:<fpage>2402</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jama.2016.17216</pub-id>, PMID: <pub-id pub-id-type="pmid">27898976</pub-id></citation></ref>
<ref id="ref45"><label>45.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hannun</surname><given-names>AY</given-names></name> <name><surname>Rajpurkar</surname><given-names>P</given-names></name> <name><surname>Haghpanahi</surname><given-names>M</given-names></name> <name><surname>Tison</surname><given-names>GH</given-names></name> <name><surname>Bourn</surname><given-names>C</given-names></name> <name><surname>Turakhia</surname><given-names>MP</given-names></name> <etal/></person-group>. <article-title>Cardiologist-level arrhythmia detection and classification in ambulatory electrocardiograms using a deep neural network</article-title>. <source>Nat Med</source>. (<year>2019</year>) <volume>25</volume>:<fpage>65</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-019-1799-6</pub-id></citation></ref>
<ref id="ref46"><label>46.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rajpurkar</surname><given-names>P</given-names></name> <name><surname>Hannun</surname><given-names>AY</given-names></name> <name><surname>Haghpanahi</surname><given-names>M</given-names></name> <name><surname>Bourn</surname><given-names>C</given-names></name> <name><surname>Ng</surname><given-names>AY</given-names></name></person-group>. <article-title>Cardiologist-level arrhythmia detection with convolutional neural networks</article-title>. <source>Nat Med</source>. (<year>2019</year>) <volume>25</volume>:<fpage>65</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.5694/mja2.50821</pub-id></citation></ref>
<ref id="ref47"><label>47.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Campanella</surname><given-names>G</given-names></name> <name><surname>Hanna</surname><given-names>MG</given-names></name> <name><surname>Geneslaw</surname><given-names>L</given-names></name> <name><surname>Miraflor</surname><given-names>A</given-names></name> <name><surname>Werneck Krauss Silva</surname><given-names>V</given-names></name> <name><surname>Busam</surname><given-names>KJ</given-names></name> <etal/></person-group>. <article-title>Clinical-grade computational pathology using weakly supervised deep learning on whole slide images</article-title>. <source>Nat Med</source>. (<year>2019</year>) <volume>25</volume>:<fpage>1301</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41591-019-0508-1</pub-id>, PMID: <pub-id pub-id-type="pmid">31308507</pub-id></citation></ref>
<ref id="ref48"><label>48.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steiner</surname><given-names>DF</given-names></name> <name><surname>MacDonald</surname><given-names>R</given-names></name> <name><surname>Liu</surname><given-names>Y</given-names></name> <name><surname>Truszkowski</surname><given-names>P</given-names></name> <name><surname>Hipp</surname><given-names>JD</given-names></name> <name><surname>Gammage</surname><given-names>C</given-names></name> <etal/></person-group>. <article-title>Impact of deep learning assistance on the histopathologic review of lymph nodes for metastatic breast cancer</article-title>. <source>Am J Pathol</source>. (<year>2018</year>) <volume>188</volume>:<fpage>431</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1097/PAS.0000000000001151</pub-id></citation></ref>
<ref id="ref49"><label>49.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rajpurkar</surname><given-names>P</given-names></name> <name><surname>Irvin</surname><given-names>J</given-names></name> <name><surname>Zhu</surname><given-names>K</given-names></name> <name><surname>Yang</surname><given-names>B</given-names></name> <name><surname>Mehta</surname><given-names>H</given-names></name> <name><surname>Duan</surname><given-names>T</given-names></name> <etal/></person-group>. <article-title>CheXNet: radiologist-level pneumonia detection on chest X-rays with deep learning</article-title>. <source>arXiv</source>. (<year>2017</year>) <volume>23</volume>:<fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi: 10.1007/s00330-020-06672-5</citation></ref>
<ref id="ref50"><label>50.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Scott</surname><given-names>IA</given-names></name> <name><surname>Coiera</surname><given-names>EW</given-names></name></person-group>. <article-title>Can AI help in the fight against COVID-19?</article-title> <source>Med J Aust</source>. (<year>2020</year>) <volume>213</volume>:<fpage>439</fpage>&#x2013;<lpage>441.e2</lpage>.</citation></ref>
<ref id="ref51"><label>51.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yedavalli</surname><given-names>VS</given-names></name> <name><surname>Tong</surname><given-names>E</given-names></name> <name><surname>Martin</surname><given-names>D</given-names></name> <name><surname>Yeom</surname><given-names>KW</given-names></name> <name><surname>Forkert</surname><given-names>ND</given-names></name></person-group>. <article-title>Artificial intelligence in stroke imaging: current and future perspectives</article-title>. <source>Clin Imaging</source>. (<year>2021</year>) <volume>69</volume>:<fpage>246</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.clinimag.2020.09.005</pub-id></citation></ref>
<ref id="ref52"><label>52.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stib</surname><given-names>MT</given-names></name> <name><surname>Menon</surname><given-names>BK</given-names></name> <name><surname>Dyer</surname><given-names>P</given-names></name> <name><surname>Fawzi</surname><given-names>A</given-names></name> <name><surname>Baker</surname><given-names>A</given-names></name> <name><surname>Gupta</surname><given-names>R</given-names></name> <etal/></person-group>. <article-title>Artificial intelligence in stroke imaging: current practices and emerging applications</article-title>. <source>Stroke</source>. (<year>2020</year>) <volume>51</volume>:<fpage>e249</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1161/STROKEAHA.120.029199</pub-id></citation></ref>
<ref id="ref53"><label>53.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gichoya</surname><given-names>JW</given-names></name> <name><surname>Banerjee</surname><given-names>I</given-names></name> <name><surname>Bhimireddy</surname><given-names>AR</given-names></name> <name><surname>Burns</surname><given-names>JL</given-names></name> <name><surname>Celi</surname><given-names>LA</given-names></name> <name><surname>Chen</surname><given-names>LC</given-names></name> <etal/></person-group>. <article-title>AI recognition of patient race in medical imaging: a modelling study</article-title>. <source>Lancet Digit Health</source>. (<year>2022</year>) <volume>4</volume>:<fpage>e406</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2589-7500(22)00063-2</pub-id>, PMID: <pub-id pub-id-type="pmid">35568690</pub-id></citation></ref>
</ref-list>
</back>
</article>