<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1668029</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Designing effective explainable AI: a human-centered evaluation of explanation formats in financial decision-making</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Maathuis</surname> <given-names>Henry</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/2820679"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Stalenhoef</surname> <given-names>Marcel</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>van Otterloo</surname> <given-names>Sieuwert</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zwaal</surname> <given-names>Raymond</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>van Montfort</surname> <given-names>Kees</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3292811"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Sent</surname> <given-names>Danielle</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<uri xlink:href="https://loop.frontiersin.org/people/2815425"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Research Group Artificial Intelligence, HU University of Applied Sciences Utrecht</institution>, <city>Utrecht</city>, <country>Netherlands</country></aff>
<aff id="aff2"><label>2</label><institution>Jheronimus Academy of Data Science, Tilburg University, Eindhoven University of Technology</institution>, <city>&#x02019;s-Hertogenbosch</city>, <country>Netherlands</country></aff>
<aff id="aff3"><label>3</label><institution>Research Group Human Experience &#x00026; Media Design, HU University of Applied Sciences Utrecht</institution>, <city>Utrecht</city>, <country>Netherlands</country></aff>
<aff id="aff4"><label>4</label><institution>Center for Financial Innovation, Amsterdam University of Applied Sciences</institution>, <city>Amsterdam</city>, <country>Netherlands</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Henry Maathuis, <email xlink:href="mailto:henry.maathuis@hu.nl">henry.maathuis@hu.nl</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-05">
<day>05</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1668029</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>15</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Maathuis, Stalenhoef, van Otterloo, Zwaal, van Montfort and Sent.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Maathuis, Stalenhoef, van Otterloo, Zwaal, van Montfort and Sent</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-05">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>As artificial intelligence (AI) systems are increasingly deployed in high-risk financial decision-making contexts, the demand for transparency and interpretability becomes critical. Explainable AI (XAI) has emerged as a key research domain addressing these needs. While most existing XAI studies emphasize objective quality measures such as correctness and completeness of explanations, they often overlook the role of end-user requirements and the broader ecosystem of stakeholders. This study presents a human-centered evaluation of different visual explanation designs in financial AI applications, assessing their effectiveness. A two-phase mixed-method evaluation was conducted, combining user studies with end-users and a stakeholder workshop, to rank visual prototypes across four explanation types: feature importance, counterfactuals, contrastive/similar examples, and rule-based explanations. A key finding is the divergence between end-users and other stakeholders&#x02014;including compliance officers, XAI consultants, and developers&#x02014;with end-users indicating a preference for concise, contextually visual explanations (e.g., small sets of decision rules or risk plots relative to similar cases), while other stakeholders often favor more complete, technically detailed representations. This highlights a critical trade-off between interpretability and completeness. This suggests that visual encoding choices may affect the effectiveness of AI explanations across different stakeholder groups.</p></abstract>
<kwd-group>
<kwd>explainable AI</kwd>
<kwd>explanation formats</kwd>
<kwd>finance</kwd>
<kwd>graphical design</kwd>
<kwd>human-centered evaluation</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Nationaal Regieorgaan Praktijkgericht Onderzoek SIA</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100010409</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was part of the FIN-X project, funded by the Dutch National Organisation for Practice-Oriented Research SIA (file number RAAK.MKB17.003).</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="32"/>
<page-count count="14"/>
<word-count count="10830"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>AI in Finance</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Artificial Intelligence (AI) has become an increasingly important component of modern decision-making systems, influencing a wide range of industries, including healthcare and finance. As AI-driven models and solutions grow more sophisticated, their underlying complexity has raised significant concerns around transparency, interpretability, and ultimately, the trustworthiness of AI-generated outcomes (<xref ref-type="bibr" rid="B29">Phillips-Wren, 2012</xref>; <xref ref-type="bibr" rid="B15">Hawley et al., 1990</xref>; <xref ref-type="bibr" rid="B2">Amann et al., 2020</xref>). In particular, black-box models&#x02014;characterized by opaque and non-intuitive decision processes&#x02014;pose serious challenges for regulators, businesses, and end-users seeking to understand, challenge, or justify algorithmic outcomes (<xref ref-type="bibr" rid="B14">Hassija et al., 2024</xref>).</p>
<p>These concerns have fueled the emergence of Explainable Artificial Intelligence (XAI), a growing research field dedicated to improving the interpretability of AI systems and ensuring that decisions are comprehensible, justifiable, and actionable (<xref ref-type="bibr" rid="B26">Miller, 2019</xref>; <xref ref-type="bibr" rid="B3">Arrieta et al., 2020</xref>). The importance of explainability is further reflected in ethical and regulatory frameworks such as the Ethics Guidelines for Trustworthy AI (<xref ref-type="bibr" rid="B16">High-Level Expert Group on Artificial Intelligence., 2019</xref>), which identify it as a key pillar for fostering transparency and accountability in AI systems.</p>
<p>Although XAI has made considerable technical progress, much of the research continues to emphasize explanation generation rather than its reception and practical utility in real-world contexts (<xref ref-type="bibr" rid="B27">Mohseni et al., 2021</xref>). The presence of an explanation alone does not ensure that users will understand or trust the underlying system. For explanations to be effective, they must align with the audience&#x00027;s cognitive abilities, expectations, and prior knowledge (<xref ref-type="bibr" rid="B6">Brennen, 2020</xref>; <xref ref-type="bibr" rid="B5">Bertrand et al., 2022</xref>; <xref ref-type="bibr" rid="B23">Liao et al., 2020</xref>). This requires not only accurate content but also careful attention to how explanations are presented. Modalities such as textual descriptions, visual illustrations, or auditory feedback each influence how users process information. Interactive elements, in particular, can serve as scaffolding mechanisms, helping users to explore, navigate, and interpret AI outputs in more meaningful ways (<xref ref-type="bibr" rid="B23">Liao et al., 2020</xref>).</p>
<p>Despite this, the design of explanations&#x02014;especially in terms of visual layout, modality, and interactivity&#x02014;remains an underexplored dimension in XAI research. While some studies have examined how presentation formats affect comprehension and trust, few have systematically investigated the role of design in shaping user engagement and experience. This is a critical omission, as design fundamentally influences whether explanations are not just available but actually usable. Research in human-computer interaction (HCI) suggests that well-designed explanations can improve trust, enhance decision-making accuracy, and increase user satisfaction (<xref ref-type="bibr" rid="B8">Ehsan et al., 2021</xref>; <xref ref-type="bibr" rid="B18">Kaur et al., 2020</xref>). Yet, in many XAI studies, design remains secondary to algorithmic transparency and technical performance (<xref ref-type="bibr" rid="B13">Gilpin et al., 2018</xref>).</p>
<p>The need for explainable AI is particularly acute in high-risk domains such as finance, where AI systems are increasingly used for credit scoring, risk evaluation, and fraud detection. In such applications, explainability is essential not only for regulatory compliance and auditability but also for ensuring fair, informed, and transparent decision-making. The proposed EU AI Act underscores this requirement by categorizing AI systems into risk tiers and imposing stringent transparency requirements on high-risk applications (<xref ref-type="bibr" rid="B9">European Commission, 2021</xref>). Furthermore, recent work emphasizes that effective explanations must be actionable, narratively coherent, and context-sensitive in order to support meaningful human-AI collaboration (<xref ref-type="bibr" rid="B20">Kim et al., 2024b</xref>). A large-scale study by the European Commission&#x00027;s Joint Research Centre further highlights the importance of human oversight, showing its role in mitigating discriminatory outcomes in AI-assisted decision-making (<xref ref-type="bibr" rid="B12">Gaudeul et al., 2025</xref>).</p>
<p>In the financial sector, the diversity of stakeholders&#x02014;including regulators, compliance officers, analysts, and end-users&#x02014;creates an additional challenge for explainability. Each group may have distinct informational needs, interpretive skills, and decision contexts. For instance, a financial analyst may require a detailed feature-level explanation, whereas a loan applicant may only need a clear, high-level rationale for the outcome (<xref ref-type="bibr" rid="B22">Langer et al., 2021</xref>). Current XAI methods often fall short of addressing this spectrum of needs, pointing to a critical gap in adaptive, stakeholder-specific explanation strategies (<xref ref-type="bibr" rid="B21">Kim et al., 2024c</xref>).</p>
<p>Moreover, the evaluation of XAI techniques is frequently limited to algorithmic metrics or controlled lab benchmarks rather than real-world user assessments (<xref ref-type="bibr" rid="B7">Doshi-Velez and Kim, 2017</xref>; <xref ref-type="bibr" rid="B25">Lipton, 2018</xref>). Studies on end-user perceptions reveal a recurring mismatch between what researchers define as &#x0201C;explainable&#x0201D; and what users actually find understandable or useful (<xref ref-type="bibr" rid="B24">Liao et al., 2022</xref>). A recent systematic review by <xref ref-type="bibr" rid="B4">Belle and Papantonis (2021)</xref> echoes these concerns, calling for more empirical user-centered research. Additionally, psychological and cognitive factors that shape how users engage with explanations remain insufficiently addressed in current evaluation frameworks (<xref ref-type="bibr" rid="B1">Ali et al., 2023</xref>).</p>
<p>To address these limitations, our study conducts a human-centered evaluation of visual presentation formats for communicating explainable AI (XAI) outputs in the financial domain, framed as an exploratory analysis due to the small sample size. We adopt a model-agnostic approach, focusing not on generating explanations from specific algorithms but on how different types of pre-defined explanations&#x02014;such as feature importance or counterfactuals&#x02014;can be effectively presented and understood. Our investigation centers on two real-world use cases: (1) business credit provision and (2) automotive insurance claim fraud assessment. Since all participating end-users already work directly with these AI-supported decision processes, they possess an inherent understanding of the underlying use case, decision task, and role of the AI system. The visualizations and explanation designs were therefore aligned with their existing domain knowledge and everyday workflows. End-users evaluated explanations on explicitly given dimensions such as usefulness, clarity, and actionability, which reflect meaningful engagement with the context rather than mere visual appeal, and generally rated the designs positively, suggesting they supported understanding and decision-making within their specific workflows (<xref ref-type="bibr" rid="B20">Kim et al., 2024b</xref>).</p>
<p>Through this research, we aim to generate actionable insights for AI developers, financial institutions, and policymakers working toward more transparent, trustworthy, and user-centered AI systems. By bridging the persistent gap between technical explainability and real-world usability&#x02014;particularly through a focus on visual and interaction design&#x02014; we seek to advance the development of AI systems that are not only interpretable, but also intelligible and impactful in practice.</p></sec>
<sec sec-type="materials|methods" id="s2">
<label>2</label>
<title>Materials and methods</title>
<p>In this study, we design and evaluate the effectiveness of various designs of explanations for different explanation types. To achieve this, we first distill a set of requirements that a meaningful explanation must meet based on existing literature, studying two real-world use-cases and studying rules and legislation; specifically, the considerations mentioned in the AI Act and GDPR (<xref ref-type="bibr" rid="B10">European Commission, 2024</xref>; <xref ref-type="bibr" rid="B11">European Union, 2016</xref>). We further elicit user needs through interviews with end-users from both use cases. Based on these requirements and insights, we iteratively developed and refined the explanation designs in close alignment with end-users and use-case owners. The final designs are provided in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
<p>Afterwards, we employ a two-phase evaluation process to assess the effectiveness of different visualization approaches for various explanation types. The first phase consists of user studies with end-users from our use-case partners, where we collect insights on user preferences and evaluate them on human-centered criteria (<xref ref-type="bibr" rid="B19">Kim et al., 2024a</xref>) through questionnaires. This feedback is then used to make small refinements to the explanation designs.</p>
<p>In the second phase, the evaluation extends to a wider range of stakeholders, including consultancy partners, compliance officers, and other professionals involved in XAI systems. This phase is conducted through a workshop, where participants rank the prototypes using the 100-dollar method for each explanation format, followed by qualitative discussions on the highest-ranked designs. By incorporating perspectives beyond end-users, this approach aims to keep the explanations both user-centric and aligned with broader stakeholder needs.</p>
<p>Therefore, the effectiveness of these designs is evaluated through mixed-method use-case studies with various stakeholders.</p>
<sec>
<label>2.1</label>
<title>Law-based considerations</title>
<p>When designing high-risk AI systems, it is essential to consider legal requirements related to transparency and explainability. In the European context, key regulations such as the Artificial Intelligence Regulation (AI Act) (<xref ref-type="bibr" rid="B10">European Commission, 2024</xref>) and the General Data Protection Regulation (GDPR) (<xref ref-type="bibr" rid="B11">European Union, 2016</xref>) apply. Organizations must comply with both frameworks when deploying AI systems that affect individuals or process personal data.</p>
<p>The AI Act, which formally came into effect on August 1, 2024, establishes explicit obligations for high-risk AI systems. Article 86 and Recital 171 of the AI Act specify that individuals subject to significant decisions based primarily on AI outputs are entitled to a clear and meaningful explanation, sufficient to understand the basis of the decision and exercise their rights. For instance, this applies to financial customers whose transactions are reviewed by an AI system. Recital 73 further requires that AI systems be designed to enable human operators, such as employees of a financial institution, to oversee the system&#x00027;s functioning, ensure correct usage, and mitigate adverse impacts. Providers are obliged to implement operational constraints and assign oversight responsibilities to competent personnel with the necessary training and authority, and to ensure that users possess sufficient literacy to understand explanations necessary for monitoring and intervention.</p>
<p>The GDPR complements the AI Act by regulating automated decision-making that involves personal data. Article 14 requires controllers to provide data subjects with meaningful information about the logic of automated decisions, their significance, and consequences, particularly when personal data were not obtained directly from the data subject. In financial contexts, this typically applies to bank customers, whereas the user of the system is the bank employee who first receives the explanation and may subsequently communicate it to the customer.</p>
<p>Legal requirements distinguish between global and local explanations. Global explanations provide insight into the AI system as a whole, including its architecture, training data, modeling techniques, testing procedures, and instructions for use, and can be embedded in help screens, manuals, or other accessible documentation. Local explanations, in contrast, pertain to individual decisions and describe specific inputs, intermediate factors or sub-scores, and the rationale behind a particular output. For example, a local explanation might indicate why a particular insurance claim was denied, referencing relevant features such as payment history or other criteria used by the AI system.</p>
<p>In this study, we focus on the requirements for local explanations. While global explanations remain important for user orientation, the design challenges and regulatory obligations are most pronounced in local explanations, which directly support decision-making and compliance. These legal considerations, together with insights from existing XAI literature, form the foundation for a comprehensive set of explanation requirements that can guide the design of practical, user-centered AI explanations as discussed in Section 2.2.</p>
</sec>
<sec>
<label>2.2</label>
<title>Consolidated set of explanation requirements</title>
<p>A comprehensive set of requirements was established by reviewing the literature on Explainable AI (XAI) (<xref ref-type="bibr" rid="B19">Kim et al., 2024a</xref>) and analyzing relevant legal frameworks, such as the GDPR and EU AI Regulation as described above. This theoretical foundation was further refined through expert workshops involving a total of seven academic and industry professionals, including XAI consultants, data scientists/developers of predictive financial models, and representatives from other financial bodies. This diverse group ensured that the requirements were both conceptually robust and practically relevant for financial applications. In the workshops, each requirement was assigned a score based on its priority, using a star-rating system in which a requirement could receive <italic>zero, one</italic>, or <italic>two</italic> stars. Each expert assigned their own rating, and the final score for a requirement was calculated as the average of all expert ratings. This averaging process produced fractional scores (e.g., 1.8), which represent the mean importance across experts. The star ratings were used to determine the prioritization of the requirements:</p>
<list list-type="bullet">
<list-item><p>A requirement with more stars than another indicates a higher priority.</p></list-item>
<list-item><p>There are no restrictions on the number of requirements receiving two stars, meaning multiple requirements can be rated with the highest priority.</p></list-item>
</list>
<p>This ranked set of requirements was compiled into a list after the workshop and sorted by their importance. This final validated set of requirements is used as input for designing explanations for AI systems and is listed in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Final ranked requirements for local explanations.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Requirement</bold></th>
<th valign="top" align="center"><bold>Score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Helps people understand how to change a decision (e.g., from reject to accept).</td>
<td valign="top" align="center">1.8</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Encourages users to take action (e.g., look up or verify information).</td>
<td valign="top" align="center">1.7</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Helps users recognize risks of bias, profiling, and discrimination.</td>
<td valign="top" align="center">1.7</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Serves as a means for users to provide feedback to the model.</td>
<td valign="top" align="center">1.7</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Enables users to explain the decision to the affected person.</td>
<td valign="top" align="center">1.5</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Reduces over-reliance on AI outputs (prevents automation bias).</td>
<td valign="top" align="center">1.5</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Supports oversight by helping users determine when not to use a high-risk AI system.</td>
<td valign="top" align="center">1.5</td>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">Must be compact and concise.</td>
<td valign="top" align="center">1.3</td>
</tr>
<tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Increases user confidence in decision-making.</td>
<td valign="top" align="center">1.3</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">Helps detect anomalies, dysfunctions, and unexpected performance.</td>
<td valign="top" align="center">1.3</td>
</tr>
<tr>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Provides interactivity, allowing users to engage with the explanation.</td>
<td valign="top" align="center">1.3</td>
</tr></tbody>
</table>
</table-wrap>
<p>These requirements were used to inform the development of the visual prototypes, guiding the design of different visual explanation formats tailored to specific explanation types. Section 2.3 describes the user needs related to the investigated use-cases. In Section 2.4, we list general design considerations that were taken into account when designing the explanation formats.</p>
</sec>
<sec>
<label>2.3</label>
<title>User needs</title>
<p>To ensure that the explanation requirements were grounded in actual practice, we implemented a two-stage process to capture detailed use-case contexts and user needs.</p>
<p>In the first stage, use-case partners completed a structured use-case form that documented key aspects of their decision-support systems. This form gathered information on: (1) a comprehensive description of the use-case, (2) the types of decision-support systems in use (e.g., AI or rule-based systems), including specific techniques (e.g. logistic regression, XGBoost, rule-based methods, neural networks), (3) the operational history, function, and data flow (inputs/outputs) of these systems, (4) the processes supported by the systems and the steps involved, and (5) the stakeholders and their respective roles (e.g., customers, regulators, auditors). The specific use-case form can be found in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
<p>In the second stage, we conducted semi-structured interviews with end-users of these systems. The interview protocol began with the administration of an informed consent procedure and the collection of demographic data (including main role, tenure, and which role, if any, they had in the implementation of the system). Participants were then asked to describe in detail the decision-making process they follow when using the system, with prompts to explain the specific steps, challenges encountered, and the support provided by the system&#x00027;s outputs. Particular emphasis was placed on the nature of any explanations received: how outcomes are presented, the content and technical underpinnings of the explanations, and how the explanation is (visually) communicated. The respondents were also asked to indicate their needs and suggestions for improving the explanations as part of their decision support systems. The interview protocol is attached in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
<p>The data obtained from these interviews were analyzed and organized into user stories that were then integrated into the iterative development of visual prototypes for AI explanations. This approach ensured that the prototypes were aligned with both the technical requirements and the real-world challenges faced by users in the financial sector.</p>
</sec>
<sec>
<label>2.4</label>
<title>Prototype development</title>
<p>Drawing on insights from the literature, completed use-case forms, and user interviews, different visual prototypes were developed to illustrate the AI decision-making process. To ensure that the designs were meaningful, functional requirements were first derived from the interviews using user stories structured as:</p>
<disp-quote><p><italic>As a [user role], I want to [goal], so that [desired outcome]</italic>.</p></disp-quote>
<p>In response to these user stories, concrete functional requirements were specified to guide the design and implementation of the explanation interfaces. <xref ref-type="table" rid="T2">Table 2</xref> presents the main user stories identified across both use-cases, their implications for interface design, and the resulting functional requirements. These requirements served as a design blueprint for all subsequent prototypes and ensured that each explanation type addressed a clearly articulated user need.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>User stories and derived functional requirements.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>User story</bold></th>
<th valign="top" align="left"><bold>Design impact</bold></th>
<th valign="top" align="left"><bold>Functional requirements</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">As a user, I want to see which features contributed most to the decision, so that I can understand the reasoning behind the outcome.</td>
<td valign="top" align="left">Feature importance visualization (e.g., bar chart)</td>
<td valign="top" align="left">Display the top five influential features; apply color coding for positive and negative contributions.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want to view similar past cases and their outcomes, so that I can compare and validate the current decision.</td>
<td valign="top" align="left">Similar/contrastive cases examples</td>
<td valign="top" align="left">Show three to six similar cases; include both matching and contrasting outcomes.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want to explore hypothetical changes to inputs, so that I can see what adjustments would lead to a different outcome.</td>
<td valign="top" align="left">Counterfactual explanations</td>
<td valign="top" align="left">Present original and modified outcomes side by side; suggest alternative values that would lead to acceptance.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want to see which decision rules were applied and their impact, so that I can understand the logic behind the decision.</td>
<td valign="top" align="left">Rule-based explanations</td>
<td valign="top" align="left">List applied rules with their outcomes; highlight thresholds and actual values; allow users to expand rules for additional details.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want a clear visual summary of the decision and explanation, so that I can easily communicate it to stakeholders.</td>
<td valign="top" align="left">Summary panel</td>
<td valign="top" align="left">Display decision score and risk category; apply concise explanatory text; use color cues to support fast interpretation.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want the interface to clearly differentiate between neutral, uncertain, and high-risk outcomes, so that I can interpret the decision correctly.</td>
<td valign="top" align="left">Three-tier risk categorization</td>
<td valign="top" align="left">Provide consistent visual representations for neutral, uncertain, and high-risk outcomes; include explicit labels or legends; avoid implicit positive/negative outcome framing.</td>
</tr>
<tr>
<td valign="top" align="left">As a user, I want neutral indicators for non-risk values, so that I don&#x00027;t assume the outcome is automatically positive.</td>
<td valign="top" align="left">Semantically neutral visual encoding</td>
<td valign="top" align="left">Use neutral visual styles for non-risk values; clearly distinguish neutral, warning, and high-risk states; avoid color cues, such as green, implying approval.</td>
</tr></tbody>
</table>
</table-wrap>
<p>For each use-case, designs were created for four explanation types: counterfactual explanations, feature importance explanations, similar/contrastive examples, and rule-based explanations. While most explanation types were represented by five designs each, the similar/contrastive examples category included only four designs for the business loan use-case and five for the insurance fraud use-case. The designs were largely consistent across use-cases but adapted to reflect the respective domain context. This resulted in a total of 19 prototypes for the business loan use-case and 20 for the car insurance fraud use-case, with the discrepancy due to a small mistake in the experimental setup. This is reflected by the dashes in the Similar/Contrastive Cases portion of <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Ranking of prototypes across different explanation methods.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="center" colspan="6"><bold>Feature importance</bold></th>
<th valign="top" align="center" colspan="6"><bold>Counterfactuals</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td/>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td/>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">Participant 1.1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="left">Participant 1.2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="left">Participant 1.3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">4</td>
<td valign="top" align="center">12</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">7</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">7</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="left">Participant 2.1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="left">Participant 2.2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="left">Participant 2.3</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">5</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">13</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">4</td>
<td valign="top" align="center">5</td>
</tr>
<tr>
<td valign="top" align="center" colspan="6"><bold>(a)</bold></td>
<td valign="top" align="center" colspan="6"><bold>(b)</bold></td>
</tr>
<tr>
<td valign="top" align="center" colspan="6"><bold>Rule-based explanations</bold></td>
<td valign="top" align="center" colspan="6"><bold>Similar/contrastive cases</bold></td>
</tr>
<tr>
<td/>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td/>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Participant 1.1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.2</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="left">Participant 1.2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="left">Participant 1.3</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">6</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">6</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">5</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="left">Participant 2.1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Participant 2.2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="left">Participant 2.3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">4</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">8</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">8</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">4</td>
</tr>
<tr>
<td valign="top" align="center" colspan="6"><bold>(c)</bold></td>
<td valign="top" align="center" colspan="6"><bold>(d)</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Each quadrant shows Participant scores and total scores for an explanation method. Gray highlights indicate lowest total scores representing the best ranked prototypes.</p>
</table-wrap-foot>
</table-wrap>
<p>The design decisions were guided by well-established principles from cognitive science and human-computer interaction (HCI). From Cognitive Load Theory (<xref ref-type="bibr" rid="B30">Sweller, 1988</xref>), we applied two concrete principles. First, reducing extraneous cognitive load by removing non-essential visual elements and avoiding dense text blocks. Second, structuring intrinsic load by chunking related information into grouped sections within each explanation. These decisions aimed to support users in processing model outputs without being overloaded. Recent work highlighting the effects of presentation order and morphological clarity on cognitive load, trust, and confidence in AI systems further supports these choices (<xref ref-type="bibr" rid="B17">Hudon et al., 2021</xref>). Selective Attention Theory (<xref ref-type="bibr" rid="B32">Treisman and Gelade, 1980</xref>) guided our use of color, contrast, and spatial positioning to draw the viewer&#x00027;s attention to key explanatory elements&#x02014;such as the primary feature contributions, decision rationale, or risk indicators&#x02014;while de-emphasizing less important information.</p>
<p>Designing user interfaces depends not only on defined requirements but also on the implicit expertise, intuition, and aesthetic judgment of the graphic designer. These factors shape critical aspects such as layout, visual hierarchy, color schemes, and typographic choices, which are elements that can influence the interpretability and usability of interface explanations (<xref ref-type="bibr" rid="B31">Tidwell, 2010</xref>; <xref ref-type="bibr" rid="B28">Norman, 2013</xref>). To support transparency and reproducibility, all design prototypes developed in this study are included for reference. All 20 prototypes developed for the car-insurance fraud use case are available in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>; <xref ref-type="fig" rid="F1">Figure 1</xref> illustrates a representative contrastive/similar example design.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Contrastive/similar examples design A for the insurance fraud use-case.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1668029-g0001.tif">
<alt-text content-type="machine-generated">Infographic compares car theft insurance cases using a horizontal risk score bar colored blue, orange, and red. Four cases are detailed: Case C (approved, low risk, Honda, Rotterdam Harbor, score 65); Case B (rejected, medium risk, Audi, Amsterdam Centre, score 98); Case A (current, high risk, Audi, Rotterdam Harbor, score 126); Case D (rejected, high risk, Audi, Amsterdam Centre, score 128). Case A is highlighted in red. Total score is one hundred twenty-six with a high risk level indicated.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>2.5</label>
<title>Evaluating the visual designs with end-users</title>
<p>The evaluation of these prototypes with end-users was carried out through an exploratory quantitative approach. For each of the two use cases, three end-users (n = 6 total) completed all unique pairwise comparisons for each explanation type. Each explanation type included five different visualizations, resulting in 10 unique pairwise comparisons per explanation type per participant (with the exception of the similar/contrastive examples category in the business loan use-case, which included only four designs and therefore six pairwise comparisons). From these selections, we derived a preference ranking from most to least preferred visualization per explanation type, with the design chosen most frequently across pairings representing the participant&#x00027;s top preference.</p>
<p>In the second stage, the end-users completed a 5-points Likert scale questionnaire assessing the prototypes based on 11 user-centered explanation criteria: <italic>understanding, ease of understanding, ease of use, satisfaction, usefulness, typicality, sufficiency, subjective correctness, compactness</italic>, and <italic>actionability</italic>. These criteria were distilled from prior research (<xref ref-type="bibr" rid="B19">Kim et al., 2024a</xref>). The complete questionnaire is attached in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
<p>The quantitative data, consisting of prototype selection rankings and questionnaire ratings, were analyzed to determine the most preferred prototype for each explanation type and to identify the key criteria influencing user preferences. Participants rated each prototype based on the 11 predefined criteria.</p>
<p>To assess the relationship between these criteria and prototype preference, we computed Spearman&#x00027;s rank correlation coefficient (&#x003C1;) between the binary preference variable (most preferred prototype per explanation type = 1, others = 0) and the corresponding questionnaire ratings. Spearman&#x00027;s &#x003C1; measures the strength and direction of a monotonic relationship between ordinal variables, making it appropriate for ranking-based data.</p>
<p>The exploratory analysis was conducted across two use-cases. In the credit loan use-case, participants evaluated 19 prototypes, resulting in a dataset of 3 &#x000D7; 11 &#x000D7; 19 &#x0003D; 627 observations, where each row represents a prototype evaluation based on the 11 criteria. In the insurance claim use-case, 20 prototypes were assessed, yielding 3 &#x000D7; 11 &#x000D7; 20 &#x0003D; 660 observations. Participants were not compensated for their participation.</p>
<p>Spearman&#x00027;s correlation was computed for each criterion to determine its predictive value in relation to prototype preference. Higher absolute values of &#x003C1; indicate a stronger relationship between a given criterion and user preference. The results are listed in Section 3.1.3.</p>
</sec>
<sec>
<label>2.6</label>
<title>Workshop to assess the generality of the visual designs</title>
<p>To complement the user studies, we conducted a workshop involving key stakeholders with expertise in explainable AI (XAI) systems. The workshop attracted four participants including two representatives of consultancy firms building explainable AI solutions for the financial sector, a compliance officer, and a developer/owner of XAI use-case systems. This broader group of stakeholders provided insights beyond end-user preferences, ensuring that the evaluation captured regulatory, practical, and implementation considerations relevant to XAI explanation design. Participants were not compensated for their participation.</p>
<p>The workshop followed a structured two-step mixed-method approach. In the first step, participants evaluated the same set of explanation designs as presented to the end-users, consisting of five designs for each of the four explanation types: feature importance, counterfactuals, similar/contrastive examples, and rule-based explanations. The prototypes used in Phase 2 were identical to those in Phase 1; no refinements were introduced between phases. This decision was intentional, as it ensured that differences in feedback or preferences could be attributed to stakeholder perspectives rather than changes in the materials, thereby enabling a more direct comparison across phases. The 100-dollar method was employed for this evaluation, requiring participants to allocate a virtual budget of 100 dollars across the designs within each explanation type. This forced-choice allocation method provides a quantitative measure of preference by revealing the relative value participants assign to different designs.</p>
<p>In the second step, the top three designs for each explanation type, as determined by the 100-dollar method, were subjected to a qualitative analysis. Participants provided feedback on the aspects of these top-ranked designs that contributed to their usability, clarity, and effectiveness. Additionally, they were asked to suggest modifications that could enhance the generality and adaptability of the designs across different XAI contexts and use-cases oriented on tabular data. This qualitative feedback aimed to ensure that the explanation visualizations were not only effective in specific scenarios but also applicable to a broader range of stakeholders and systems.</p></sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<sec>
<label>3.1</label>
<title>User study results</title>
<p>This section presents the findings from the quantitative user studies, detailing the assessment of XAI prototypes by end-users on explanation design.</p>
<sec>
<label>3.1.1</label>
<title>Evaluation with end-users</title>
<p>To evaluate the XAI prototypes, two companies participated: a business loan provider and an insurance company. Each company had three participants involved in the evaluation. At the loan provider, underwriters assessed the prototypes, whereas at the insurance company, consultants evaluated them in the context of fraud detection. Participants completed a questionnaire, as outlined in Section 2.5, evaluating the prototypes based on 11 human-centered explanation criteria.</p></sec>
<sec>
<label>3.1.2</label>
<title>Prototype ranking</title>
<p>For each of the four types of explanation, users were asked to indicate which of the different designs (A-E) was preferred. <xref ref-type="table" rid="T3">Table 3</xref> shows the results of this evaluation.</p>
<p>From the <bold>feature importance</bold> quadrant (top-left) of <xref ref-type="table" rid="T3">Table 3</xref>, it can be seen that design D received the lowest total rank score from participant group 1 (score = 4), making it the most preferred, while design E was the least preferred (score = 12). In participant group 2, design A scored the best (score = 5), and design B was the least preferred (score = 14). In group 2, the difference in preference between designs A and D is only 1 point.</p>
<p>In terms of <bold>counterfactuals</bold>, as shown in the top-right quadrant of <xref ref-type="table" rid="T3">Table 3</xref>, designs A and D were the most preferred in participant group 1, while design B was the least preferred. In participant group 2, design D received the highest preference, while design C was the least favored. Overall, design D performed well across both groups, suggesting that it was perceived as a strong candidate for presenting counterfactual explanations.</p>
<p>Looking at <bold>rule-based explanations</bold> in the bottom-left quadrant of <xref ref-type="table" rid="T3">Table 3</xref>, participant group 1 most favored designs B and E, while designs A and C were the least preferred (score = 12 each). In participant group 2, design B stood out as the favorite, while design C was the least preferred (score = 15). The consistency in scores for design B suggests that it was well received for rule-based explanations.</p>
<p>For <bold>similar/contrastive cases</bold>, the bottom-right quadrant of <xref ref-type="table" rid="T3">Table 3</xref> shows that participant group 1 preferred design C, while design A received the highest total score and was therefore the least preferred in that group. In contrast, group 2 most favored design E, with design A being the least preferred (score = 13). As noted earlier, design E was not included in group 1&#x00027;s evaluation due to a small mistake in the experimental setup. When considering only the first four designs, both groups exhibited a shared preference for design C.</p>
<sec>
<label>3.1.3</label>
<title>Explanation criteria ratings</title>
<p><xref ref-type="table" rid="T4">Table 4</xref> provides an overview of how the different designs were rated based on the various criteria, derived from previous research (<xref ref-type="bibr" rid="B19">Kim et al., 2024a</xref>). Specifically, the properties related to the quality of the explanations were considered.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>For each criterion, the average score (1&#x02013;5) for each prototype and the overall criterion average.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>FinTech Credit</bold></th>
<th valign="top" align="center" colspan="5"><bold>Feature importance</bold></th>
<th valign="top" align="center" colspan="5"><bold>Counter-factuals</bold></th>
<th valign="top" align="center" colspan="5"><bold>Similar-contrastive</bold></th>
<th valign="top" align="center" colspan="5"><bold>Rule-based explanations</bold></th>
<th valign="top" align="center"><bold>Avg</bold>.</th>
</tr>
</thead>
<tbody>
<tr>
<td/>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">E</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Understand</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
</tr>
<tr>
<td valign="top" align="left">Easy to understand</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.4</td>
</tr>
<tr>
<td valign="top" align="left">Easy to use</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.3</td>
</tr>
<tr>
<td valign="top" align="left">Satisfying</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">1.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.1</td>
</tr>
<tr>
<td valign="top" align="left">Useful</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.4</td>
</tr>
<tr>
<td valign="top" align="left">Trustworthy</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.9</td>
</tr>
<tr>
<td valign="top" align="left">Typical</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.4</td>
</tr>
<tr>
<td valign="top" align="left">Sufficient</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">1.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.1</td>
</tr>
<tr>
<td valign="top" align="left">Correct</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.9</td>
</tr>
<tr>
<td valign="top" align="left">Concise</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
</tr>
<tr>
<td valign="top" align="left">Act</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">2.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.1</td>
</tr>
<tr>
<td valign="top" align="left">Avg.</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.1</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">3.9</td>
<td valign="top" align="center">2.9</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">3.8</td>
<td valign="top" align="center">3.1</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">3.2</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">2.9</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">3.5</td>
<td valign="top" align="center">4.1</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">3.8</td>
<td valign="top" align="center">3.9</td>
<td/>
</tr>
<tr>
<td valign="top" align="left"><bold>Insurance claims</bold></td>
<td valign="top" align="center" colspan="5"><bold>Feature importance</bold></td>
<td valign="top" align="center" colspan="5"><bold>Counter-factuals</bold></td>
<td valign="top" align="center" colspan="5"><bold>Similar-contrastive</bold></td>
<td valign="top" align="center" colspan="5"><bold>Rule-based explanations</bold></td>
<td valign="top" align="center"><bold>Avg</bold>.</td>
</tr>
<tr>
<td/>
<td valign="top" align="center" style="background-color:#bfbfbf">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center" style="background-color:#bfbfbf">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center" style="background-color:#bfbfbf">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf">E</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center" style="background-color:#bfbfbf">B</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Understand</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.1</td>
</tr>
<tr>
<td valign="top" align="left">Easy to understand</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
</tr>
<tr>
<td valign="top" align="left">Easy to use</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
</tr>
<tr>
<td valign="top" align="left">Satisfying</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">5.0</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
</tr>
<tr>
<td valign="top" align="left">Useful</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
</tr>
<tr>
<td valign="top" align="left">Trustworthy</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.1</td>
</tr>
<tr>
<td valign="top" align="left">Typical</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.9</td>
</tr>
<tr>
<td valign="top" align="left">Sufficient</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.8</td>
</tr>
<tr>
<td valign="top" align="left">Correct</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">5.0</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.1</td>
</tr>
<tr>
<td valign="top" align="left">Concise</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">4.3</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">5.0</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.6</td>
</tr>
<tr>
<td valign="top" align="left">Act</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.7</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center" style="background-color:#bfbfbf">3.3</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.0</td>
<td valign="top" align="center">3.2</td>
</tr>
<tr>
<td valign="top" align="left">Avg.</td>
<td valign="top" align="center">4.2</td>
<td valign="top" align="center">3.8</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.1</td>
<td valign="top" align="center">3.8</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">4.5</td>
<td valign="top" align="center">4.1</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">3.7</td>
<td valign="top" align="center">4.0</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">4.3</td>
<td valign="top" align="center">3.8</td>
<td valign="top" align="center">4.4</td>
<td valign="top" align="center">3.4</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">3.6</td>
<td/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The highest scores are marked in gray.</p>
</table-wrap-foot>
</table-wrap>
<p>A Spearman rank correlation analysis was conducted to examine the relationship between explanation prototypes and user criteria for both the <bold>Insurance Claims</bold> and <bold>FinTech Credit</bold> use-cases. <xref ref-type="table" rid="T5">Table 5</xref> presents the correlation coefficients (&#x003C1;) for each criterion across the two use-cases. The results indicate that for <bold>Insurance Claims</bold>, the strongest correlation was observed for <italic>Concise</italic> (&#x003C1; &#x0003D; 0.56), followed closely by <italic>Easy to Use</italic> (&#x003C1; &#x0003D; 0.51). The correlation for <italic>Satisfying</italic> was also relatively high (&#x003C1; &#x0003D; 0.48), indicating that user satisfaction is closely linked to the preferred prototypes. For <bold>FinTech Credit</bold>, the highest correlation was found for <italic>Satisfying</italic> (&#x003C1; &#x0003D; 0.55), followed by <italic>Easy to Use</italic> (&#x003C1; &#x0003D; 0.50). Other notable correlations include <italic>Sufficient</italic> (&#x003C1; &#x0003D; 0.41) and <italic>Correct</italic> (&#x003C1; &#x0003D; 0.41).</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Spearman rank correlations for both use-cases.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Criteria</bold></th>
<th valign="top" align="center"><bold>&#x003C1; (Insurance claims)</bold></th>
<th valign="top" align="center"><bold>&#x003C1; (FinTech credit)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Understand</td>
<td valign="top" align="center">0.21</td>
<td valign="top" align="center">0.24</td>
</tr>
<tr>
<td valign="top" align="left">Easy to understand</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.34</td>
</tr>
<tr>
<td valign="top" align="left">Easy to use</td>
<td valign="top" align="center"><bold>0.51</bold></td>
<td valign="top" align="center"><bold>0.50</bold></td>
</tr>
<tr>
<td valign="top" align="left">Satisfying</td>
<td valign="top" align="center">0.48</td>
<td valign="top" align="center"><bold>0.55</bold></td>
</tr>
<tr>
<td valign="top" align="left">Useful</td>
<td valign="top" align="center">0.30</td>
<td valign="top" align="center">0.20</td>
</tr>
<tr>
<td valign="top" align="left">Trustworthy</td>
<td valign="top" align="center">0.23</td>
<td valign="top" align="center">0.20</td>
</tr>
<tr>
<td valign="top" align="left">Typical</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">0.39</td>
</tr>
<tr>
<td valign="top" align="left">Sufficient</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.41</td>
</tr>
<tr>
<td valign="top" align="left">Correct</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.41</td>
</tr>
<tr>
<td valign="top" align="left">Concise</td>
<td valign="top" align="center"><bold>0.56</bold></td>
<td valign="top" align="center">0.26</td>
</tr>
<tr>
<td valign="top" align="left">Act</td>
<td valign="top" align="center">0.34</td>
<td valign="top" align="center">0.40</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values indicate the strongest correlations observed in the table.</p>
</table-wrap-foot>
</table-wrap>
<p>It is worth noting that in 10 of the 24 cases (41.7%) the design preferred by participants did not align with the design receiving the highest aggregated criteria score (<xref ref-type="table" rid="T6">Table 6</xref>). While this discrepancy may partly reflect noise due to the small sample size, it also suggests that participants may weigh certain criteria differently when expressing overall preferences. In Company 2, the divergence was even more pronounced (8 of 12 cases, 66.7%), indicating that contextual factors could play a role in shaping XAI design preferences, though this interpretation should be treated as exploratory given the limited dataset.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>For each participant, the preferred design is presented (left) and the design based on the sum of scores for the individual characteristics (right).</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th valign="top" align="center" colspan="2"><bold>Feature importance</bold></th>
<th valign="top" align="center" colspan="2"><bold>Counter-factuals</bold></th>
<th valign="top" align="center" colspan="2"><bold>Similar/ contrastive</bold></th>
<th valign="top" align="center" colspan="2"><bold>Rule-based explanations</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Pref</bold></th>
<th valign="top" align="center">&#x02211;</th>
<th valign="top" align="center"><bold>Pref</bold></th>
<th valign="top" align="center">&#x02211;</th>
<th valign="top" align="center"><bold>Pref</bold></th>
<th valign="top" align="center">&#x02211;</th>
<th valign="top" align="center"><bold>Pref</bold></th>
<th valign="top" align="center">&#x02211;</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Participant 1.1</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">C</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">B</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.2</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">B</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">E</td>
</tr>
<tr>
<td valign="top" align="left">Participant 1.3</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">E</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.1</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">C</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">E</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">B</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">E</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">B</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.2</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">C</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">E</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">E</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">B</td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">B</td>
</tr>
<tr>
<td valign="top" align="left">Participant 2.3</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">D</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center">E</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">A</td>
<td valign="top" align="center" style="background-color:#bfbfbf;color:#000000">B</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Colored cells indicate a difference in preferred design.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<label>3.1.4</label>
<title>Correlation between criteria and prototype preference</title>
<p>The Spearman results suggest that explanations that are easy to use, concise, and satisfying tend to correlate with prototype preference. Notably, while <italic>conciseness</italic> was the most important factor in <bold>Insurance Claims</bold>, <italic>satisfaction</italic> was more strongly associated with <bold>FinTech Credit</bold>. This variation indicates that use-case specific needs influence how users perceive and evaluate explanations.</p>
<p>The correlation between <italic>easy to use</italic> and prototype preference in both use-cases suggests that explanations should prioritize usability to maximize their effectiveness. In contrast, attributes such as <italic>trustworthy</italic> and <italic>useful</italic> exhibit lower correlations; however, these results should be interpreted in light of the fact that the explanations were aligned with end-users&#x00027; decision-making needs, and received relatively high ratings across all criteria. Because these attributes were already evaluated favorably&#x02014;at or above the midpoint of the 5-point Likert scale&#x02014;there was limited variation for them to exert a stronger influence on preference. Their lower correlations do not suggest that they are unimportant, but rather that, within an already sufficiently rated set of explanations, they are potentially less decisive than perceptions of ease of use.</p>
<p>Additionally, the absolute scores of each criterion for the preferred prototype designs indicate that they perform at a sufficient level as seen in <xref ref-type="table" rid="T4">Table 4</xref>. This finding suggests that these designs are not only preferred but also meaningful. This indicates that the designs align with user expectations and match the use-case.</p>
</sec>
</sec>
<sec>
<label>3.2</label>
<title>Workshop results on generality of visual designs</title>
<p>The workshop provided insights into the strengths and weaknesses of the 20 explanation designs, covering four explanation types: <italic>feature importance, contrastive/similar examples, counterfactuals, and rule-based explanations</italic>. The evaluation followed a two-step mixed-method approach: first, participants ranked the prototypes using the <italic>100-dollar method</italic>, after which they provided qualitative feedback on the top three designs per explanation type, focusing on usability and generalizability.</p>
<sec>
<label>3.2.1</label>
<title>100-dollar method</title>
<p>The results of the <italic>100-dollar method</italic> are presented in <xref ref-type="table" rid="T7">Table 7</xref>, with scores averaged across all participants. The findings indicate that Prototype D is the most preferred among participants in terms of Feature Importance. For counterfactual explanations, Prototypes C and D are the most favored ones. In the case of similar and contrastive examples, participants demonstrated a preference for Prototype A. Lastly, for rule-based explanations, Prototype C received the highest score. The rationale behind these preferences and the implications of the highest-rated prototypes are further analyzed in the subsequent sections.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Ranking of explanation prototypes based on the 100-dollar method.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Prototype</bold></th>
<th valign="top" align="center"><bold>A</bold></th>
<th valign="top" align="center"><bold>B</bold></th>
<th valign="top" align="center"><bold>C</bold></th>
<th valign="top" align="center"><bold>D</bold></th>
<th valign="top" align="center"><bold>E</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Feature importance</td>
<td valign="top" align="center">32.5 &#x000B1; 10.08</td>
<td valign="top" align="center">7.5 &#x000B1; 7.68</td>
<td valign="top" align="center">2.5 &#x000B1; 4.00</td>
<td valign="top" align="center"><bold>47.5</bold> &#x000B1; 7.68</td>
<td valign="top" align="center">10 &#x000B1; 6.56</td>
</tr>
<tr>
<td valign="top" align="left">Counterfactuals</td>
<td valign="top" align="center">27.5 &#x000B1; 21.04</td>
<td valign="top" align="center">11.25 &#x000B1; 5.04</td>
<td valign="top" align="center"><bold>30</bold> &#x000B1; 6.56</td>
<td valign="top" align="center"><bold>30</bold> &#x000B1; 28.49</td>
<td valign="top" align="center">1.25 &#x000B1; 2.00</td>
</tr>
<tr>
<td valign="top" align="left">Similar/contrastive examples</td>
<td valign="top" align="center"><bold>47.5</bold> &#x000B1; 13.68</td>
<td valign="top" align="center">2.5 &#x000B1; 4.00</td>
<td valign="top" align="center">23.75 &#x000B1; 11.92</td>
<td valign="top" align="center">8.75 &#x000B1; 11.44</td>
<td valign="top" align="center">17.5 &#x000B1; 13.68</td>
</tr>
<tr>
<td valign="top" align="left">Rule-based explanation</td>
<td valign="top" align="center">13.75 &#x000B1; 14.40</td>
<td valign="top" align="center">31.25 &#x000B1; 8.24</td>
<td valign="top" align="center"><bold>32.5</bold> &#x000B1; 16.48</td>
<td valign="top" align="center">15 &#x000B1; 13.84</td>
<td valign="top" align="center">7.5 &#x000B1; 12.00</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Higher scores indicate stronger preference among participants. The scores are averaged over participants and reported as mean &#x000B1; 95% confidence interval. Bold values indicate the highest mean scores observed per explanation type.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<label>3.2.2</label>
<title>Feature importance explanations</title>
<p>Participants identified several challenges related to the interpretability and usability of feature importance visualizations. A recurring concern was the lack of clarity regarding the scale of values, as it was often unclear whether scores were normalized, what their maximum values were, or how they should be interpreted in context. Many participants also questioned how a system should handle cases with numerous features, debating whether to display only the most important ones or set a threshold for inclusion.</p>
<p>The use of color played a crucial role in interpretation, with red often signaling negative impact. However, some participants pointed out that the prototypes focused solely on highlighting potential risk factors, without presenting any mitigating information that could balance or contextualize those risks. This absence, they noted, may contribute to automation bias, as users might be more likely to accept negative system outputs without questioning or critically assessing them. Additionally, the effectiveness of color coding was debated, with some suggesting a green-to-red gradient to indicate varying levels of importance, while others were concerned that green might imply a beneficial impact even when a feature simply had a neutral or weak effect.</p>
<p>Design preferences varied regarding layout and information density. While some prototypes were criticized for excessive negative space, making them harder to interpret at a glance, others were praised for their compact and structured design.</p></sec>
<sec>
<label>3.2.3</label>
<title>Contrastive/similar examples</title>
<p>Feedback on contrastive explanations largely revolved around readability and layout efficiency. Participants expressed a preference for a logical reading order, typically from left to right and top to bottom. Some prototypes disrupted this flow, requiring users to navigate back and forth, which made interpretation more cumbersome. Example positioning was also a point of discussion, with many emphasizing that the current case should be more prominently displayed, ideally in the top left or highlighted with additional visual effects.</p>
<p>The scalability of presenting multiple similar or contrastive examples emerged as a concern, particularly in prototypes that displayed several cases side by side. Participants questioned which examples should be selected and how many could feasibly be shown without overwhelming the user or diluting the relevance of the comparison. This raised broader issues around curating representative examples while maintaining clarity and usability. There was general interest in interactive elements, such as hover effects or selection mechanisms, to allow users to focus on specific examples without overwhelming them with information.</p>
<p>Design E featured a wave-like structure intended to represent a decision boundary between different possible outcomes of the model. However, during evaluation, the wave motif introduced unintended ambiguity. While the underlying model employed hard categorical decisions, the wave-like shape implied probabilistic uncertainty or a soft boundary, which led to user confusion. This mismatch between visual metaphor and model behavior highlights the importance of aligning graphical representations with the actual semantics of the model. Furthermore, this feedback suggests that users tend to favor lean, unambiguous designs that do not introduce unnecessary visual complexity. This underscores that even subtle stylistic choices can affect user understanding in explanation interfaces.</p></sec>
<sec>
<label>3.2.4</label>
<title>Counterfactual explanations</title>
<p>Reactions to counterfactual explanations were mixed, often influenced by the specific context in which they would be applied. While some participants appreciated the clean and structured nature of certain designs, others questioned whether counterfactuals were useful in all scenarios. The ability to visually compare original and modified cases was generally well received.</p>
<p>Color coding was again a topic of discussion, with some participants advocating for more intuitive risk indicators. In one prototype, for instance, a risk score was highlighted in red, even though the predicted risk level was medium. Several participants suggested that orange would be more appropriate in such cases, emphasizing the importance of selecting colors that semantically align with perceived risk levels.</p>
<p>Again, compact, lean layouts were preferred over repetitive or overly detailed displays, as excessive information made interpretation more difficult.</p></sec>
<sec>
<label>3.2.5</label>
<title>Rule-based explanations</title>
<p>Decision rule visualizations were found to be highly context-dependent, making it difficult to determine a universally effective design. A decision tree can be simplified by collapsing it into a sequence of rules rather than depicting the full tree. We captured mixed preferences regarding the presentation of plain rules versus the whole decision tree.</p>
<p>Participants generally preferred designs with interactive elements, such as drop-down menus, which helped reduce visual clutter and supported intuitive navigation. Risk levels&#x02014; high, medium, and low&#x02014;were effectively communicated through familiar color shading (red, orange, green) and were widely understood. However, one prototype introduced a sequence of shaded boxes intended to represent a series of risk mitigation steps. Although the concept aimed to convey progression, participants found the visual flow ambiguous, making it difficult to discern the intended order or interpret meaning at a glance. This underscores the importance of clear visual hierarchies and cautions against overly abstract or non-standard design metaphors that may increase cognitive load. In contrast, participants who prioritized efficiency over detailed step-by-step representations preferred more compact, actionable, and straightforward designs, those that clearly highlight the outcome and suggest how risks might be mitigated.</p></sec>
<sec>
<label>3.2.6</label>
<title>Preliminary design principles</title>
<p>This section presents context-specific insights derived from user studies and workshop discussions conducted throughout the design process. Given the limited scope of two use cases in a single industry and a total of six end-users, these findings should be interpreted as exploratory and illustrative.</p>
<sec>
<label>3.2.6.1</label>
<title>Use of color</title>
<p>Users tend to associate red with negative and green with positive outcomes, due in part to common real-world associations such as traffic signals or software indicators. While we used red to mark negative outcomes, leveraging its strong intuitive signal, we intentionally avoided using green for positive outcomes to reduce the risk of overly strong or biased interpretations. Instead, we chose light blue as a more neutral alternative.</p>
<p>This choice reflects the nature of the decision-making context in our studies. The values being visualized are not inherently &#x0201C;good&#x0201D; or &#x0201C;bad&#x0201D; but represent a continuum of approval levels. A lower score, for instance, might simply indicate a lesser degree of risk rather than a definitively positive outcome. Blue was found to be a more neutral and appropriate color for such nuanced interpretations. Importantly, this also preserves the use of green for truly positive confirmations.</p></sec>
<sec>
<label>3.2.6.2</label>
<title>Contextual information</title>
<p>Many data visualizations rely solely on icons, numbers, or simplified graphics, which often strip away essential context. Users reported that visualizations lacking explanatory information&#x02014;such as bar charts without numeric values or raw scores without interpretation&#x02014;felt incomplete or even confusing. To improve comprehension, we added visual &#x0201C;hints&#x0201D; such as numeric breakdowns and comparative scales.</p>
<p>Users appreciated having just enough information to understand a result without feeling overwhelmed by data they could not immediately interpret.</p></sec>
<sec>
<label>3.2.6.3</label>
<title>Visual restraint</title>
<p>Effective visual design requires restraint. Not every data point needs to be visualized, and minimalism can often be more informative than information overload. Users process visuals faster than text, so visual cues&#x02014;such as shapes, icons, or layout&#x02014;were used strategically to communicate key insights at a glance.</p>
<p>In our designs, we focused on including only the elements necessary for a meaningful understanding of an outcome. For example, rather than showing raw scores alone, we contextualized them with relevant reference points and brief explanations.</p></sec>
<sec>
<label>3.2.6.4</label>
<title>Structure and visual hierarchy</title>
<p>Clarity also depends on how information is arranged. Realistic, freeform layouts may be visually rich, but they often lack the structure needed for efficient comprehension. Instead, we emphasized ordered layouts, logical grouping of elements, and visual hierarchy to guide the user&#x00027;s eye.</p>
<p>Typography size, contrast, and positioning were leveraged to indicate importance and encourage a left-to-right, top-down reading pattern, aligned with Western reading habits. This balance aims to reduce cognitive load while still directing attention where it mattered most.</p></sec>
<sec>
<label>3.2.6.5</label>
<title>Cognitive load</title>
<p>The cognitive load imposed by certain visualizations was also a key consideration. Some designs, particularly in the contrastive and counterfactual categories, presented information in ways that were visually appealing but cognitively demanding. Participants often preferred more compact, structured layouts over those that relied on excessive whitespace or overly complex designs. Additionally, the ordering of elements seems to influence usability, with a strong preference for intuitive reading flows.</p></sec>
<sec>
<label>3.2.6.6</label>
<title>Generalizability</title>
<p>A major challenge in XAI visualization design is ensuring generalizability across different use-cases. Many stakeholders, particularly consultancy partners, emphasized that explanation visualizations need to be adaptable to various domains and decision-making contexts. In some cases, end-users needed to make quick decisions based on explanations, favoring concise and direct visualizations. In other cases, users required more nuanced information to balance competing factors, necessitating richer, interactive designs.</p>
<p>This variability in user needs might indicate that a single, one-size-fits-all explanation is unlikely to be effective. Instead, flexible, customizable explanations that allow users to adjust the level of detail or interact with different elements may provide a better balance between usability and comprehensiveness. Additionally, interactive features such as hover effects, tooltips, or filtering options could help tailor explanations to different levels of expertise and situational demands.</p>
</sec>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>This study highlights the potential role of human-centered design in Explainable AI (XAI) for financial decision-making. While based on a limited sample size, our findings suggest that the perceived meaningfulness of AI explanations is influenced by design choices, such as layout and color coding. Participants tended to prefer explanations that were easy to use, satisfying, and&#x02014;depending on the context&#x02014;concise. Furthermore, the results indicate that different explanation formats may serve distinct user needs, underscoring the importance of adaptable and context-sensitive designs. These findings should be interpreted as exploratory and motivate further validation with larger and more diverse user populations.</p>
<p>A key strength of this research is its mixed-method approach, which combines a user study with end users and a workshop with other stakeholders. This allowed us to capture both end-user preferences and broader regulatory and professional perspectives, ensuring the relevance of our findings across multiple stakeholders. Additionally, this study contributes empirical evidence on how XAI design choices influence usability and how this impacts the quality of explanations experienced by its users&#x02014;an area often overlooked in more technically focused XAI research.</p>
<p>Our findings revealed a notable discrepancy between end-users&#x00027; preferences and those of other key stakeholders regarding explanation design. This divergence is particularly evident in the case of rule-based explanations. While users favored simpler, more compact representations&#x02014;such as minimal sets of rules that support interpretability and reduce cognitive load&#x02014;stakeholders including XAI consultants, compliance officers, and developers expressed a preference for more complete representations, such as full decision trees that capture the entirety of the model&#x00027;s logic. This tension highlights a trade-off between interpretability and completeness that is contingent on the audience&#x00027;s needs and goals.</p>
<p>A similar divergence was observed in the use of similar and contrastive example-based explanations. End-users expressed a preference for visualizations, particularly graphical risk plots, which helped them contextualize predictions relative to similar or contrasting cases. In contrast, other stakeholders prioritized concise, structured tabular formats.</p>
<p>These differences underscore the importance of tailoring explanation strategies not only to users but also to the broader ecosystem of stakeholders involved in the deployment and oversight of AI systems. A one-size-fits-all approach to explainability may fall short; instead, layered or customizable explanations may better accommodate varying expectations and information needs across stakeholder groups.</p>
<p>Despite these strengths, the study has limitations. The sample size was small and drawn from two financial organizations, which may limit generalizability; broader and more diverse samples across domains would strengthen future work. Additionally, the focus on financial decision-making may reduce applicability to other high-risk domains such as healthcare or emergency response. Finally, real-world deployment and longitudinal studies are needed to evaluate long-term effects on decision-making and trust in AI.</p>
<p>Similar to prior studies, we found that users have difficulty with ambiguous color schemes and unclear scoring metrics (<xref ref-type="bibr" rid="B7">Doshi-Velez and Kim, 2017</xref>). Furthermore, our study extends existing literature by providing empirical rankings of explanation prototypes, reinforcing the argument that design elements impact user comprehension and trust (<xref ref-type="bibr" rid="B27">Mohseni et al., 2021</xref>). Importantly, this study confirms that the effectiveness of XAI explanations depends not only on their transparency but also on their ability to support efficient decision-making (<xref ref-type="bibr" rid="B18">Kaur et al., 2020</xref>).</p>
<p>Future research should explore XAI design in real-world settings to assess how explanations influence long-term trust and user behavior. Notably, for some explanation types, multiple designs were highly ranked, suggesting that no single approach fits all users or contexts. This points to the need for future studies to explore adaptive and personalized explanation mechanisms that can adjust based on user expertise, organizational and situational context. Effective XAI requires integrating technical accuracy with human-centered design principles, while considering the cognitive factors affecting user engagement. Another direction is the exploration of interactive explanations, where users can dynamically engage with AI outputs to refine their understanding. Bridging the gap between algorithmic transparency and usability demands interdisciplinary collaborations between AI researchers, cognitive scientists, and UX designers, which will be essential to developing more effective and human-centered XAI solutions that support informed decision-making.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
</sec>
<sec sec-type="ethics-statement" id="s6">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Ethical Committee Research (Hogeschool Utrecht). The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>HM: Conceptualization, Formal analysis, Investigation, Methodology, Project administration, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. MS: Investigation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. SO: Investigation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. RZ: Investigation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. KM: Investigation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. DS: Conceptualization, Investigation, Supervision, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<ack><title>Acknowledgments</title><p>The authors wish to thank the two financial institutions that generously provided the use-cases for this study. Their collaboration, domain expertise, and access to real-world data were invaluable in shaping and validating the findings presented in this work.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. The author(s) verify and take full responsibility for the use of generative AI in the preparation of this manuscript. Generative AI tools (ChatGPT) were used to check and improve grammar, clarity, and wording in portions of the text. No content was generated that substitutes for the author(s)&#x00027; original research, analysis, or interpretations. All intellectual contributions, study design, and results are solely those of the author(s).</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2026.1668029/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2026.1668029/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.zip" id="SM1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
 <ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname> <given-names>S.</given-names></name> <name><surname>Abuhmed</surname> <given-names>T.</given-names></name> <name><surname>El-Sappagh</surname> <given-names>S.</given-names></name> <name><surname>Muhammad</surname> <given-names>K.</given-names></name> <name><surname>Alonso-Moral</surname> <given-names>J. M.</given-names></name> <name><surname>Confalonieri</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Explainable artificial intelligence (XAI): What we know and what is left to attain trustworthy artificial intelligence</article-title>. <source>Inform. Fusion</source> <volume>99</volume>:<fpage>101805</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2023.101805</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amann</surname> <given-names>J.</given-names></name> <name><surname>Blasimme</surname> <given-names>A.</given-names></name> <name><surname>Vayena</surname> <given-names>E.</given-names></name> <name><surname>Frey</surname> <given-names>D.</given-names></name> <name><surname>Madai</surname> <given-names>V. I.</given-names></name> <name><surname>Consortium</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>Explainability for artificial intelligence in healthcare: a multidisciplinary perspective</article-title>. <source>BMC Med. Inform. Decis. Mak</source>. <volume>20</volume>, <fpage>1</fpage>&#x02013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12911-020-01332-6</pub-id><pub-id pub-id-type="pmid">33256715</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Arrieta</surname> <given-names>A. B.</given-names></name> <name><surname>D&#x000ED;az-Rodr&#x000ED;guez</surname> <given-names>N.</given-names></name> <name><surname>Del Ser</surname> <given-names>J.</given-names></name> <name><surname>Bennetot</surname> <given-names>A.</given-names></name> <name><surname>Tabik</surname> <given-names>S.</given-names></name> <name><surname>Barbado</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Explainable artificial intelligence (XAI): Concepts, taxonomies, opportunities and challenges toward responsible ai</article-title>. <source>Inform. Fusion</source> <volume>58</volume>, <fpage>82</fpage>&#x02013;<lpage>115</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2019.12.012</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Belle</surname> <given-names>V.</given-names></name> <name><surname>Papantonis</surname> <given-names>I.</given-names></name></person-group> (<year>2021</year>). <article-title>Principles and practice of explainable machine learning</article-title>. <source>Front. Big Data</source> <volume>4</volume>:<fpage>688969</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdata.2021.688969</pub-id><pub-id pub-id-type="pmid">34278297</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bertrand</surname> <given-names>A.</given-names></name> <name><surname>Belloum</surname> <given-names>R.</given-names></name> <name><surname>Eagan</surname> <given-names>J. R.</given-names></name> <name><surname>Maxwell</surname> <given-names>W.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;How cognitive biases affect xai-assisted decision-making: a systematic review,&#x0201D;</article-title> in <source>Proceedings of the 2022 AAAI/ACM Conference on AI, Ethics, and Society</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>78</fpage>&#x02013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3514094.3534164</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Brennen</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;What do people really want when they say they want &#x0201C;explainable ai&#x0201D;? we asked 60 stakeholders,&#x0201D;</article-title> in <source>Extended Abstracts of the 2020 CHI Conference on Human Factors in Computing Systems</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>7</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Doshi-Velez</surname> <given-names>F.</given-names></name> <name><surname>Kim</surname> <given-names>B.</given-names></name></person-group> (<year>2017</year>). <article-title>Towards a rigorous science of interpretable machine learning</article-title>. <source>arXiv</source> [preprint] arXiv:1702.08608. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1702.08608</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ehsan</surname> <given-names>U.</given-names></name> <name><surname>Liao</surname> <given-names>Q. V.</given-names></name> <name><surname>Muller</surname> <given-names>M.</given-names></name> <name><surname>Riedl</surname> <given-names>M. O.</given-names></name> <name><surname>Weisz</surname> <given-names>J. D.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Expanding explainability: Towards social transparency in ai systems,&#x0201D;</article-title> in <source>Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3411764.3445188</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="web"><collab>European Commission</collab> (<year>2021</year>). <article-title>&#x0201C;Proposal for a regulation laying down harmonised rules on artificial intelligence and amending certain union legislative acts,&#x0201D;</article-title> in <source>Technical Report, European Commission</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:52021PC0206">https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:52021PC0206</ext-link> (Accessed February 21, 2025).</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="web"><collab>European Commission</collab> (<year>2024</year>). <article-title>The Artificial Intelligence Act &#x0201C;Ensuring trustworthy AI,&#x0201D;</article-title> in <source>Technical Report, European Union</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://digital-strategy.ec.europa.eu/en/policies/regulatory-framework-ai">https://digital-strategy.ec.europa.eu/en/policies/regulatory-framework-ai</ext-link></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><collab>European Union</collab> (<year>2016</year>). <article-title>&#x0201C;Regulation (EU) 2016/679 of the european parliament and of the council of 27 april 2016 on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing directive 95/46/ec (general data protection regulation),&#x0201D;</article-title> in <source>EU Regulation 2016/679, European Union. Official Journal of the European Union, L 119</source>, <fpage>1</fpage>&#x02013;<lpage>88</lpage>.</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gaudeul</surname> <given-names>A.</given-names></name> <name><surname>Arrigoni</surname> <given-names>O.</given-names></name> <name><surname>Charisi</surname> <given-names>V.</given-names></name> <name><surname>Escobar Planas</surname> <given-names>M.</given-names></name> <name><surname>Hupont Torres</surname> <given-names>I.</given-names></name></person-group> (<year>2025</year>). <article-title>&#x0201C;The impact of human-ai interaction on discrimination,&#x0201D;</article-title> in <source>Techreport KJ-01-24-180-EN-N (online)</source>.</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gilpin</surname> <given-names>L. H.</given-names></name> <name><surname>Bau</surname> <given-names>D.</given-names></name> <name><surname>Yuan</surname> <given-names>B. Z.</given-names></name> <name><surname>Bajwa</surname> <given-names>A.</given-names></name> <name><surname>Specter</surname> <given-names>M.</given-names></name> <name><surname>Kagal</surname> <given-names>L.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Explaining explanations: An overview of interpretability of machine learning,&#x0201D;</article-title> in <source>2018 IEEE 5th International Conference on Data Science and Advanced Analytics (DSAA)</source> (<publisher-loc>Turin</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>80</fpage>&#x02013;<lpage>89</lpage>.</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hassija</surname> <given-names>V.</given-names></name> <name><surname>Chamola</surname> <given-names>V.</given-names></name> <name><surname>Mahapatra</surname> <given-names>A.</given-names></name> <name><surname>Singal</surname> <given-names>A.</given-names></name> <name><surname>Goel</surname> <given-names>D.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Interpreting black-box models: a review on explainable artificial intelligence</article-title>. <source>Cognit. Comput</source>. <volume>16</volume>, <fpage>45</fpage>&#x02013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12559-023-10179-8</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hawley</surname> <given-names>D. D.</given-names></name> <name><surname>Johnson</surname> <given-names>J. D.</given-names></name> <name><surname>Raina</surname> <given-names>D.</given-names></name></person-group> (<year>1990</year>). <article-title>Artificial neural systems: a new tool for financial decision-making</article-title>. <source>Finan. Analysts J</source>. <volume>46</volume>, <fpage>63</fpage>&#x02013;<lpage>72</lpage>. doi: <pub-id pub-id-type="doi">10.2469/faj.v46.n6.63</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="web"><collab>High-Level Expert Group on Artificial Intelligence</collab> (<year>2019</year>). <article-title>&#x0201C;The expert group&#x00027;s policy and investment recommendations for trustworthy AI,&#x0201D;</article-title> in <source>Technical report, European Commission</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://digital-strategy.ec.europa.eu/en/library/policy-and-investment-recommendations-trustworthy-artificial-intelligence">https://digital-strategy.ec.europa.eu/en/library/policy-and-investment-recommendations-trustworthy-artificial-intelligence</ext-link> (Accessed February 21, 2025).</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hudon</surname> <given-names>A.</given-names></name> <name><surname>Demazure</surname> <given-names>T.</given-names></name> <name><surname>Karran</surname> <given-names>A.</given-names></name> <name><surname>L&#x000E9;ger</surname> <given-names>P.-M.</given-names></name> <name><surname>S&#x000E9;n&#x000E9;cal</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Explainable artificial intelligence (xai): how the visualization of ai predictions affects user cognitive load and confidence,&#x0201D;</article-title> in <source>Information Systems and Neuroscience: NeuroIS Retreat 2021</source> (<publisher-name>Springer</publisher-name>), <fpage>237</fpage>&#x02013;<lpage>246</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-88900-5_27</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kaur</surname> <given-names>H.</given-names></name> <name><surname>Nori</surname> <given-names>H.</given-names></name> <name><surname>Jenkins</surname> <given-names>S.</given-names></name> <name><surname>Caruana</surname> <given-names>R.</given-names></name> <name><surname>Wallach</surname> <given-names>H.</given-names></name> <name><surname>Wortman Vaughan</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Interpreting interpretability: understanding data scientists&#x00027; use of interpretability tools for machine learning,&#x0201D;</article-title> in <source>Proceedings of the 2020 CHI Conference on Human Factors in Computing Systems</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3313831.3376219</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>J.</given-names></name> <name><surname>Maathuis</surname> <given-names>H.</given-names></name> <name><surname>Sent</surname> <given-names>D.</given-names></name></person-group> (<year>2024a</year>). <article-title>Human-centered evaluation of explainable ai applications: a systematic review</article-title>. <source>Front. Artif. Intell</source>. <volume>7</volume>:<fpage>1456486</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frai.2024.1456486</pub-id><pub-id pub-id-type="pmid">39484154</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>J.</given-names></name> <name><surname>Maathuis</surname> <given-names>H.</given-names></name> <name><surname>van Montfort</surname> <given-names>K.</given-names></name> <name><surname>Sent</surname> <given-names>D.</given-names></name></person-group> (<year>2024b</year>). <article-title>&#x0201C;Identifying XAI user needs: gaps between literature and use cases in the financial sector,&#x0201D;</article-title> in <source>HHAI-WS 2024: Workshops at the Third International Conference on Hybrid Human-Artificial Intelligence (HHAI)</source>, 221&#x02013;227.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>M.</given-names></name> <name><surname>Kim</surname> <given-names>S.</given-names></name> <name><surname>Kim</surname> <given-names>J.</given-names></name> <name><surname>Song</surname> <given-names>T.-J.</given-names></name> <name><surname>Kim</surname> <given-names>Y.</given-names></name></person-group> (<year>2024c</year>). <article-title>Do stakeholder needs differ?-designing stakeholder-tailored explainable artificial intelligence (XAI) interfaces</article-title>. <source>Int. J. Hum. Comput. Stud</source>. <volume>181</volume>:<fpage>103160</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijhcs.2023.103160</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Langer</surname> <given-names>M.</given-names></name> <name><surname>Oster</surname> <given-names>D.</given-names></name> <name><surname>Speith</surname> <given-names>T.</given-names></name> <name><surname>Hermanns</surname> <given-names>H.</given-names></name> <name><surname>K&#x000E4;stner</surname> <given-names>L.</given-names></name> <name><surname>Schmidt</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>What do we want from explainable artificial intelligence (XAI)?-a stakeholder perspective on xai and a conceptual model guiding interdisciplinary xai research</article-title>. <source>Artif. Intell</source>. <volume>296</volume>:<fpage>103473</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.artint.2021.103473</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Liao</surname> <given-names>Q. V.</given-names></name> <name><surname>Gruen</surname> <given-names>D.</given-names></name> <name><surname>Miller</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Questioning the ai: informing design practices for explainable ai user experiences,&#x0201D;</article-title> in <source>Proceedings of the 2020 CHI Conference on Human Factors in Computing Systems</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>15</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3313831.3376590</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liao</surname> <given-names>Q. V.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Luss</surname> <given-names>R.</given-names></name> <name><surname>Doshi-Velez</surname> <given-names>F.</given-names></name> <name><surname>Dhurandhar</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Connecting algorithmic research and usage contexts: a perspective of contextualized evaluation for explainable AI,&#x0201D;</article-title> in <source>Proceedings of the AAAI Conference on Human Computation and Crowdsourcing, Vol. 10</source>, <fpage>147</fpage>&#x02013;<lpage>159</lpage>. doi: <pub-id pub-id-type="doi">10.1609/hcomp.v10i1.21995</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lipton</surname> <given-names>Z. C.</given-names></name></person-group> (<year>2018</year>). <article-title>The mythos of model interpretability: in machine learning, the concept of interpretability is both important and slippery</article-title>. <source>Queue</source> <volume>16</volume>, <fpage>31</fpage>&#x02013;<lpage>57</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3236386.3241340</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Miller</surname> <given-names>T.</given-names></name></person-group> (<year>2019</year>). <article-title>Explanation in artificial intelligence: insights from the social sciences</article-title>. <source>Artif. Intell</source>. <volume>267</volume>, <fpage>1</fpage>&#x02013;<lpage>38</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.artint.2018.07.007</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mohseni</surname> <given-names>S.</given-names></name> <name><surname>Zarei</surname> <given-names>N.</given-names></name> <name><surname>Ragan</surname> <given-names>E. D.</given-names></name></person-group> (<year>2021</year>). <article-title>A multidisciplinary survey and framework for design and evaluation of explainable ai systems</article-title>. <source>ACM Trans. Interact. Intell. Syst</source>. <volume>11</volume>, <fpage>3</fpage>&#x02013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3387166</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Norman</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <source>The Design of Everyday Things: Revised and Expanded Edition</source>. New York: Basic Books.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Phillips-Wren</surname> <given-names>G.</given-names></name></person-group> (<year>2012</year>). <article-title>AI tools in decision making support systems: a review</article-title>. <source>Int. J. Artif. Intellig. Tools</source> <volume>21</volume>:<fpage>1240005</fpage>. doi: <pub-id pub-id-type="doi">10.1142/S0218213012400052</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sweller</surname> <given-names>J.</given-names></name></person-group> (<year>1988</year>). <article-title>Cognitive load during problem solving: Effects on learning</article-title>. <source>Cogn. Sci</source>. <volume>12</volume>, <fpage>257</fpage>&#x02013;<lpage>285</lpage>. doi: <pub-id pub-id-type="doi">10.1207/s15516709cog1202_4</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Tidwell</surname> <given-names>J.</given-names></name></person-group> (<year>2010</year>). <source>Designing Interfaces: Patterns for Effective Interaction Design</source>. Cambridge: O&#x00027;Reilly Media, Inc.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Treisman</surname> <given-names>A. M.</given-names></name> <name><surname>Gelade</surname> <given-names>G.</given-names></name></person-group> (<year>1980</year>). <article-title>A feature-integration theory of attention</article-title>. <source>Cogn. Psychol</source>. <volume>12</volume>, <fpage>97</fpage>&#x02013;<lpage>136</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0010-0285(80)90005-5</pub-id><pub-id pub-id-type="pmid">7351125</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1365653/overview">Yosuke Fukuchi</ext-link>, Tokyo Metropolitan University, Japan</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1000698/overview">Evaldas Vaiciukynas</ext-link>, Kaunas University of Technology, Lithuania</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/138165/overview">Sotiris Kotsiantis</ext-link>, University of Patras, Greece</p>
</fn>
</fn-group>
</back>
</article>
