<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2026.1782184</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Divergent patterns of probabilistic reasoning in humans and GPT-5</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Imannezhad</surname>
<given-names>Pegah</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3401625"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Pothos</surname>
<given-names>Emmanuel M.</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/9670"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wills</surname>
<given-names>Andy J.</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/10010"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Psychology, City St George&#x2019;s, University of London</institution>, <city>London</city>, <country country="GB">United Kingdom</country></aff>
<aff id="aff2"><label>2</label><institution>School of Psychology, University of Plymouth</institution>, <city>Plymouth</city>, <country country="GB">United Kingdom</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Emmanuel M. Pothos, <email xlink:href="mailto:e.m.pothos@gmail.com">e.m.pothos@gmail.com</email>; <email xlink:href="mailto:Emmanuel.pothos.1@city.ac.uk">Emmanuel.pothos.1@city.ac.uk</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-03">
<day>03</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1782184</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>13</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Imannezhad, Pothos and Wills.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Imannezhad, Pothos and Wills</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-03">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Large Language Models (LLMs) such as GPT&#x2011;5 are increasingly consulted for advice across a wide range of domains, yet little is known about how their probability judgments compare to those of humans. This study examined GPT&#x2011;5&#x2019;s adherence to classical probability rules, focusing on conjunction fallacies, disjunction fallacies, and violations of binary complementarity. Using a large dataset on human probabilistic judgments, in which participants displayed multiple types of fallacies, we tested GPT&#x2011;5 on the same task and with matched participant profiles. GPT&#x2011;5 produced only single conjunction or disjunction fallacies and showed near&#x2011;perfect compliance with binary complementarity constraints. Its overall response pattern aligned with predictions of early quantum&#x2011;probabilistic models rather than more recent variants incorporating noise. These findings suggest that GPT&#x2011;5 implements a more coherent and internally consistent form of probabilistic reasoning compared to na&#x00EF;ve human participants.</p>
</abstract>
<kwd-group>
<kwd>AI participants (AI subjects)</kwd>
<kwd>complementarity</kwd>
<kwd>conjunction fallacy</kwd>
<kwd>disjunction fallacy</kwd>
<kwd>GPT-5</kwd>
<kwd>human vs. AI cognition</kwd>
<kwd>large language models (LLMs)</kwd>
<kwd>probabilistic reasoning</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. EMP was supported by European Office of Aerospace Research and Development (EOARD) grant FA8655-23-1-7220.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="1"/>
<equation-count count="8"/>
<ref-count count="62"/>
<page-count count="12"/>
<word-count count="9979"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Cognitive Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Large Language Models (LLMs) have rapidly transformed natural language processing, driving breakthroughs in text generation, translation, summarizing, and interactive artificial intelligence (AI) systems (<xref ref-type="bibr" rid="ref3">Bommasani et al., 2021</xref>; <xref ref-type="bibr" rid="ref5">Brown et al., 2020</xref>). Their integration into academic, professional, and public settings has fuelled both optimism and concern regarding their growing influence on cognitive work, decision-making, and epistemic reliance on automated systems (<xref ref-type="bibr" rid="ref3">Bommasani et al., 2021</xref>; <xref ref-type="bibr" rid="ref58">Weidinger et al., 2021</xref>). As LLMs increasingly function as partners in judgment and reasoning tasks, understanding their strengths and limitations in probabilistic inference becomes critical.</p>
<p>Recent model generations, including OpenAI&#x2019;s GPT-5, are explicitly optimized for complex multi-step reasoning and knowledge-intensive decision support (<xref ref-type="bibr" rid="ref41">OpenAI, 2025</xref>). Such systems may therefore be perceived as credible sources of consultation on probabilistic tasks, given (i) their access to large bodies of human-generated knowledge, (ii) their status as cutting-edge AI, and (iii) their frictionless accessibility without social scrutiny (of course, there are concerns regarding data privacy, though related issues are typically poorly understood at best). Real-world usage patterns already point to this shift: students seek help with academic reasoning, professionals turn to LLMs for second opinions on uncertain tasks, and some individuals consult them for personal guidance (<xref ref-type="bibr" rid="ref4">Bright et al., 2024</xref>; <xref ref-type="bibr" rid="ref25">Hou et al., 2024</xref>; <xref ref-type="bibr" rid="ref27">Jung et al., 2024</xref>). Yet the quality of advice supplied by LLMs remains insufficiently examined. Their effectiveness and trustworthiness ultimately depend on two dimensions: (1) accuracy of domain-relevant knowledge and (2) adherence to normative standards of rational decision-making. The first dimension is highly contextual and task-specific, whereas the second relates to a broader debate in cognitive science: to what extent are probabilistic inferences normatively rational, and to what extent are they systematically biased (<xref ref-type="bibr" rid="ref29">Kahneman and Tversky, 1979</xref>; <xref ref-type="bibr" rid="ref54">Tversky and Kahneman, 1983</xref>)?</p>
<p>Normative probabilistic inference is grounded on the principles of Bayesian probability theory. Under this framework, probabilistic judgments are expected to satisfy fundamental constraints such as additivity, complementarity, and overall coherence. These requirements have played a central role in both cognitive science and formal probabilistic modelling (<xref ref-type="bibr" rid="ref22">Griffiths et al., 2008</xref>; <xref ref-type="bibr" rid="ref40">Oaksford and Chater, 2009</xref>; <xref ref-type="bibr" rid="ref50">Tenenbaum et al., 2011</xref>). Decades of behavioural research reveal that human probabilistic reasoning is often consistent with Bayesian principles, but also invariably systematically deviates from these principles, giving rise to robust evidence for cognitive biases such as the conjunction and disjunction fallacies (<xref ref-type="bibr" rid="ref10">Carlson and Yates, 1989</xref>; <xref ref-type="bibr" rid="ref29">Kahneman and Tversky, 1979</xref>; <xref ref-type="bibr" rid="ref54">Tversky and Kahneman, 1983</xref>).</p>
<sec id="sec2">
<label>1.1</label>
<title>Classical probabilistic reasoning fallacies</title>
<p>A central foundation of the present study concerns several well-documented departures from normative probability theory in human judgment, the conjunction fallacy, the disjunction fallacy, and violations of binary complementarity. The first two biases represent some of the most reliable phenomena in the psychology of judgment and decision-making, having been repeatedly observed across diverse populations, measurement approaches, and experimental paradigms (<xref ref-type="bibr" rid="ref19">Dulany and Hilton, 1991</xref>; <xref ref-type="bibr" rid="ref21">Fisk, 2002</xref>; <xref ref-type="bibr" rid="ref38">Moro, 2009</xref>; <xref ref-type="bibr" rid="ref48">Sides et al., 2002</xref>; <xref ref-type="bibr" rid="ref55">Tversky and Koehler, 1994</xref>). Violations of binary complementarity were first systematically reported in <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> and, though their robustness across paradigms has yet to be established, they are included here because they are particularly surprising (arguably more so than the more common conjunction and disjunction fallacies).</p>
<p><italic>Conjunction fallacy</italic>: the conjunction fallacy is one of the most influential and extensively replicated violations of Bayesian theory. In their seminal work, <xref ref-type="bibr" rid="ref54">Tversky and Kahneman (1983)</xref> demonstrated that individuals often judge the probability of a conjunction of two events to exceed that of one of its constituent events. Participants read a description of Linda, designed to match the stereotype of a feminist and were then asked to assess the likelihood of several statements about her. Respondents consistently rated the statement &#x201C;Linda is a feminist and a bank teller&#x201D; as more probable than &#x201C;Linda is a bank teller&#x201D;, despite the logical impossibility of a conjunction exceeding the probability of either component (<xref ref-type="bibr" rid="ref31">Kolmogorov, 1950</xref>).</p>
<p>This reasoning error directly violates the conjunction rule, which states that for any events A and B (<xref ref-type="disp-formula" rid="E1">Equation 1</xref>):</p>
<disp-formula id="E1">
<mml:math id="M1">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
<p>The fallacy persists across variations intended to clarify task interpretation, reduce pragmatic confounds, and mitigate stereotype-driven responses, underscoring its robustness and theoretical importance (<xref ref-type="bibr" rid="ref19">Dulany and Hilton, 1991</xref>; <xref ref-type="bibr" rid="ref38">Moro, 2009</xref>; <xref ref-type="bibr" rid="ref51">Tentori et al., 2004</xref>).</p>
<p>Conjunction fallacies can be classified into single versus double variations. A single violation occurs when the conjunction is judged more probable than one marginal event [e.g., <inline-formula>
<mml:math id="M2">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x003E;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>], whereas a double violation occurs when it is judged more probable than both A and B. Double violations represent a stronger form of probabilistic incoherence and are frequently interpreted as reflecting particularly strong reliance on heuristic judgment strategies, such as representativeness (<xref ref-type="bibr" rid="ref18">Crupi et al., 2018</xref>; <xref ref-type="bibr" rid="ref59">Wojciechowski and Pothos, 2018</xref>; <xref ref-type="bibr" rid="ref60">Yates and Carlson, 1986</xref>).</p>
<p><italic>Disjunction fallacy</italic>: the disjunction fallacy represents a violation complementary to the conjunction fallacy: individuals judge the probability of a disjunction (one possibility <italic>or</italic> another possibility to be true) to be lower than the probability of one of its constituent events. This contradicts another fundamental axiom of Bayesian theory (<xref ref-type="bibr" rid="ref31">Kolmogorov, 1950</xref>), which states that the likelihood of a disjunction cannot be less than that of either component event, as shown in <xref ref-type="disp-formula" rid="E2">Equation (2)</xref>:</p>
<disp-formula id="E2">
<mml:math id="M3">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2228;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2228;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
<p>Nevertheless, empirical research demonstrates that such violations do occur in human judgment (<xref ref-type="bibr" rid="ref10">Carlson and Yates, 1989</xref>). Double disjunction fallacies have also been documented&#x2014;cases in which the disjunction is rated as less probable than both constituent events simultaneously (<xref ref-type="bibr" rid="ref26">Huang et al., 2025</xref>).</p>
<p><italic>Binary complementarity violations</italic>: a foundational axiom of classical probability theory is binary complementarity, which states that the probabilities of an event and its negation must sum to unity (<xref ref-type="bibr" rid="ref31">Kolmogorov, 1950</xref>), as shown in <xref ref-type="disp-formula" rid="E3">Equation (3)</xref>:</p>
<disp-formula id="E3">
<mml:math id="M4">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
<label>(3)</label>
</disp-formula>
<p>When extended to joint probabilities, complementarity entails that the exhaustive set of possibilities must also sum to unity, as shown in <xref ref-type="disp-formula" rid="E4">Equation (4)</xref>:</p>
<disp-formula id="E4">
<mml:math id="M5">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
<label>(4)</label>
</disp-formula>
<p>Although complementarity may appear trivial&#x2014;an immediate consequence of the axioms of probability&#x2014;empirical evidence indicates that complementarity is not universally respected in human judgment. Research in subjective probability elicitation shows that individuals generally approximate complementary coherence when negations are explicitly stated and tasks are transparent (<xref ref-type="bibr" rid="ref6">Budescu et al., 1997</xref>; <xref ref-type="bibr" rid="ref55">Tversky and Koehler, 1994</xref>; <xref ref-type="bibr" rid="ref56">Wallsten et al., 1993</xref>). Violations emerge, however, in contexts where cognitive interpretation, task structure, or framing introduce ambiguity. For example, complementarity failures have been documented in choice behaviour (<xref ref-type="bibr" rid="ref35">Macchi et al., 1999</xref>; <xref ref-type="bibr" rid="ref47">Shafir, 1993</xref>), similarity-based categorization, where perceived overlap between concepts distorts binary relations (<xref ref-type="bibr" rid="ref52">Tversky and Gati, 1978</xref>), and in framing tasks in which negations are implicit rather than explicit (<xref ref-type="bibr" rid="ref20">Epping and Busemeyer, 2023</xref>). The most systematic evidence for violations of binary complementarity has been reported in <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref>. In their work, even though events and their negations were not explicitly represented (i.e., participants did not see A and not A), the corresponding judgments for marginals were in the same block and so participants violated this constraint despite essentially having these judgments side by side&#x2014;this is a very surprising finding. Violations of binary complementarity were also reported on conjunctions in the rest of their experiment.</p>
<p>While complementarity violations are less frequently discussed than conjunction or disjunction fallacies, they highlight important constraints in human probabilistic reasoning and provide a valuable benchmark for evaluating coherence in artificial systems such as LLMs.</p>
<p>Summing up, interest in conjunction and disjunction fallacies, as well as violations of binary complementarity, is not merely theoretical. Probabilistic errors in applied domains&#x2014;such as medicine, law, and public policy&#x2014;can lead to significant real-world consequences, including diagnostic errors, misjudged legal probabilities, and flawed policy decisions (<xref ref-type="bibr" rid="ref1">Bar-Hillel and Neter, 1993</xref>; <xref ref-type="bibr" rid="ref18">Crupi et al., 2018</xref>; <xref ref-type="bibr" rid="ref23">Guthrie et al., 2009</xref>; <xref ref-type="bibr" rid="ref59">Wojciechowski and Pothos, 2018</xref>). Recognizing the prevalence and impact of these biases underscores the importance of understanding probabilistic reasoning not only in humans but also in emerging computational agents such as LLMs, which are increasingly consulted for judgment and decision-making tasks.</p>
</sec>
<sec id="sec3">
<label>1.2</label>
<title>Probabilistic reasoning fallacies in LLMs</title>
<p>Emerging evidence indicates that LLMs may exhibit patterns of probabilistic judgment that partially parallel human cognition. Studies report that earlier models (e.g., GPT-3 and GPT-3.5) often commit conjunction or disjunction fallacies and sometimes underperform relative to human participants, suggesting bounded rational or heuristic-like reasoning (<xref ref-type="bibr" rid="ref2">Binz and Schulz, 2023</xref>; <xref ref-type="bibr" rid="ref12">Chen et al., 2023</xref>; <xref ref-type="bibr" rid="ref32">Koralus and Wang-Ma&#x015B;cianica, 2023</xref>; <xref ref-type="bibr" rid="ref49">Suri et al., 2023</xref>; <xref ref-type="bibr" rid="ref57">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="ref61">Yax et al., 2024</xref>). More advanced systems such as GPT-4 show fewer such errors and, in some cases, superhuman performance on structured probabilistic benchmarks (<xref ref-type="bibr" rid="ref24">Hagendorff et al., 2023</xref>; <xref ref-type="bibr" rid="ref57">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="ref61">Yax et al., 2024</xref>). However, it remains unclear whether LLMs&#x2019; successes reflect genuine probabilistic competence or the superficial avoidance of specific pitfalls. Moreover, prior work seldom distinguishes single from double violations or examines binary complementarity, limiting our understanding of the competence of such models on probabilistic tasks. The broader theoretical question&#x2014;whether LLM biases arise in a way analogous to human-like heuristics, idiosyncratic architectural constraints, or training-data artifacts&#x2014;remains unresolved (<xref ref-type="bibr" rid="ref36">Macmillan-Scott and Musolesi, 2024</xref>).</p>
</sec>
<sec id="sec4">
<label>1.3</label>
<title>Contribution of the present study</title>
<p>The present study systematically evaluates whether GPT-5 adheres to core probabilistic axioms or exhibits characteristic fallacies in its judgments. We focus on three foundational phenomena: conjunction fallacies, disjunction fallacies, and violations of binary complementarity. Extending previous research, we explicitly distinguish single and double violations, allowing us to probe deeper into the capabilities of such models. We further incorporate complementarity, a central but understudied test of LLM probabilistic competence, motivated by recent findings that humans can violate this important constraint (<xref ref-type="bibr" rid="ref26">Huang et al., 2025</xref>).</p>
<p>By comparing GPT-5&#x2019;s responses to human behavioural data, we examine whether its deviations from classical probability theory replicate human patterns, partially overlap with them, or diverge in ways unique to language-based computation. Beyond measuring error prevalence, we analyse the structural profile of GPT-5&#x2019;s misjudgements to determine whether its reasoning resembles bounded rationality, noise-driven inconsistency, or a qualitatively different form of probabilistic processing. By mapping the alignment or divergence between human and artificial reasoning, we provide a foundation for evaluating whether LLMs should be treated as probabilistically reliable advisers or as systems whose coherence requires continual scrutiny.</p>
<p>Put differently, the motivation for the present study ultimately concerns whether LLM (specifically GPT-5) judgments concerning probabilities can be trusted or not. This is a timely question, in light of the fact that LLMs are becoming increasingly popular as sources of advice (<xref ref-type="bibr" rid="ref4">Bright et al., 2024</xref>; <xref ref-type="bibr" rid="ref25">Hou et al., 2024</xref>; <xref ref-type="bibr" rid="ref27">Jung et al., 2024</xref>) and recent calls for principled evaluation frameworks when LLMs are used as judgment or decision-support systems (<xref ref-type="bibr" rid="ref33">Lee et al., 2026</xref>). As noted, quality and trustworthiness of probability judgments depend on two dimensions. The first dimension is whether probabilities are set in an accurate way, given the content of the premises. For example, a judgment that the probability it will rain in London in December is 10% is highly misleading. The second dimension &#x2013; and the focus of the present work &#x2013; is whether probabilities relate to each other in the correct way. For example, regardless of whether individual probabilities are estimated accurately or not, if an LLM decides that the probability that it will rain and snow in London in December is greater than just the probability it will rain, this is a (conjunction) fallacy and an error under most circumstances (<xref ref-type="bibr" rid="ref45">Pothos et al., 2017</xref>).</p>
<p>Finally, the probabilistic task we employ concerns political judgments (<xref ref-type="bibr" rid="ref11">Ceron et al., 2024</xref>; <xref ref-type="bibr" rid="ref46">Santurkar et al., 2023</xref>). Therefore, it is useful to examine whether GPT-5 displays some sensitivity to modern politics. Otherwise, internal consistency of probabilistic assignment (or otherwise) might simply reflect non-committal probabilities, that is, probabilities which are fairly uniform across the relevant events. To this end, we conducted an assessment of GPT-5&#x2019;s political sensitivity, by comparing its probability estimates for election outcomes across parallel, matched prompts involving the two candidates relevant to the probability judgments.</p>
</sec>
</sec>
<sec id="sec5">
<label>2</label>
<title>Models of probabilistic reasoning</title>
<p>A wide range of formal frameworks have been developed to account for systematic deviations from (baseline) classical probability theory in human judgment (<xref ref-type="bibr" rid="ref7">Busemeyer and Bruza, 2012</xref>; <xref ref-type="bibr" rid="ref53">Tversky and Kahneman, 1974</xref>, <xref ref-type="bibr" rid="ref54">1983</xref>; <xref ref-type="bibr" rid="ref62">Zhu et al., 2020</xref>). Broadly, these approaches fall into three families: classical Bayesian models, which (in their baseline form) preserve the Kolmogorov axioms and attribute errors to cognitive limitations or noise; heuristic approaches, which explain fallacies with individual, often unconnected principles; and quantum probability models, which posit a fundamentally different geometric structure of mental representations that naturally gives rise to certain violations. In this work, we focus on Bayesian and quantum approaches, only because these approaches provide more constraints regarding what probabilistic inference should be like.</p>
<p>Classical Bayesian models treat probabilities as measures over subsets of a fixed sample space (<xref ref-type="bibr" rid="ref39">Oaksford and Chater, 2007</xref>). Within this framework, conjunctions, disjunctions, and conditional probabilities must satisfy additivity and coherence constraints. To explain departures from these constraints, early classical accounts introduced stochastic mechanisms based on sampling and noise. In the probability-plus-noise model (<xref ref-type="bibr" rid="ref15">Costello and Watts, 2016</xref>, <xref ref-type="bibr" rid="ref14">2014</xref>, <xref ref-type="bibr" rid="ref16">2018</xref>), individuals maintain internally coherent classical probabilities, but their expressed judgments are corrupted by &#x201C;recording noise.&#x201D; Increasing the noise for complex events allows the model to reproduce conjunction and disjunction fallacies. A related approach, the Bayesian sampler (<xref ref-type="bibr" rid="ref62">Zhu et al., 2020</xref>), assumes that people generate probability estimates from finite memory samples governed by a symmetric beta prior. Smaller samples for conjunctions and disjunctions lead those judgments to be more heavily influenced by the prior, yielding systematic biases. Both models produce mostly similar predictions.</p>
<p>Quantum probability models offer a contrasting theoretical foundation for probabilistic inference, by representing probabilities as geometric projections of a cognitive state vector in a Hilbert space (<xref ref-type="bibr" rid="ref7">Busemeyer and Bruza, 2012</xref>). A central insight of this framework is that certain cognitive questions are incompatible: forming a judgment about one event changes the internal state relevant for judging another (<xref ref-type="bibr" rid="ref7">Busemeyer and Bruza, 2012</xref>, <xref ref-type="bibr" rid="ref8">2024</xref>; <xref ref-type="bibr" rid="ref43">Pothos and Busemeyer, 2013</xref>, <xref ref-type="bibr" rid="ref44">2022</xref>). When events are incompatible, quantum theory requires conjunctions and disjunctions to be computed sequentially, rather than through classical set-theoretic rules. <xref ref-type="bibr" rid="ref9">Busemeyer et al. (2011)</xref> applied this principle by proposing a &#x201C;more-likely-first&#x201D; rule: people evaluate the event they judge more probable first and then the one considered less likely. Under this assumption, the quantum model predicts directional fallacies&#x2014;conjunction errors involving the less probable event and disjunction errors involving the more probable event&#x2014;while strictly prohibiting double fallacies. For example, if <inline-formula>
<mml:math id="M6">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x003E;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, the sequential rule requires <inline-formula>
<mml:math id="M7">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mtext mathvariant="italic">and then</mml:mtext>
<mml:mspace width="0.25em"/>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, making it impossible for the conjunction to exceed both constituents. Equivalent constraints govern disjunctions. Thus, quantum models explain certain systematic patterns in human judgments, while sharply restricting others.</p>
<p>Early quantum models left open how internal quantum probabilities translate into the noisy numerical ratings that participants report. <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> addressed this limitation by introducing the quantum sequential sampler, a hybrid model that merges quantum state dynamics with sampling mechanisms inspired by classical memory-based accounts. The quantum sampler assumes that people draw sequential samples from a quantum cognitive state and convert these samples into explicit numerical judgments subject to estimation noise.</p>
<p>An innovation of the quantum sampler is its use of POVMs (positive operator-valued measures) to compute probabilities, instead of the ideal projective measurements assumed in earlier work (<xref ref-type="bibr" rid="ref9">Busemeyer et al., 2011</xref>). POVMs allow a controlled mismatch between the underlying cognitive state and the observed response, providing a principled noise model that generalizes classical &#x201C;recording noise&#x201D; into the quantum framework. This increased flexibility enables the model to capture a broader spectrum of empirical phenomena, including systematic violations of binary complementarity and the presence of both double conjunction and double disjunction fallacies&#x2014;patterns the earlier quantum models could not accommodate.</p>
<p>To evaluate the quantum sequential sampler against the predominant Bayesian model (the Bayesian sampler), <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> conducted a large-scale study with 1,451&#x202F;U.S. participants shortly before the 2020&#x202F;U.S. presidential election. Each participant provided 78 judgments, including marginal, conditional, conjunctive, disjunctive, and negation probabilities across three event pairs. Across multiple generalization tests, the quantum sampler consistently outperformed the Bayesian sampler, offering the best available joint explanation of the full pattern of human probabilistic judgments.</p>
</sec>
<sec id="sec6">
<label>3</label>
<title>Experiment</title>
<p>Briefly, the main aim of this study is to examine whether GPT-5, a state-of-the-art LLM, exhibits human-like patterns of probabilistic bias when confronted with the same tasks used in recent behavioural studies, specifically <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref>. This study asked U.S. participants to judge the likelihood that the 2020 presidential candidates&#x2014;Joe Biden and Donald Trump&#x2014;would win various combinations of states. Human responses reliably violated classical axioms, including complementarity, conjunction, and disjunction constraints.</p>
<p>Our central objective is to assess whether GPT-5, when instructed through persona-based prompts mirroring the demographic profiles of Huang et al.&#x2019;s participants (age, gender, educational attainment, and geographic region), reproduces these non-normative patterns. The persona strategy enables a one-to-one mapping between individual human participants and corresponding model simulations, thereby allowing a fine-grained comparison of response distributions.</p>
<sec id="sec7">
<label>3.1</label>
<title>Materials and methods</title>
<sec id="sec8">
<label>3.1.1</label>
<title>Human experimental paradigm</title>
<p><xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> investigated probabilistic reasoning about the 2020&#x202F;U.S. presidential election using a structured set of probability-judgment tasks. Each participant provided 78 probability judgments concerning whether either or both candidates would win specific states. The paradigm involved two triplets of states&#x2014;Ohio&#x2013;Missouri&#x2013;Michigan (Triplet 1; T1) and Georgia&#x2013;Montana&#x2013;Nevada (Triplet 2; T2). For each triplet, participants evaluated all possible combinations of events, comprising six marginal events (e.g., Biden wins Ohio), 12 conjunctions (e.g., Biden wins Ohio and Missouri), 12 disjunctions (e.g., Trump wins Nevada or Biden wins Georgia), and 12 conditionals (e.g., Biden wins Michigan given Trump wins Missouri). Each composite event was additionally presented in the reversed order to counterbalance potential order effects, contributing 36 further items and bringing the total to 78.</p>
<p>To reduce anchoring from composite judgments, all marginal probabilities were elicited first. The remaining items were grouped into thematic blocks, and items within each block were randomized. Participants responded using a continuous slider from 0 to 100.</p>
<p>The original study employed a 2&#x202F;&#x00D7;&#x202F;2 between-subjects design varying State Triplet (T1 vs. T2) and Complexity (Low Complexity, LC, vs. High Complexity, HC). Complexity was manipulated by structuring composite-event blocks either by state pair (LC) or by intermixing different pairs within the same block (HC), ostensibly manipulating complexity (this is because the greater the range of events, the more complex the corresponding probability representations needed, from a Bayesian perspective). The four groups&#x2014;T1LC, T1HC, T2LC, and T2HC&#x2014;allowed the authors to test whether complexity modulated violations of probabilistic coherence.</p>
<p>For the present GPT-5 investigation, we use only the low-complexity conditions (T1LC and T2LC). Huang et al. reported no systematic effects attributable to complexity, and the LC conditions provide a cleaner, more interpretable structure for persona-driven simulations. These two groups included 284 and 269 participants, respectively, yielding 553 distinct demographic personas used in GPT-5 data collection.</p>
</sec>
<sec id="sec9">
<label>3.1.2</label>
<title>GPT-5 data-collection paradigm</title>
<p>Model-generated data were obtained using GPT-5-2025-08-07, a release from OpenAI&#x2019;s GPT-5 reasoning family issued in August 2025. All data collection was conducted during November 2025 through the OpenAI Python API, using standardized scripts to ensure complete procedural uniformity and reproducibility across sessions. The overall objective was to replicate the structural, instructional, and cognitive conditions of the human experiment as faithfully as possible, while maintaining reasonable experimental control. Specifically, our intention was to capture GPT-5 reasoning &#x2018;by default&#x2019;, in the same way that participants in a typical psychology experiment (such as the one from Huang et al.) are invited to reason &#x2018;by default&#x2019;. In the human case, by default implies certain instructional manipulations and procedural limitations. We aimed to simulate analogous conditions in GPT-5, in a very approximate sense (given the lack of understanding of GPT-5 behaviour and the practical impossibility of systematically controlling across different instructional and procedural variants). We attempted to do this in three ways. First, by providing suitable instructions to GPT. Second, by asking GPT to simulate (very broadly speaking) some general characteristics of the participants. Finally, by ensuring that each judgment is carried out in a broad context of memory of previous judgments, which we considered analogous to that of humans.</p>
<p>Each simulated session began with a persona-initialization prompt embedding the demographic characteristics of a specific participant from the T1LC or T2LC human groups. For each human participant, four demographic variables were loaded from the study dataset&#x2014;age, gender, education level, and U.S. state of residence&#x2014;and inserted into a fixed natural-language template that generated the persona-initialization prompt. This prompt specified the participant&#x2019;s demographic attributes in a single descriptive sentence (e.g., &#x201C;You are a 54-year-old female with a bachelor&#x2019;s degree living in Ohio&#x201D;) and was followed by standardized task instructions reproduced verbatim from the human experiment. No additional psychological, behavioural, or personality descriptors were added; the personas differed only in the demographic information originally provided by each human participant. This procedure ensured a one-to-one correspondence between human participants and simulated personas and prevented any experimenter-induced variance in persona content.</p>
<p>The instructions presented to human participants were reproduced verbatim. These included the directive to give intuitive, first-impression estimates, to provide a single integer between 0 and 100 for each item, and to respond as if the 2020 election outcome were not yet known. GPT-5&#x2019;s response to this instructional prompt remained in the message history throughout the session, maintaining a consistent cognitive frame across all 553 runs. Note that it is unclear whether GPT-5 can suppress knowledge like this, in a way analogous to how humans cannot suppress knowledge. In the Discussion we consider whether factual knowledge of the election results might impact on model behaviour.</p>
<p>Following initialization, GPT-5 received the complete set of 78 probability-judgment items arranged in the same block structure used in the human study. Within each block, item order was randomized independently for every persona, thereby mirroring the block-level randomization procedure applied to human participants. Each item was embedded in a concise prompt instructing the model to return a single numeral (0&#x2013;100). Responses were parsed automatically and saved to structured output files.</p>
<p>One design element was the implementation of a bounded working-memory constraint, intended to approximate human cognitive limitations during sequential judgment tasks. Although GPT-5 normally operates with an extensive context window, note that the API we employed &#x2018;resets&#x2019; the memory window with each query. We constrained the model to a sliding window consisting only of the original persona-initialization message and the seven most recent question&#x2013;answer pairs. All earlier interactions were removed from the message history prior to issuing the next item. This procedure prevented the model from maintaining a perfect record of its earlier outputs and forced it to operate within a cognitively limited environment, more comparable to human working memory. The seven&#x2013;pair limit was selected because it approximates the upper bound of human working-memory capacity for maintaining discrete verbal&#x2013;numerical items during sequential judgment tasks, consistent with classic capacity estimates (<xref ref-type="bibr" rid="ref37">Miller, 1956</xref>) and contemporary evidence that effective working memory in complex tasks is often substantially lower (<xref ref-type="bibr" rid="ref17">Cowan, 2001</xref>). Recent work has emphasized that imposing such psychological constraints can reveal more human-like patterns of reasoning in LLMs (<xref ref-type="bibr" rid="ref24">Hagendorff et al., 2023</xref>; <xref ref-type="bibr" rid="ref34">Liu et al., 2025</xref>) and the present design follows this approach. Whether the length of &#x2018;seven&#x2019; is the most appropriate memory constraint is less relevant: the point is that GPT-5 was asked to perform in a context in-between much more extensive memory of previous responses or no memory at all of previous responses.</p>
<p>All interactions with GPT-5 were conducted via the standard Chat Completions API. No browsing, web search, plugins, or external tool access were enabled; all probability judgments were therefore generated solely from the model&#x2019;s pretrained internal knowledge and the conversational context. API calls included only the model identifier, message history, and the sampling temperature (set at 1.0), while all other parameters remained at their documented defaults. The <italic>reasoning effort</italic> parameter was not explicitly set; according to the OpenAI API reference, this parameter defaults to medium for gpt-5-2025-08-07 and is applied automatically when not specified (<xref ref-type="bibr" rid="ref42">OpenAI, 2026</xref>). Accordingly, all sessions reflect the model&#x2019;s standard, tool-free behaviour under typical usage conditions, rather than behaviour influenced by experimental tuning or enhanced reasoning controls.</p>
<p>Each persona generated an independent run, resulting in 553 sessions &#x00D7; 78 items per session&#x202F;=&#x202F;43,134 model-generated probability estimates.</p>
</sec>
</sec>
<sec id="sec10">
<label>3.2</label>
<title>Results</title>
<p>A series of paired-samples <italic>t</italic>-tests compared the probabilistic reasoning performance of human participants with that of matched GPT-5 personas. Across all measures, GPT-5 demonstrated markedly stronger adherence to probabilistic principles and substantially lower rates of classical reasoning fallacies. We present the results in detail in the following sections.</p>
<p><italic>Binary complementarity</italic>: following <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref>, we defined <inline-formula>
<mml:math id="M8">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> as the judged probability that Donald Trump would win a given state and <inline-formula>
<mml:math id="M9">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> as the probability that Joe Biden would win that same state. Under the classical axiom of binary complementarity (<xref ref-type="disp-formula" rid="E3">Equation 3</xref>), these two probabilities should sum to unity. Accordingly, marginal violations of complementarity were quantified for each participant or GPT-5 persona as <inline-formula>
<mml:math id="M10">
<mml:mo>&#x2223;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2223;</mml:mo>
</mml:math>
</inline-formula>, averaged across the three state triplets. To assess whether joint probability judgments respected the extended complementarity constraint (<xref ref-type="disp-formula" rid="E4">Equation 4</xref>), we computed <inline-formula>
<mml:math id="M11">
<mml:mo>&#x2223;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mo>&#x00AC;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2223;</mml:mo>
</mml:math>
</inline-formula>, averaged across six possible configurations. This yielded one marginal and one joint complementarity score per individual.</p>
<p>GPT-5&#x2019;s complementarity performance was effectively normative. Marginal violations were extremely small (M&#x202F;=&#x202F;0.00004, SD&#x202F;=&#x202F;0.00045), approaching numerical rounding error. Humans showed much larger and more variable deviations (M&#x202F;=&#x202F;0.248, SD&#x202F;=&#x202F;0.238). Joint complementarity revealed an even stronger divergence: GPT-5&#x2019;s deviations were minute (M&#x202F;=&#x202F;0.00108, SD&#x202F;=&#x202F;0.00820), whereas human judgments departed from the theoretical value by more than a full probability point on average (M&#x202F;=&#x202F;1.250, SD&#x202F;=&#x202F;0.656).</p>
<p>Paired-samples <italic>t</italic>-tests confirmed that GPT-5 personas adhered significantly more closely to complementarity than human participants for both marginal, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;24.53, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, and joint probabilities, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;44.74, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001.</p>
<p><xref ref-type="fig" rid="fig1">Figure 1</xref> visualizes these patterns, illustrating the model&#x2019;s systematic coherence compared with the considerable noise in human intuitive probability judgments.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Violin plots depicting the distributions of deviations from marginal (left) and joint (right) complementarity for human participants and GPT-5 personas. Human responses display wide, variable departures from the complementarity constraint, with many participants exhibiting large violations. In contrast, GPT-5 personas show extremely narrow distributions centred near zero, indicating almost perfectly coherent probability judgments across both marginal and joint constraints. Regarding GPT-5 performance, in this case because of the narrowness of the distribution the &#x2018;orange&#x2019; is invisible.</p>
</caption>
<graphic xlink:href="fpsyg-17-1782184-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Violin plot comparing deviations from complementarity between GPT-5 and human sources under marginal and joint complementarity. GPT-5 shows minimal deviation, while humans exhibit higher and more variable deviation in both categories. Legend shows Human in cyan and GPT-5 in red.</alt-text>
</graphic>
</fig>
<p><italic>Conjunction fallacies</italic>: to quantify violations of the conjunction rule, we computed a conjunction-fallacy score (CF) for each item involving events A and B. A conjunction fallacy occurs when the judged probability of the conjunction exceeds at least one constituent probability. For item <italic>i</italic>, the magnitude of this violation was computed as defined in <xref ref-type="disp-formula" rid="E5">Equation (5)</xref>:</p>
<disp-formula id="E5">
<mml:math id="M12">
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>
<p>We also derived a double conjunction fallacy (DCF) score, reflecting cases where the conjunction exceeds both constituents, as defined in <xref ref-type="disp-formula" rid="E6">Equation (6)</xref>:</p>
<disp-formula id="E6">
<mml:math id="M13">
<mml:msub>
<mml:mi mathvariant="italic">DCF</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">]</mml:mo>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>
<p>Human participants showed a high prevalence of conjunction-rule violations: 58.5% of conjunction judgments contained at least one fallacy. Of these, 62.9% were single fallacies (CF&#x202F;&#x003E;&#x202F;0 but DCF&#x202F;=&#x202F;0), and 37.0% were double conjunction fallacies (DCF&#x202F;&#x003E;&#x202F;0).</p>
<p>By contrast, GPT-5 displayed much more normative behaviour. Only 18.8% of model-generated judgments contained any conjunction fallacy and almost all were of the single-fallacy type (99.5% CF; 0.5% DCF). Thus, while the model occasionally overestimates a conjunction, it almost never commits the double fallacy.</p>
<p>Participant-level averages showed large differences. GPT-5 produced significantly fewer conjunction fallacies overall, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;24.79, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, and dramatically fewer double conjunction fallacies, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;18.09, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001.</p>
<p>As shown in <xref ref-type="fig" rid="fig2">Figure 2</xref> (left panel), human CF and DCF distributions are broad and positively skewed, with substantial variability and frequent severe violations. GPT-5 personas cluster tightly near zero, exhibiting small but nonzero CF values and virtually no DCF.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Distributions of conjunction and disjunction fallacy rates for human participants and GPT-5 personas. The left panel displays the distributions of single (CF) and double (DCF) conjunction fallacy rates, and the right panel displays the corresponding distributions for single (DF) and double (DDF) disjunction fallacies. Each violin plot reflects the participant-level mean fallacy score computed across all relevant items. Human participants exhibit broad, positively skewed distributions across all fallacy types, with substantial variability and frequent double-fallacy violations. In contrast, GPT-5 personas cluster tightly around zero, showing low but nonzero rates of single fallacies and near-zero rates of double fallacies. The marked difference in distributional shape highlights GPT-5&#x2019;s substantially higher adherence to probabilistic coherence relative to human respondents.</p>
</caption>
<graphic xlink:href="fpsyg-17-1782184-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Violin plot chart compares fallacy rates for conjunction, double conjunction, disjunction, and double disjunction fallacies between humans and GPT-5. Humans exhibit higher fallacy rates than GPT-5 across all categories.</alt-text>
</graphic>
</fig>
<p><italic>Disjunction fallacies</italic>: an analogous procedure was used to quantify violations of the disjunction rule. For each item involving events A and B, a disjunction fallacy occurs when the judged probability of the disjunction is lower than one or both constituent probabilities, as defined in <xref ref-type="disp-formula" rid="E7">Equation (7)</xref>:</p>
<disp-formula id="E7">
<mml:math id="M14">
<mml:mi>D</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2228;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2228;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>
<p>Double disjunction fallacies (DDF) occur when the disjunction is lower than both constituents, as defined in <xref ref-type="disp-formula" rid="E8">Equation (8)</xref>:</p>
<disp-formula id="E8">
<mml:math id="M15">
<mml:msub>
<mml:mi mathvariant="italic">DDF</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>max</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mo>min</mml:mo>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">]</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2228;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>
<p>Human participants again showed high violation rates: 57.5% of disjunction judgments contained at least one fallacy, of which 60.1% were single fallacies and 39.9% were double fallacies.</p>
<p>GPT-5 once more showed markedly more normative behaviour. Only 18.7% of disjunction judgments contained any violation and nearly all were single fallacies (99.5% DF; 0.5% DDF). Double disjunction fallacies were effectively absent.</p>
<p>As above, participant-level averages revealed significant human&#x2013;GPT-5 differences. GPT-5 exhibited dramatically fewer disjunction fallacies, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;23.01, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, and substantially fewer double disjunction fallacies, <italic>t</italic>(552)&#x202F;=&#x202F;&#x2212;16.57, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001.</p>
<p><xref ref-type="fig" rid="fig2">Figure 2</xref> (right panel) shows the same qualitative pattern as for conjunction fallacies: human distributions are wide and positively skewed, whereas GPT-5 distributions are tightly concentrated near zero.</p>
<p><italic>Correlation between conjunction and disjunction fallacy rates</italic>: It is informative to examine whether susceptibility to conjunction and disjunction fallacies covaries. Because both constitute violations of normative probability theory, one might expect a positive association: individuals prone to committing conjunction fallacies may also be prone to committing disjunction fallacies. To assess this possibility, we computed Pearson correlations between mean CF and DF rates within each group.</p>
<p>Among human participants, the correlation was small and not statistically significant, <italic>r</italic>(551)&#x202F;=&#x202F;0.03, <italic>p</italic>&#x202F;=&#x202F;0.496, indicating that susceptibility to the two fallacies was largely independent. In contrast, for GPT-5 personas, the correlation was extremely strong and positive, <italic>r</italic>(551)&#x202F;=&#x202F;0.97, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, indicating a near-perfect coupling between the two types of fallacious judgments.</p>
<p>This dissociation suggests that, whereas humans exhibit largely independent tendencies to commit conjunction versus disjunction fallacies, GPT-5&#x2019;s non-normative probability judgments&#x2014;when they occur&#x2014;are tightly linked. Such coupling is consistent with the presence of a single underlying generative mechanism driving the model&#x2019;s residual violations of normative probability theory, rather than distinct cognitive processes for each fallacy type.</p>
<p>Note, the quantum probability model proposed by <xref ref-type="bibr" rid="ref9">Busemeyer et al. (2011)</xref> offers a principled account of why conjunction and disjunction fallacies may covary. In this framework, probability judgments are subject to order effects, such that the judged likelihood of a conjunction depends on the sequence in which constituent events are mentally processed. Processing a likely event first increases the availability of thoughts supportive of a subsequent unlikely event, thereby inflating the perceived probability of the conjunction. Conversely, processing the unlikely event first reduces the availability of supportive thoughts, lowering the conjunction probability. These order-dependent dynamics induce a systematic relationship between conjunction and disjunction judgments, providing a unified explanation for the co-occurrence of both fallacies observed in human reasoning. However, in the subsequent quantum model of <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref>, the additional noise in responses makes a prediction of correlation between conjunction and disjunction fallacies less straightforward.</p>
<p><italic>Model-level political sensitivity</italic>: to examine whether GPT-5 displays some political sensitivity, we analysed its probability estimates for Democratic and Republican candidates across multiple U.S. states. <xref ref-type="fig" rid="fig3">Figure 3</xref> displays the distribution of GPT-5&#x2019;s probability estimates for Biden and Trump winning each of six states, based on prompts of marginal events (e.g., &#x201C;What is the likelihood that Biden will win Ohio?&#x201D;). Visual inspection readily reveals systematic variation in the model&#x2019;s probability assignments as a function of both state and candidate. Note, <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> did not collect corresponding results for humans (the assumption in that study being that na&#x00EF;ve participants would have some political sensitivity).</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Distribution of GPT-5&#x2019;s probability estimates for Biden and Trump across six U.S. states. Violin plots depict the distribution of GPT-5&#x2019;s repeated probability estimates for each candidate&#x2013;state pairing, with embedded boxplots indicating the median and interquartile range. The figure highlights substantial, state-dependent variation in GPT-5&#x2019;s win-probability predictions.</p>
</caption>
<graphic xlink:href="fpsyg-17-1782184-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Violin plot graphic showing probability distributions for candidates Biden and Trump across six states: Georgia, Michigan, Missouri, Montana, Nevada, and Ohio. Biden is represented in pink, Trump in blue. For Missouri and Montana, Trump has high probabilities; for Michigan and Nevada, Biden is favored; for Georgia and Ohio, distributions are closer between candidates. Candidate and probability axes are labeled, with a legend indicating colors for each candidate.</alt-text>
</graphic>
</fig>
<p>To formally assess these differences, we conducted paired-samples <italic>t</italic>-tests comparing GPT-5&#x2019;s probability estimates for Trump versus Biden within each state. Results are reported in <xref ref-type="table" rid="tab1">Table 1</xref>. Positive mean differences indicate higher probabilities assigned to Trump, whereas negative values indicate higher probabilities assigned to Biden. GPT-5 assigned significantly higher probabilities to Trump in Missouri, Ohio, Georgia, and Montana (all <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001), while significantly higher probabilities were assigned to Biden in Michigan and Nevada (all <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001). Mean differences ranged from &#x2212;39.13 to 83.83, indicating substantial candidate-specific modulation in the model&#x2019;s probability estimates across states.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Paired-samples <italic>t</italic>-test results comparing GPT-5&#x2019;s probability estimates for Trump versus Biden across six U.S. states.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">State</th>
<th align="center" valign="top">Mean difference (Trump&#x2013;Biden)</th>
<th align="center" valign="top"><italic>t</italic> (DF)</th>
<th align="center" valign="top"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Michigan</td>
<td align="center" valign="top">&#x2212;39.13</td>
<td align="center" valign="top">&#x2212;43.71 (283)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Missouri</td>
<td align="center" valign="top">82.31</td>
<td align="center" valign="top">248.49 (283)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Ohio</td>
<td align="center" valign="top">35.56</td>
<td align="center" valign="top">56.70 (283)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Georgia</td>
<td align="center" valign="top">10.48</td>
<td align="center" valign="top">17.81 (268)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Montana</td>
<td align="center" valign="top">83.83</td>
<td align="center" valign="top">247.53 (268)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Nevada</td>
<td align="center" valign="top">&#x2212;32.91</td>
<td align="center" valign="top">&#x2212;48.78 (268)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Positive values indicate higher probabilities assigned to Trump; negative values indicate higher probabilities assigned to Biden.</p>
</table-wrap-foot>
</table-wrap>
<p>Taken together, these results indicate that GPT-5 does not treat these electoral possibilities as equivalent. Instead, its probability estimates exhibit systematic, state-dependent asymmetries that reflect political sensitivity.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec11">
<label>4</label>
<title>Discussion</title>
<p>The present findings reveal a clear and systematic divergence between human and GPT-5 probabilistic reasoning. Human participants displayed the well-documented pattern of conjunction and disjunction fallacies, including a substantial number of double violations (<xref ref-type="bibr" rid="ref18">Crupi et al., 2018</xref>; <xref ref-type="bibr" rid="ref54">Tversky and Kahneman, 1983</xref>; <xref ref-type="bibr" rid="ref59">Wojciechowski and Pothos, 2018</xref>). These errors were accompanied by large and heterogeneous violations of both marginal and joint complementarity (<xref ref-type="bibr" rid="ref26">Huang et al., 2025</xref>). In some cases, such errors exceeded theoretical limits by wide margins. This broad variability is consistent across behavioural studies and indicates a plurality of generative mechanisms for probability judgments. Possible mechanisms include Bayesian and quantum processes, but perhaps heuristics too, which are outside formal probabilistic models (<xref ref-type="bibr" rid="ref26">Huang et al., 2025</xref>).</p>
<p>GPT-5, by contrast, showed a markedly different profile. The model produced fewer single fallacies and almost no double fallacies; its complementarity deviations were extremely small&#x2014;near the level of numerical noise. This narrow spread of GPT-5 results indicates that the model&#x2019;s rare departures from coherence do not arise from noisy representations or fluctuating uncertainty. Instead, they reflect a highly stable and internally constrained inferential process.</p>
<p>The correlation analysis further highlights this structural distinction. Among human participants, conjunction and disjunction fallacies were essentially uncorrelated, which suggests that these violations originate from processes which are not restricted to the ones assumed in formal probabilistic models (as noted, noise-less quantum theory predicts a correlation between CFs and DFs). GPT-5 displayed the opposite pattern: when errors occurred, they were strongly correlated across CFs and DFs. This tight coupling implies a common mechanism in the workings of the model, which is the cause of both kinds of fallacies.</p>
<p>Specifically, when GPT-5 does produce fallacies, results show that its behaviour is consistent with early quantum-probabilistic models, for example, the original model for CFs and DFs proposed by <xref ref-type="bibr" rid="ref9">Busemeyer et al. (2011)</xref>. This model allows single conjunction or disjunction fallacies, while strictly prohibiting double fallacies and enforcing complementarity. Moreover, in this model CFs and DFs are correlated with each other. In contrast, the quantum model proposed by <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref> could generate a broad range of violations&#x2014;including frequent double fallacies and complementarity failures. The greater expressivity in <xref ref-type="bibr" rid="ref26">Huang et al.&#x2019;s (2025)</xref> models was due to the introduction of noise through POVMs and sample-based estimation. This additional flexibility does not match GPT-5&#x2019;s profile. Overall, the results indicate that GPT-5 is not simply &#x201C;less noisy&#x201D; than humans. Its pattern of errors is qualitatively different. The model maintains a high degree of probabilistic coherence and, when it fails, it does so in a structured and internally consistent manner.</p>
<p>These differences suggest that GPT-5 relies on a computational mechanism for probabilistic assessment that is not isomorphic to the one employed in human cognition, even under task conditions designed to approximate human working-memory constraints. It is possible that such a mechanism is just closer to normative requirements. A noteworthy alternative possibility is that GPT-5 attains better normative behaviour because, across iterations, it avoids inconsistencies due to random variation (e.g., compare with a human judge, who might be more lenient on some days because they are in a good mood, cf. <xref ref-type="bibr" rid="ref28">Kahneman et al., 2021</xref>). Yet another possibility is that the high correlation between the CF and DF rates&#x2014;which is the main source of evidence for concluding that GPT-5 behaves more like the simple quantum model of <xref ref-type="bibr" rid="ref9">Busemeyer et al. (2011)</xref>&#x2014;is due to floor effects in fallacy rates in GPT-5. Unfortunately, without identifying some situation for which GPT-5 does have high CF and DF rates, we cannot further examine this interesting issue. Finally, our analysis of political-content prompts indicates that GPT-5&#x2019;s probability assignments are informed by a reasonable degree of political sensitivity. The model&#x2019;s forecasts diverge systematically across states and candidates in election scenarios. This demonstrates that its probabilistic behaviour is not uniform across structurally similar prompts.</p>
<p>Having said all the above, there are several limitations. The most important one concerns the research question: our objective was to capture performance of GPT-5 &#x201C;by default.&#x201D; The rationale is that typical GPT-5 users would be interested in whether GPT-5 conducts probabilistic inference in a way analogous to that of humans or not, in typical use. The difficulty lies in that by default for humans often implies a particular context, including a mindset, information, processing constraints etc. We contend that the suitable sense of by default in GPT-5 has to include similar characteristics, that is, characteristics which would plausibly get GPT-5 to approach these probabilistic questions in a way similar to that of humans&#x2014;the persona and memory manipulations were intended to capture some aspects of this.</p>
<p>How much did these manipulations affect results? It is difficult to have confidence of their validity. In the case of the persona matching, it has to be noted that, without this manipulation, the GPT-5 simulations would have been simpler&#x2013;this is because, simply, persona matching required unique GPT-5 runs matched to individual participants. We investigated the rate of conjunction and disjunction fallacies separately, with an ANCOVA model with state triplet (there were two state triplets), gender, and education level (four levels: high school or less, some college, bachelor&#x2019;s degree, postgraduate education) as between-participants independent variables and age as a covariate. State Triplet exerted a strong effect on both disjunction and conjunction fallacy rates (both <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001). In contrast, no robust main effects of demographic persona variables were observed for either fallacy type (all <italic>p</italic>&#x202F;&#x003E;&#x202F;0.13, this includes the covariate, age). A modest interaction between gender and education emerged for disjunction fallacies (<italic>p</italic>&#x202F;=&#x202F;0.041) and marginally for conjunction fallacies (<italic>p</italic>&#x202F;=&#x202F;0.091). Overall, the evidence that persona matching had a measurable impact on GPT-5 behaviour is weak at best, though not completely negligible.</p>
<p>Regarding the memory window, our motivation for the particular length we used was that humans going through a similar task would be making their judgments in the context of other temporally proximal responses. For GPT-5, a choice has to be made regarding the extent of context of previous responses, given that the default setting of having each response given as if the GPT-5 mental state is &#x2018;tabula rasa&#x2019; seems unrealistic. This was our thinking for setting a memory window of a particular size (cf. <xref ref-type="bibr" rid="ref24">Hagendorff et al., 2023</xref>; <xref ref-type="bibr" rid="ref34">Liu et al., 2025</xref>). Note, one way in which memory window might affect probabilistic coherence is if memory of earlier responses helps ensure consistency with later ones. Indeed, in early experimental work on probabilistic fallacies, the relevant questions would be typically presented together or in successive trials (e.g., <xref ref-type="bibr" rid="ref54">Tversky and Kahneman, 1983</xref>). However, in more recent investigations (such as <xref ref-type="bibr" rid="ref26">Huang et al., 2025</xref>), it cannot be assumed that relevant probability responses would be judged together, even with a lengthy working memory window. In general, we would argue that the claim of probabilistic coherence is independent of memory processes and rather about whether probabilities are <italic>generated</italic> in a coherent way. From this perspective, manipulations of memory window would be expected to just have minor contextual influences on responses (for human or artificial agents). While we think this approach is plausible given what we know, we did not systematically evaluate the importance of the memory window length and any conclusions here are conditional on this methodological choice.</p>
<p>Another potential limitation is that the study was carried out well after the 2020&#x202F;US Presidential election. Therefore, the outcome of the election (including performance of the two candidates in different primaries) has been public knowledge that would be, presumably, accessible to GPT-5. With human participants, there is some evidence that, for fact-based propositions, probabilistic inference avoids common probabilistic fallacies (<xref ref-type="bibr" rid="ref13">Collins et al., 2024</xref>; <xref ref-type="bibr" rid="ref30">Karvetski et al., 2013</xref>). Whether this effect might have an analogue in GPT-5 is hard to assess. Nevertheless, empirically, there is a straightforward way to address this possibility. We ran a control whereby the task was framed as a hypothetical future election (the 2028 US Presidential election, which is the next scheduled such election), with as-yet to be identified candidates (instead of Biden and Trump in the original study, the probability questions were framed in terms of the Democratic and Republican candidates). For the purposes of this simulation, we randomly sampled 100 personas from the original set (50 for each triplet of states).</p>
<p>Contrary to expectation, there was an <italic>even lower</italic> rate for both CFs and DFs in the 2028 task vs. the 2020 one (respectively, Welch&#x2019;s <italic>t</italic>(134.24)&#x202F;=&#x202F;4.16, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001 and <italic>t</italic>(160.26)&#x202F;=&#x202F;4.74, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001). There were only 12.9% CFs and 14.7% DFs in the 2028 task. There were no differences between the two tasks regarding double CFs, double DFs, and violations of binary complementarity (all <italic>p</italic>&#x2019;s&#x202F;&#x003E;&#x202F;0.16). Therefore, we can certainly dismiss the possibility that low fallacy rates in the 2020 version of the task were driven by post-election factual knowledge. There are many possible reasons for the difference in GPT-5 performance between the 2020 and 2028 task versions, including the fact that in the 2020 version the candidates were specific individuals. Rather than speculate on these differences, we highlight the main conclusion, which is that when testing GPT-5 with the version of the task best aligned with what human participants received, GPT-5 behaviour was closer to normative expectation.</p>
<p>More generally, it is unclear how impactful instructional and procedural manipulations are for GPT-5. In other work of ours, systematically exploring prompt engineering manipulations has had little effect. Likewise, in this case, we think it is likely that the impact of the instructional manipulations we adopted is, at best, minor. While it is tempting to systematically evaluate different variants regarding instructions and procedure, it is impractical to do so and, arguably, of less interest. These considerations are analogous to human experimental work: experimenters often devote considerable effort to fine-tuning instructions and procedures (e.g., randomisation protocols) to very exacting specifications. However, invariably, minor methods differences do not impact behaviour.</p>
<p>Regarding the present results, we believe the above issues are somewhat moot. The logic is this: by getting GPT-5 to behave more like humans (by simulating some characteristics of the participants, having memory limitations etc.), one could argue that we are making it more likely to observe fallacies in GPT-5 (since fallacies are an aspect of human behaviour). However, GPT-5 uniformly produced fewer probabilistic fallacies, compared to humans. That is, we can say that, <italic>conservatively,</italic> GPT-5 conducts probabilistic inference in a way more rational compared to humans.</p>
</sec>
<sec id="sec12">
<label>5</label>
<title>Concluding comments</title>
<p>This study sought to determine whether GPT-5&#x2019;s violations of probabilistic axioms resemble those exhibited by human reasoners or whether they follow a distinct structural pattern. Despite efforts to place GPT-5 in conditions approximating human constraints, the model&#x2019;s behaviour diverged sharply from human patterns. The findings therefore do not support the view that GPT-5 simply approximates &#x201C;more accurate humans.&#x201D; Instead, GPT-5 appears to instantiate a coherent form of probabilistic inference, that resembles a noise-free quantum model rather than the psychologically noisier mechanisms underlying human probability judgments. This distinction is important for interpreting LLM outputs. GPT-5 can provide probability assessments that are, in general, internally consistent and formally well-structured, as far as we can tell from this study.</p>
<p>These differences carry practical implications. As LLMs increasingly serve in advisory roles&#x2014;educational, professional, and personal&#x2014;results regarding their consistently coherent but cognitively non-human probabilistic guidance may influence when users are likely to resort to LLMs (and for what kind of judgments) and the trust they place in LLM output.</p>
<p>Future work should examine whether GPT-5&#x2019;s coherence extends to more complex probabilistic domains, including conditional inference, belief updating, and sequential decision processes. An open question is whether aligning LLM behaviour with human fallacy patterns is desirable&#x2014;or whether the value of such models lies precisely in their departure from human probabilistic limitations. The present study provides an initial foundation of results, showing that GPT-5 represents a distinct and unusually normative mode of probabilistic reasoning that stands apart from both classical human errors and models designed to explain them.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec13">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. These data can be found in the study of <xref ref-type="bibr" rid="ref26">Huang et al. (2025)</xref>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec14">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Department of Psychology ethics committee, City St. George&#x2019;s University of London. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="sec15">
<title>Author contributions</title>
<p>PI: Software, Formal analysis, Data curation, Methodology, Investigation, Writing &#x2013; review &#x0026; editing. EP: Project administration, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Methodology, Conceptualization, Supervision, Funding acquisition. AW: Investigation, Software, Methodology, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec16">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec17">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec18">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bar-Hillel</surname><given-names>M.</given-names></name> <name><surname>Neter</surname><given-names>E.</given-names></name></person-group> (<year>1993</year>). <article-title>How alike is it versus how likely is it: a disjunction fallacy in probability judgments</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>65</volume>, <fpage>1119</fpage>&#x2013;<lpage>1131</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0022-3514.65.6.1119</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Binz</surname><given-names>M.</given-names></name> <name><surname>Schulz</surname><given-names>E.</given-names></name></person-group> (<year>2023</year>). <article-title>Using cognitive psychology to understand GPT-3</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>120</volume>:<fpage>e2218523120</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2218523120</pub-id>, <pub-id pub-id-type="pmid">36730192</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bommasani</surname><given-names>R.</given-names></name> <name><surname>Hudson</surname><given-names>D. A.</given-names></name> <name><surname>Adeli</surname><given-names>E.</given-names></name> <name><surname>Altman</surname><given-names>R.</given-names></name> <name><surname>Arora</surname><given-names>S.</given-names></name> <name><surname>Arx</surname><given-names>S.</given-names><prefix>von</prefix></name> <etal/></person-group> (<year>2021</year>). <article-title>On the opportunities and risks of foundation models</article-title>. <source>ArXiv</source></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bright</surname><given-names>J.</given-names></name> <name><surname>Enock</surname><given-names>F. E.</given-names></name> <name><surname>Esnaashari</surname><given-names>S.</given-names></name> <name><surname>Francis</surname><given-names>J.</given-names></name> <name><surname>Hashem</surname><given-names>Y.</given-names></name> <name><surname>Morgan</surname><given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>Generative AI is already widespread in the public sector</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname><given-names>T. B.</given-names></name> <name><surname>Mann</surname><given-names>B.</given-names></name> <name><surname>Ryder</surname><given-names>N.</given-names></name> <name><surname>Subbiah</surname><given-names>M.</given-names></name> <name><surname>Kaplan</surname><given-names>J.</given-names></name> <name><surname>Dhariwal</surname><given-names>P.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Language models are few-shot learners</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Budescu</surname><given-names>D. V.</given-names></name> <name><surname>Wallsten</surname><given-names>T. S.</given-names></name> <name><surname>Au</surname><given-names>W. T.</given-names></name></person-group> (<year>1997</year>). <article-title>On the importance of random error in the study of probability judgment. Part II: applying the stochastic judgment model to detect systematic trends</article-title>. <source>J. Behav. Decis. Mak.</source> <volume>10</volume>, <fpage>173</fpage>&#x2013;<lpage>188</lpage>. doi: <pub-id pub-id-type="doi">10.1002/(SICI)1099-0771(199709)10:3&#x003C;173::AID-BDM261&#x003E;3.0.CO;2-6</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Busemeyer</surname><given-names>J. R.</given-names></name> <name><surname>Bruza</surname><given-names>P. D.</given-names></name></person-group> (<year>2012</year>). <source>Quantum models of cognition and decision</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Busemeyer</surname><given-names>J. R.</given-names></name> <name><surname>Bruza</surname><given-names>P. D.</given-names></name></person-group> (<year>2024</year>). <source>Quantum models of cognition and decision: Principles and applications</source>. <edition>2nd</edition> Edn. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Busemeyer</surname><given-names>J. R.</given-names></name> <name><surname>Pothos</surname><given-names>E. M.</given-names></name> <name><surname>Franco</surname><given-names>R.</given-names></name> <name><surname>Trueblood</surname><given-names>J. S.</given-names></name></person-group> (<year>2011</year>). <article-title>A quantum theoretical explanation for probability judgment errors</article-title>. <source>Psychol. Rev.</source> <volume>118</volume>, <fpage>193</fpage>&#x2013;<lpage>218</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0022542</pub-id>, <pub-id pub-id-type="pmid">21480739</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carlson</surname><given-names>B. W.</given-names></name> <name><surname>Yates</surname><given-names>J. F.</given-names></name></person-group> (<year>1989</year>). <article-title>Disjunction errors in qualitative likelihood judgment</article-title>. <source>Organ. Behav. Hum. Decis. Process.</source> <volume>44</volume>, <fpage>368</fpage>&#x2013;<lpage>379</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0749-5978(89)90014-9</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ceron</surname><given-names>T.</given-names></name> <name><surname>Falk</surname><given-names>N.</given-names></name> <name><surname>Bari&#x0107;</surname><given-names>A.</given-names></name> <name><surname>Nikolaev</surname><given-names>D.</given-names></name> <name><surname>Pad&#x00F3;</surname><given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>Beyond prompt brittleness: evaluating the reliability and consistency of political worldviews in LLMs</article-title>. <source>Trans. Assoc. Comput. Linguis.</source> <volume>12</volume>, <fpage>1378</fpage>&#x2013;<lpage>1400</lpage>. doi: <pub-id pub-id-type="doi">10.1162/tacl_a_00710</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>Y.</given-names></name> <name><surname>Andiappan</surname><given-names>M.</given-names></name> <name><surname>Jenkin</surname><given-names>T.</given-names></name> <name><surname>Ovchinnikov</surname><given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>A manager and an AI walk into a Bar: Does ChatGPT make biased decisions like we do?</article-title> <source>SSRN Electron. J.</source> <volume>27</volume>, <fpage>354</fpage>&#x2013;<lpage>368</lpage>. doi: <pub-id pub-id-type="doi">10.2139/ssrn.4380365</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Collins</surname><given-names>R. N.</given-names></name> <name><surname>Mandel</surname><given-names>D. R.</given-names></name> <name><surname>Karvetski</surname><given-names>C. W.</given-names></name> <name><surname>Wu</surname><given-names>C. M.</given-names></name> <name><surname>Nelson</surname><given-names>J. D.</given-names></name></person-group> (<year>2024</year>). <article-title>The wisdom of the coherent: improving correspondence with coherence-weighted aggregation</article-title>. <source>Decision</source> <volume>11</volume>, <fpage>60</fpage>&#x2013;<lpage>85</lpage>. doi: <pub-id pub-id-type="doi">10.1037/dec0000211</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Costello</surname><given-names>F.</given-names></name> <name><surname>Watts</surname><given-names>P.</given-names></name></person-group> (<year>2014</year>). <article-title>Surprisingly rational: probability theory plus noise explains biases in judgment</article-title>. <source>Psychol. Rev.</source> <volume>121</volume>, <fpage>463</fpage>&#x2013;<lpage>480</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0037010</pub-id>, <pub-id pub-id-type="pmid">25090427</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Costello</surname><given-names>F. J.</given-names></name> <name><surname>Watts</surname><given-names>P.</given-names></name></person-group> (<year>2016</year>). &#x201C;<chapter-title>A test of two models of probability judgment: quantum versus noisy probability</chapter-title>&#x201D; in <source>Proceedings of the annual Meeting of the Cognitive Science Society</source> (<publisher-loc>Oakland, CA</publisher-loc>: <publisher-name>California Digital Library</publisher-name>).</mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Costello</surname><given-names>F.</given-names></name> <name><surname>Watts</surname><given-names>P.</given-names></name></person-group> (<year>2018</year>). <article-title>Invariants in probabilistic reasoning</article-title>. <source>Cogn. Psychol.</source> <volume>100</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cogpsych.2017.11.003</pub-id>, <pub-id pub-id-type="pmid">29220640</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cowan</surname><given-names>N.</given-names></name></person-group> (<year>2001</year>). <article-title>The magical number 4 in short-term memory: a reconsideration of mental storage capacity</article-title>. <source>Behav. Brain Sci.</source> <volume>24</volume>, <fpage>87</fpage>&#x2013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0140525X01003922</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Crupi</surname><given-names>V.</given-names></name> <name><surname>Elia</surname><given-names>F.</given-names></name> <name><surname>Apr&#x00E0;</surname><given-names>F.</given-names></name> <name><surname>Tentori</surname><given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>Double conjunction fallacies in physicians&#x2019; probability judgment</article-title>. <source>Med. Decis. Mak.</source> <volume>38</volume>, <fpage>756</fpage>&#x2013;<lpage>760</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0272989X18786358</pub-id>, <pub-id pub-id-type="pmid">29978726</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dulany</surname><given-names>D. E.</given-names></name> <name><surname>Hilton</surname><given-names>D. J.</given-names></name></person-group> (<year>1991</year>). <article-title>Conversational Implicature, conscious representation, and the conjunction fallacy</article-title>. <source>Soc. Cogn.</source> <volume>9</volume>, <fpage>85</fpage>&#x2013;<lpage>110</lpage>. doi: <pub-id pub-id-type="doi">10.1521/soco.1991.9.1.85</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Epping</surname><given-names>G. P.</given-names></name> <name><surname>Busemeyer</surname><given-names>J. R.</given-names></name></person-group> (<year>2023</year>). <article-title>Using diverging predictions from classical and quantum models to dissociate between categorization systems</article-title>. <source>J. Math. Psychol.</source> <volume>112</volume>:<fpage>102738</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jmp.2022.102738</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fisk</surname><given-names>J. E.</given-names></name></person-group> (<year>2002</year>). <article-title>Judgments under uncertainty: representativeness or potential surprise?</article-title> <source>Br. J. Psychol.</source> <volume>93</volume>, <fpage>431</fpage>&#x2013;<lpage>449</lpage>. doi: <pub-id pub-id-type="doi">10.1348/000712602761381330</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Griffiths</surname><given-names>T. L.</given-names></name> <name><surname>Kemp</surname><given-names>C.</given-names></name> <name><surname>Tenenbaum</surname><given-names>J. B.</given-names></name></person-group> (<year>2008</year>). &#x201C;<chapter-title>Bayesian models of cognition</chapter-title>&#x201D; in <source>The Cambridge handbook of computational psychology</source> (<publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>).</mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Guthrie</surname><given-names>C.</given-names></name> <name><surname>Rachlinski</surname><given-names>J. J.</given-names></name> <name><surname>Wistrich</surname><given-names>A. J.</given-names></name></person-group> (<year>2009</year>). <article-title>The &#x201C;hidden judiciary&#x201D;: an empirical examination of executive branch justice</article-title>. <source>Duke Law J.</source> <volume>58</volume>, <fpage>1477</fpage>&#x2013;<lpage>1530</lpage>. Available at: <ext-link xlink:href="https://scholarship.law.duke.edu/dlj/vol58/iss7/8" ext-link-type="uri">https://scholarship.law.duke.edu/dlj/vol58/iss7/8</ext-link>.</mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hagendorff</surname><given-names>T.</given-names></name> <name><surname>Fabi</surname><given-names>S.</given-names></name> <name><surname>Kosinski</surname><given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Human-like intuitive behavior and reasoning biases emerged in large language models but disappeared in ChatGPT</article-title>. <source>Nature Comput. Sci.</source> <volume>3</volume>, <fpage>833</fpage>&#x2013;<lpage>838</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s43588-023-00527-x</pub-id>, <pub-id pub-id-type="pmid">38177754</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname><given-names>I.</given-names></name> <name><surname>Nguyen</surname><given-names>H. V.</given-names></name> <name><surname>Man</surname><given-names>O.</given-names></name> <name><surname>MacNeil</surname><given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>The evolving usage of GenAI by computing students</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname><given-names>J.</given-names></name> <name><surname>Busemeyer</surname><given-names>J. R.</given-names></name> <name><surname>Ebelt</surname><given-names>Z.</given-names></name> <name><surname>Pothos</surname><given-names>E. M.</given-names></name></person-group> (<year>2025</year>). <article-title>Bridging the gap between subjective probability and probability judgments: the quantum sequential sampler</article-title>. <source>Psychol. Rev.</source> <volume>132</volume>, <fpage>916</fpage>&#x2013;<lpage>955</lpage>. doi: <pub-id pub-id-type="doi">10.1037/rev0000489</pub-id>, <pub-id pub-id-type="pmid">39298226</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jung</surname><given-names>M.</given-names></name> <name><surname>Zhang</surname><given-names>A.</given-names></name> <name><surname>Fung</surname><given-names>M.</given-names></name> <name><surname>Lee</surname><given-names>J.</given-names></name> <name><surname>Liang</surname><given-names>P. P.</given-names></name></person-group> (<year>2024</year>). <article-title>Quantitative insights into large language model usage and trust in Academia: an empirical study</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kahneman</surname><given-names>D.</given-names></name> <name><surname>Sibony</surname><given-names>O.</given-names></name> <name><surname>Sunstein</surname><given-names>C. R.</given-names></name></person-group> (<year>2021</year>). <article-title>Noise: a flaw in human judgement</article-title>. <source>Econ. Record</source> <volume>98</volume>, <fpage>120</fpage>&#x2013;<lpage>122</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1475-4932.12661</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kahneman</surname><given-names>D.</given-names></name> <name><surname>Tversky</surname><given-names>A.</given-names></name></person-group> (<year>1979</year>). <article-title>Prospect theory: an analysis of decision under risk</article-title>. <source>Econometrica</source> <volume>47</volume>:<fpage>263</fpage>. doi: <pub-id pub-id-type="doi">10.2307/1914185</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karvetski</surname><given-names>C. W.</given-names></name> <name><surname>Olson</surname><given-names>K. C.</given-names></name> <name><surname>Mandel</surname><given-names>D. R.</given-names></name> <name><surname>Twardy</surname><given-names>C. R.</given-names></name></person-group> (<year>2013</year>). <article-title>Probabilistic coherence weighting for optimizing expert forecasts</article-title>. <source>Decis. Anal.</source> <volume>10</volume>, <fpage>305</fpage>&#x2013;<lpage>326</lpage>. doi: <pub-id pub-id-type="doi">10.1287/deca.2013.0279</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kolmogorov</surname><given-names>A. N.</given-names></name></person-group> (<year>1950</year>). <source>Foundations of the theory of probability</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Chelsea Pub. Co</publisher-name>.</mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koralus</surname><given-names>P.</given-names></name> <name><surname>Wang-Ma&#x015B;cianica</surname><given-names>V.</given-names></name></person-group> (<year>2023</year>). <article-title>Humans in humans out: on GPT converging toward common sense in both success and failure</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname><given-names>C.</given-names></name> <name><surname>Zeng</surname><given-names>T.</given-names></name> <name><surname>Jeong</surname><given-names>J.</given-names></name> <name><surname>Sohn</surname><given-names>J.</given-names></name> <name><surname>Lee</surname><given-names>K.</given-names></name></person-group> (<year>2026</year>). <article-title>How to correctly report LLM-as-a-judge evaluations</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>Z.</given-names></name> <name><surname>Gong</surname><given-names>Z.</given-names></name> <name><surname>Ai</surname><given-names>L.</given-names></name> <name><surname>Hui</surname><given-names>Z.</given-names></name> <name><surname>Chen</surname><given-names>R.</given-names></name> <name><surname>Leach</surname><given-names>C. W.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>The mind in the machine: a survey of incorporating psychological theories in LLMs</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Macchi</surname><given-names>L.</given-names></name> <name><surname>Osherson</surname><given-names>D.</given-names></name> <name><surname>Krantz</surname><given-names>D. H.</given-names></name></person-group> (<year>1999</year>). <article-title>A note on superadditive probability judgment</article-title>. <source>Psychol. Rev.</source> <volume>106</volume>, <fpage>210</fpage>&#x2013;<lpage>214</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0033-295X.106.1.210</pub-id></mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Macmillan-Scott</surname><given-names>O.</given-names></name> <name><surname>Musolesi</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>(Ir)rationality and cognitive biases in large language models</article-title>. <source>R. Soc. Open Sci.</source> <volume>11</volume>:<fpage>240255</fpage>. doi: <pub-id pub-id-type="doi">10.1098/rsos.240255</pub-id>, <pub-id pub-id-type="pmid">39100158</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Miller</surname><given-names>G. A.</given-names></name></person-group> (<year>1956</year>). <article-title>The magical number seven, plus or minus two: some limits on our capacity for processing information</article-title>. <source>Psychol. Rev.</source> <volume>63</volume>, <fpage>81</fpage>&#x2013;<lpage>97</lpage>. doi: <pub-id pub-id-type="doi">10.1037/h0043158</pub-id></mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moro</surname><given-names>R.</given-names></name></person-group> (<year>2009</year>). <article-title>On the nature of the conjunction fallacy</article-title>. <source>Synthese</source> <volume>171</volume>, <fpage>1</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11229-008-9377-8</pub-id></mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Oaksford</surname><given-names>M.</given-names></name> <name><surname>Chater</surname><given-names>N.</given-names></name></person-group> (<year>2007</year>). <source>Bayesian rationality</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oaksford</surname><given-names>M.</given-names></name> <name><surname>Chater</surname><given-names>N.</given-names></name></person-group> (<year>2009</year>). <article-title>Pr&#x00E9;cis of Bayesian rationality: the probabilistic approach to human reasoning</article-title>. <source>Behav. Brain Sci.</source> <volume>32</volume>, <fpage>69</fpage>&#x2013;<lpage>84</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0140525X09000284</pub-id>, <pub-id pub-id-type="pmid">19210833</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll1">OpenAI</collab></person-group>. (<year>2025</year>). <source>GPT-5 [Large language model]</source>. <publisher-loc>San Francisco, CA, USA</publisher-loc>: <publisher-name>OpenAI</publisher-name>.</mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll2">OpenAI</collab></person-group>. (<year>2026</year>). Chat completions&#x2014;reasoning_effort. Available online at: <ext-link xlink:href="https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort" ext-link-type="uri">https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort</ext-link> (Accessed January 6, 2026)</mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pothos</surname><given-names>E. M.</given-names></name> <name><surname>Busemeyer</surname><given-names>J. R.</given-names></name></person-group> (<year>2013</year>). <article-title>Can quantum probability provide a new direction for cognitive modeling?</article-title> <source>Behav. Brain Sci.</source> <volume>36</volume>, <fpage>255</fpage>&#x2013;<lpage>274</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0140525X12001525</pub-id>, <pub-id pub-id-type="pmid">23673021</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pothos</surname><given-names>E. M.</given-names></name> <name><surname>Busemeyer</surname><given-names>J. R.</given-names></name></person-group> (<year>2022</year>). <article-title>Quantum cognition</article-title>. <source>Annu. Rev. Psychol.</source> <volume>73</volume>, <fpage>749</fpage>&#x2013;<lpage>778</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev-psych-033020-123501</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pothos</surname><given-names>E. M.</given-names></name> <name><surname>Busemeyer</surname><given-names>J. R.</given-names></name> <name><surname>Shiffrin</surname><given-names>R. M.</given-names></name> <name><surname>Yearsley</surname><given-names>J. M.</given-names></name></person-group> (<year>2017</year>). <article-title>The rational status of quantum cognition</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>146</volume>, <fpage>968</fpage>&#x2013;<lpage>987</lpage>. doi: <pub-id pub-id-type="doi">10.1037/xge0000312</pub-id>, <pub-id pub-id-type="pmid">28447840</pub-id></mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Santurkar</surname><given-names>S.</given-names></name> <name><surname>Durmus</surname><given-names>E.</given-names></name> <name><surname>Ladhak</surname><given-names>F.</given-names></name> <name><surname>Lee</surname><given-names>C.</given-names></name> <name><surname>Liang</surname><given-names>P.</given-names></name> <name><surname>Hashimoto</surname><given-names>T.</given-names></name></person-group> (<year>2023</year>). &#x201C;<chapter-title>Whose opinions do language models reflect?</chapter-title>&#x201D; in <source>Proceedings of the 40th international conference on machine learning (ICML 2023)</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>).</mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shafir</surname><given-names>E.</given-names></name></person-group> (<year>1993</year>). <article-title>Choosing versus rejecting: why some options are both better and worse than others</article-title>. <source>Mem. Cogn.</source> <volume>21</volume>, <fpage>546</fpage>&#x2013;<lpage>556</lpage>. doi: <pub-id pub-id-type="doi">10.3758/BF03197186</pub-id>, <pub-id pub-id-type="pmid">8350746</pub-id></mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sides</surname><given-names>A.</given-names></name> <name><surname>Osherson</surname><given-names>D.</given-names></name> <name><surname>Bonini</surname><given-names>N.</given-names></name> <name><surname>Viale</surname><given-names>R.</given-names></name></person-group> (<year>2002</year>). <article-title>On the reality of the conjunction fallacy</article-title>. <source>Mem. Cogn.</source> <volume>30</volume>, <fpage>191</fpage>&#x2013;<lpage>198</lpage>. doi: <pub-id pub-id-type="doi">10.3758/BF03195280</pub-id>, <pub-id pub-id-type="pmid">12035881</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suri</surname><given-names>G.</given-names></name> <name><surname>Slater</surname><given-names>L. R.</given-names></name> <name><surname>Ziaee</surname><given-names>A.</given-names></name> <name><surname>Nguyen</surname><given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Do large language models show decision heuristics similar to humans? A case study using {GPT}-3.5</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tenenbaum</surname><given-names>J. B.</given-names></name> <name><surname>Kemp</surname><given-names>C.</given-names></name> <name><surname>Griffiths</surname><given-names>T. L.</given-names></name> <name><surname>Goodman</surname><given-names>N. D.</given-names></name></person-group> (<year>2011</year>). <article-title>How to grow a mind: statistics, structure, and abstraction</article-title>. <source>Science</source> <volume>331</volume>, <fpage>1279</fpage>&#x2013;<lpage>1285</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1192788</pub-id>, <pub-id pub-id-type="pmid">21393536</pub-id></mixed-citation></ref>
<ref id="ref51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tentori</surname><given-names>K.</given-names></name> <name><surname>Bonini</surname><given-names>N.</given-names></name> <name><surname>Osherson</surname><given-names>D.</given-names></name></person-group> (<year>2004</year>). <article-title>The conjunction fallacy: a misunderstanding about conjunction?</article-title> <source>Cogn. Sci.</source> <volume>28</volume>, <fpage>467</fpage>&#x2013;<lpage>477</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cogsci.2004.01.001</pub-id></mixed-citation></ref>
<ref id="ref52"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Tversky</surname><given-names>A.</given-names></name> <name><surname>Gati</surname><given-names>I.</given-names></name></person-group> (<year>1978</year>). &#x201C;<chapter-title>Studies of similarity</chapter-title>&#x201D; in <source>Cognition and categorization</source>. eds. <person-group person-group-type="editor"><name><surname>Rosch</surname><given-names>E.</given-names></name> <name><surname>Lloyd</surname><given-names>B. B.</given-names></name></person-group> (<publisher-loc>Mahwah, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates</publisher-name>).</mixed-citation></ref>
<ref id="ref53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tversky</surname><given-names>A.</given-names></name> <name><surname>Kahneman</surname><given-names>D.</given-names></name></person-group> (<year>1974</year>). <article-title>Judgment under uncertainty: heuristics and biases</article-title>. <source>Science</source> <volume>185</volume>, <fpage>1124</fpage>&#x2013;<lpage>1131</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.185.4157.1124</pub-id></mixed-citation></ref>
<ref id="ref54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tversky</surname><given-names>A.</given-names></name> <name><surname>Kahneman</surname><given-names>D.</given-names></name></person-group> (<year>1983</year>). <article-title>Extensional versus intuitive reasoning: the conjunction fallacy in probability judgment</article-title>. <source>Psychol. Rev.</source> <volume>90</volume>, <fpage>293</fpage>&#x2013;<lpage>315</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0033-295X.90.4.293</pub-id></mixed-citation></ref>
<ref id="ref55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tversky</surname><given-names>A.</given-names></name> <name><surname>Koehler</surname><given-names>D. J.</given-names></name></person-group> (<year>1994</year>). <article-title>Support theory: a nonextensional representation of subjective probability</article-title>. <source>Psychol. Rev.</source> <volume>101</volume>, <fpage>547</fpage>&#x2013;<lpage>567</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0033-295X.101.4.547</pub-id></mixed-citation></ref>
<ref id="ref56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wallsten</surname><given-names>T. S.</given-names></name> <name><surname>Budescu</surname><given-names>D. V.</given-names></name> <name><surname>Zwick</surname><given-names>R.</given-names></name></person-group> (<year>1993</year>). <article-title>Comparing the calibration and coherence of numerical and verbal probability judgments</article-title>. <source>Manag. Sci.</source> <volume>39</volume>, <fpage>176</fpage>&#x2013;<lpage>190</lpage>. doi: <pub-id pub-id-type="doi">10.1287/mnsc.39.2.176</pub-id></mixed-citation></ref>
<ref id="ref57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>P.</given-names></name> <name><surname>Xiao</surname><given-names>Z.</given-names></name> <name><surname>Chen</surname><given-names>H.</given-names></name> <name><surname>Oswald</surname><given-names>F. L.</given-names></name></person-group> (<year>2024</year>). <article-title>Will the real Linda please stand up&#x2026;to large language models? Examining the representativeness heuristic in LLMs</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref58"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Weidinger</surname><given-names>L.</given-names></name> <name><surname>Mellor</surname><given-names>J.</given-names></name> <name><surname>Rauh</surname><given-names>M.</given-names></name> <name><surname>Griffin</surname><given-names>C.</given-names></name> <name><surname>Uesato</surname><given-names>J.</given-names></name> <name><surname>Huang</surname><given-names>P.-S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Ethical and social risks of language models</article-title>. <source>ArXiv</source>.</mixed-citation></ref>
<ref id="ref59"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wojciechowski</surname><given-names>B. W.</given-names></name> <name><surname>Pothos</surname><given-names>E. M.</given-names></name></person-group> (<year>2018</year>). <article-title>Is there a conjunction fallacy in legal probabilistic decision making?</article-title> <source>Front. Psychol.</source> <volume>9</volume>:<fpage>391</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2018.00391</pub-id>, <pub-id pub-id-type="pmid">29674983</pub-id></mixed-citation></ref>
<ref id="ref60"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yates</surname><given-names>J. F.</given-names></name> <name><surname>Carlson</surname><given-names>B. W.</given-names></name></person-group> (<year>1986</year>). <article-title>Conjunction errors: evidence for multiple judgment procedures, including &#x201C;signed summation&#x201D;</article-title>. <source>Organ. Behav. Hum. Decis. Process.</source> <volume>37</volume>, <fpage>230</fpage>&#x2013;<lpage>253</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0749-5978(86)90053-1</pub-id></mixed-citation></ref>
<ref id="ref61"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yax</surname><given-names>N.</given-names></name> <name><surname>Anll&#x00F3;</surname><given-names>H.</given-names></name> <name><surname>Palminteri</surname><given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>Studying and improving reasoning in humans and machines</article-title>. <source>Commun. Psychol.</source> <volume>2</volume>:<fpage>51</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s44271-024-00091-8</pub-id>, <pub-id pub-id-type="pmid">39242743</pub-id></mixed-citation></ref>
<ref id="ref62"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname><given-names>J.-Q.</given-names></name> <name><surname>Sanborn</surname><given-names>A. N.</given-names></name> <name><surname>Chater</surname><given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>The Bayesian sampler: generic Bayesian inference causes incoherence in human probability judgments</article-title>. <source>Psychol. Rev.</source> <volume>127</volume>, <fpage>719</fpage>&#x2013;<lpage>748</lpage>. doi: <pub-id pub-id-type="doi">10.1037/rev0000190</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2480517/overview">Kevin Gluck</ext-link>, Florida Institute for Human and Machine Cognition, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1090657/overview">Robert Nathan Collins</ext-link>, Defence Research and Development Canada (DRDC), Canada</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2721335/overview">Jy-yong Sohn</ext-link>, Yonsei University, Republic of Korea</p>
</fn>
</fn-group>
</back>
</article>