<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="review-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oral Health</journal-id><journal-title-group>
<journal-title>Frontiers in Oral Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oral Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-4842</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/froh.2026.1748450</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Systematic Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Multimodal large language models for oral lesion diagnosis: a systematic review of diagnostic performance and clinical utility</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Hassanein</surname><given-names>Fatma E. A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3194559/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Alkabazi</surname><given-names>Malik</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3277874/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Tassoker</surname><given-names>Melek</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3284658/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Ahmed</surname><given-names>Yousra</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Alsaeed</surname><given-names>Suliman</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/2340050/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Abou-Bakr</surname><given-names>Asmaa</given-names></name>
<xref ref-type="aff" rid="aff8"><sup>8</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3194133/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Oral Medicine, Periodontology, and Oral Diagnosis, Faculty of Dentistry, King Salman International University</institution>, <city>El Tur</city>, <state>South Sinai</state>, <country country="eg">Egypt</country></aff>
<aff id="aff2"><label>2</label><institution>Faculty of Dentistry Khalij-Libya</institution>, <city>Tripoli</city>, <country country="ly">Libya</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Dentomaxillofacial Radiology, Faculty of Dentistry, Necmettin Erbakan University</institution>, <city>Meram</city>, <state>Konya</state>, <country country="">T&#x00FC;rkiye</country></aff>
<aff id="aff4"><label>4</label><institution>Department of Prosthetic Dentistry, Removable Prosthodontic Division, Faculty of Dentistry, King Salman International University</institution>, <city>El Tur</city>, <state>South Sinai</state>, <country country="eg">Egypt</country></aff>
<aff id="aff5"><label>5</label><institution>Preventive Dental Sciences Department, College of Dentistry, King Saud Bin Abdulaziz University for Health Sciences</institution>, <city>Riyadh</city>, <country country="sa">Saudi Arabia</country></aff>
<aff id="aff6"><label>6</label><institution>King Abdullah International Medical Research Center</institution>, <city>Riyadh</city>, <country country="sa">Saudi Arabia</country></aff>
<aff id="aff7"><label>7</label><institution>Ministry of the National Guard&#x2014;Health Affairs</institution>, <city>Riyadh</city>, <country country="sa">Saudi Arabia</country></aff>
<aff id="aff8"><label>8</label><institution>Department of Oral Medicine and Periodontology, Faculty of Dentistry, Galala University</institution>, <city>Suez</city>, <country country="eg">Egypt</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Fatma E. A. Hassanein <email xlink:href="mailto:fatma.hassanein@ksiu.edu.eg">fatma.hassanein@ksiu.edu.eg</email> Suliman Alsaeed <email xlink:href="mailto:saeedsu@ksau-hs.edu.sa">saeedsu@ksau-hs.edu.sa</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-24"><day>24</day><month>02</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>7</volume><elocation-id>1748450</elocation-id>
<history>
<date date-type="received"><day>17</day><month>11</month><year>2025</year></date>
<date date-type="rev-recd"><day>29</day><month>01</month><year>2026</year></date>
<date date-type="accepted"><day>02</day><month>02</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Hassanein, Alkabazi, Tassoker, Ahmed, Alsaeed and Abou-Bakr.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Hassanein, Alkabazi, Tassoker, Ahmed, Alsaeed and Abou-Bakr</copyright-holder><license><ali:license_ref start_date="2026-02-24">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Background</title>
<p>Diagnosing oral lesions, which range from benign conditions to oral cancer, remains challenging owing to overlapping visual features and reliance on histopathology. Large language models (LLMs) can integrate textual and visual cues, but their diagnostic accuracy and clinical utility in real decision-making contexts remain uncertain. This review aimed to systematically evaluate the diagnostic performance, clinical usefulness, and limitations of LLMs in identifying oral lesions.</p>
</sec><sec><title>Methods</title>
<p>PubMed, CINAHL, Embase, Web of Science, and Google Scholar were searched to 20 July 2025. Eligible studies applied LLMs (e.g., ChatGPT, Gemini, DeepSeek, Copilot, Claude) for diagnosis or differential diagnosis of oral lesions using text, images, or multimodal inputs. Outcomes included diagnostic accuracy, agreement metrics, and qualitative assessments of explanation quality and clinical applicability. Risk of bias was assessed using an adapted QUADAS-2. Narrative synthesis was performed due to heterogeneity.</p>
</sec><sec><title>Results</title>
<p>Seventeen studies (&#x003E;1,200 cases) were included. Diagnostic accuracy ranged from 25&#x0025; to 96&#x0025;, varying by model version, input modality, and lesion complexity. Multimodal inputs consistently improved performance, with Cohen&#x0027;s &#x03BA; reaching 0.85&#x2013;0.90. Advanced models (GPT-4o, DeepSeek-R1, o1-preview) outperformed earlier versions and approached expert performance on some tasks, although specialists generally retained superior Top-1 accuracy. Clinical utility was highest when LLMs were used to structure differential reasoning, highlight red-flag features, and support communication, but it was limited in tasks requiring fine morphological interpretation or severity grading. Overall risk of bias was low to moderate.</p>
</sec><sec><title>Conclusions</title>
<p>LLMs demonstrate variable diagnostic performance and context-dependent supportive utility as adjunctive tools in oral lesion assessment, particularly in multimodal settings. They should complement, rather than replace, expert clinical judgment. Future research should prioritize real-world workflow evaluation, standardized prompting strategies, and prospective clinical validation.</p>
</sec><sec><title>Systematic Review Registration</title>
<p><ext-link ext-link-type="uri" xlink:href="https://www.crd.york.ac.uk/PROSPERO/view/CRD420251090315">https://www.crd.york.ac.uk/PROSPERO/view/CRD420251090315</ext-link>, identifier CRD420251090315.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence in dentistry</kwd>
<kwd>clinical decision support</kwd>
<kwd>diagnostic accuracy</kwd>
<kwd>large language models</kwd>
<kwd>multimodal AI</kwd>
<kwd>oral lesions</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. The study was self-funded. King Abdullah International Medical Research Center (KAIMRC), Riyadh, Saudi Arabia, covered the article processing charges. The funder had no role in the study design, data collection, analysis, interpretation, or manuscript preparation.</funding-statement></funding-group><counts>
<fig-count count="3"/>
<table-count count="3"/><equation-count count="0"/><ref-count count="38"/><page-count count="10"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Oral Epidemiology</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><title>Introduction</title>
<p>In clinical practice, prompt and precise diagnosis of oral lesions, which can range from benign conditions to potentially malignant disorders (PMDs) and oral squamous cell carcinoma (OSCC), remains a major challenge. Even for skilled medical professionals, the visual resemblance of numerous benign, dysplastic, and malignant lesions frequently causes diagnostic ambiguity (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). This is crucial because delayed diagnosis of OSCC is associated with advanced disease stage, complex treatment pathways, and persistently low five-year survival rates (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>Histopathological analysis of a tissue biopsy, the current reference standard for conclusive diagnosis, is invasive, time-consuming, and resource-dependent. Therefore, there is an urgent need for accurate, rapid, and accessible decision-support tools to assist initial assessment and triage, particularly in primary care settings where early presentations commonly occur.</p>
<p>The field of medical diagnostics is undergoing rapid evolution with artificial intelligence (AI), particularly deep learning. Convolutional neural networks (CNNs) have demonstrated expert-level image interpretation in dentistry, showing strong performance in detecting periapical lesions, periodontal disease, and caries (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>). AI models have also achieved high sensitivity and specificity in distinguishing benign from malignant oral lesions using clinical photographs (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B7">7</xref>&#x2013;<xref ref-type="bibr" rid="B10">10</xref>). Prior systematic reviews have established a strong foundation for AI-assisted diagnostic support across dental specialties (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>).</p>
<p>In contrast, large language models (LLMs) such as GPT-4 (OpenAI), Gemini (Google), and LLaMA (Meta) represent a distinct paradigm shift. Their advanced natural language processing capabilities allow them to synthesize clinical histories, describe reasoning steps, and generate explanations, rather than simply classify images. Multimodal LLMs can now integrate text and visual data, including clinical descriptions, photographic images, and even histopathology reports (<xref ref-type="bibr" rid="B13">13</xref>).</p>
<p>This positions LLMs as potential clinical reasoning tools rather than mere image classifiers. An LLM can take combined inputs (patient history&#x2009;&#x002B;&#x2009;lesion image&#x2009;&#x002B;&#x2009;clinical descriptors) to produce differential diagnoses, justify its reasoning, identify red-flag features, and suggest next steps in management (<xref ref-type="bibr" rid="B14">14</xref>). This aligns directly with real-world oral medicine workflows, where diagnostic accuracy depends on integrating multiple information sources.</p>
<p>Although promising, the diagnostic use of LLMs for oral lesions remains nascent, and existing evidence is fragmented. Previous reviews have focused either on histology-based machine learning for head and neck cancer (<xref ref-type="bibr" rid="B12">12</xref>) or general applications of AI in dentistry (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B15">15</xref>). A gap remains regarding the diagnostic accuracy, clinical utility, and limitations of LLMs specifically in oral mucosal disease, including performance across text-only, image-only, and multimodal workflows, comparison to human expertise, and susceptibility to bias.</p>
<p>This systematic review was therefore designed to evaluate the diagnostic performance and clinical utility of LLMs in identifying oral lesions, while also examining their reported limitations and risks of bias. In contrast to prior broad AI reviews, this work adopts a focused scope, emphasizing the unique reasoning capabilities and multimodal diagnostic potential of LLMs.</p>
</sec>
<sec id="s2" sec-type="methods"><title>Methods</title>
<sec id="s2a"><title>Protocol and registration</title>
<p>This review was prospectively registered in PROSPERO (CRD420251090315) on 9 July 2025 and conducted in accordance with the registered protocol. No deviations from the protocol occurred.</p>
</sec>
<sec id="s2b"><title>Eligibility criteria</title>
<p>We defined eligibility criteria <italic>a priori</italic> in line with PRISMA 2020 recommendations.</p>
<sec id="s2b1"><title>Inclusion criteria</title>
<p>Studies applying LLMs (e.g., ChatGPT, Gemini, DeepSeek, Copilot, Claude, LLaMA) to generate a final diagnosis or differential diagnosis for oral mucosal or jaw lesions (benign, potentially malignant, or malignant) were included. Accepted input modalities encompassed text-only, image-only, and multimodal combinations (clinical descriptions, histopathology reports, photographic images). Outcomes included diagnostic accuracy or expert agreement. Both real patient cases and standardized vignette-based evaluations were eligible when the diagnostic task reflected real decision-making.</p>
</sec>
<sec id="s2b2"><title>Exclusion criteria</title>
<p>Studies were excluded if they: (1) relied solely on conventional machine learning/CNN-based imaging models without an LLM; (2) did not involve diagnostic inference (e.g., education-only applications); or (3) were review articles, commentaries, or non&#x2013;patient-derived reports.</p>
<p>Eligibility is summarized in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref> (PICOS framework).</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Eligibility criteria based on PICOS framework.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Domain</th>
<th valign="top" align="center">Specification</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Population (P)</td>
<td valign="top" align="left">Patients with oral mucosal or jaw lesions (benign, premalignant, or malignant), are described through clinical, histological, photographic, or multimodal datasets.</td>
</tr>
<tr>
<td valign="top" align="left">Intervention (I)</td>
<td valign="top" align="left">Application of large language models (LLMs) chatbots (e.g., GPT, Gemini, DeepSeek, LLaMA, copilot, Claude) for generating diagnostic or differential diagnosis outputs.</td>
</tr>
<tr>
<td valign="top" align="left">Comparator (C)</td>
<td valign="top" align="left">Human expert diagnosis (oral medicine specialists, pathologists, or dentists) and/or reference standards (e.g., histopathology, established diagnostic criteria). Studies without comparators were eligible if diagnostic outcomes were reported.</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Outcomes (O)</td>
<td valign="top" align="left">Primary: Objective diagnostic performance and reliability metrics, including accuracy, sensitivity, specificity, PPV, NPV, AUC, Top-k accuracy, and agreement coefficients (e.g., Cohen&#x0027;s &#x03BA;, Gwet&#x0027;s AC1), reflecting diagnostic validity relative to the reference standard.</td>
</tr>
<tr>
<td valign="top" align="left">Secondary: Subjective and perception-based measures of model output quality and usability, including explanation quality, plausibility ratings, interpretability, narrative usefulness, and perceived clinical utility, which do not represent diagnostic correctness.</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Study Design (S)</td>
<td valign="top" align="left">Diagnostic accuracy studies (prospective or retrospective), vignette-based experimental evaluations, and multimodal validation studies.</td>
</tr>
<tr>
<td valign="top" align="left">Excluded: reviews, editorials, commentaries, surveys of educational settings, or studies outside lesion diagnosis.</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2c"><title>Outcomes</title>
<p>Outcomes were predefined and categorized into two distinct domains. Primary outcomes included objective diagnostic performance and reliability measures, such as accuracy, sensitivity, specificity, positive and negative predictive values, area under the receiver operating characteristic curve (AUC), Top-k accuracy, and agreement coefficients (e.g., Cohen&#x0027;s &#x03BA; and Gwet&#x0027;s AC1), reflecting diagnostic validity relative to the applied reference standard.</p>
<p>Secondary outcomes comprised subjective and perception-based measures of model output quality and usability, including explanation quality, plausibility ratings, interpretability, narrative usefulness, and perceived clinical utility. These outcomes represent qualitative assessments of reasoning support and user experience and do not constitute measures of diagnostic correctness.</p>
<p>Primary and secondary outcome domains were analyzed and reported separately to avoid conceptual overlap and overinterpretation of clinical relevance.</p>
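<p>For readers less familiar with these statistics, the sketch below shows how Cohen&#x0027;s &#x03BA; is conventionally computed for two raters. This is a minimal editorial illustration in Python with hypothetical labels, not code used in the review itself.</p>
<code language="python">
from collections import Counter

def cohens_kappa(rater_a, rater_b):
    """Cohen's kappa: (p_o - p_e) / (1 - p_e) for two raters."""
    n = len(rater_a)
    # p_o: observed proportion of cases on which the two raters agree
    p_o = sum(a == b for a, b in zip(rater_a, rater_b)) / n
    # p_e: chance agreement expected from the raters' marginal frequencies
    freq_a, freq_b = Counter(rater_a), Counter(rater_b)
    p_e = sum(freq_a[c] * freq_b[c] for c in freq_a) / (n * n)
    return (p_o - p_e) / (1 - p_e)

# Hypothetical screening decisions for two reviewers:
a = ["include", "exclude", "include", "include", "exclude", "exclude"]
b = ["include", "exclude", "exclude", "include", "exclude", "exclude"]
print(round(cohens_kappa(a, b), 2))  # 0.67 on this toy example
</code>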
</sec>
<sec id="s2d"><title>Search strategy</title>
<p>A comprehensive search was performed in PubMed, CINAHL, Embase, Web of Science, and Google Scholar, covering the period from January 2022 to 20 July 2025, using MeSH terms and free-text keywords related to large language models and oral lesions. An English-language restriction was applied owing to the limited comparability of cross-language LLM performance. Full search strategies are provided in <xref ref-type="sec" rid="s11">Supplementary Appendix 1</xref> (PRISMA-S).</p>
</sec>
<sec id="s2e"><title>Study selection</title>
<p>Records were deduplicated in EndNote and screened independently by two reviewers in Rayyan. Discrepancies were resolved by a third reviewer. Inter-reviewer agreement was calculated using Cohen&#x0027;s &#x03BA; for both title/abstract and full-text stages. The single included study with overlapping authorship was evaluated exclusively by reviewers without overlapping authorship, with discrepancies resolved by an independent third reviewer.</p>
</sec>
<sec id="s2f"><title>Data extraction and management</title>
<p>Data extraction was conducted independently using a standardized, piloted form and verified by a third reviewer. Extracted items included study design, sample characteristics, model version, input modality, comparator, lesion type, and outcomes (accuracy, sensitivity, specificity, AUC, agreement coefficients, explanation quality).</p>
</sec>
<sec id="s2g"><title>Risk of bias assessment</title>
<p>Risk of bias was appraised using an adapted QUADAS-2 tailored for LLM-based diagnostic evaluations, assessing case selection, index test, reference standard, and flow/timing. Judgments were categorized as low, high, or some concerns. Summary and traffic-light visualizations were generated (<xref ref-type="sec" rid="s11">Supplementary Appendix 2</xref>).</p>
</sec>
<sec id="s2h"><title>Data synthesis</title>
<p>Due to anticipated clinical and methodological heterogeneity across large language model families, input modalities (text-only, image-only, and multimodal), lesion domains (oral mucosal and jaw lesions), comparator reference standards, and outcome definitions, meta-analysis was not undertaken. Instead, a structured narrative synthesis was performed. Findings were organized according to: (1) LLM family, (2) input modality, (3) comparator reference standard, and (4) lesion category. Descriptive statistics were used to summarize, as distinct outcome domains, study-level diagnostic performance metrics, agreement/reliability measures, and utility-related or subjective outcomes as reported in the original studies. No pooled estimates (e.g., means or summary effect sizes) were calculated; results were presented descriptively using individual study values and ranges, as appropriate, to avoid inappropriate aggregation across heterogeneous study designs.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><title>Results</title>
<sec id="s3a"><title>Study selection</title>
<p>The search identified 1,178 records (PubMed 476; CINAHL 172; Embase 50; Web of Science 380; Google Scholar 100). After removal of 291 duplicates, 887 records were screened and 867 excluded. Twenty full texts were sought; one could not be retrieved and two were excluded (wrong setting or population). Seventeen studies were included in the qualitative synthesis (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>).</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>PRISMA chart.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-07-1748450-g001.tif"><alt-text content-type="machine-generated">PRISMA flow diagram illustrating the study selection process: 1,178 records identified from five databases, 291 duplicates removed, 887 records screened, 867 excluded, 20 reports sought, 1 not retrieved, 2 excluded for eligibility reasons, and 17 studies included in the review.</alt-text>
</graphic>
</fig>
<p>Inter-reviewer agreement was substantial, with a Cohen&#x0027;s &#x03BA; of 0.82 for title and abstract screening and 0.88 for full-text eligibility assessment.</p>
</sec>
<sec id="s3b"><title>Study characteristics</title>
<p>The included studies (published 2024&#x2013;2025) evaluated a range of large language models (LLMs), including ChatGPT-3.5/4/4o/4V, o1-preview, DeepSeek-V3/R1, Gemini 1.5 Pro/Flash, Claude 3/3.5, LLaMA 3.2, and Microsoft Copilot (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>).</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Characteristics of included studies (<italic>n</italic>&#x2009;&#x003D;&#x2009;17).</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Model(s)</th>
<th valign="top" align="center">Clinical Domain</th>
<th valign="top" align="center">Modality</th>
<th valign="top" align="center"><italic>n</italic></th>
<th valign="top" align="center">Reference Standard</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Silva et al. (<xref ref-type="bibr" rid="B16">16</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5</td>
<td valign="top" align="left">Radiolucent jaw lesions (DMFR)</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">28</td>
<td valign="top" align="left">Histopathology</td>
</tr>
<tr>
<td valign="top" align="left">Schmidl et al. (<xref ref-type="bibr" rid="B24">24</xref>)</td>
<td valign="top" align="left">ChatGPT-4</td>
<td valign="top" align="left">OSCC/Leukoplakia/Benign oral lesions</td>
<td valign="top" align="left">Image; Text; Multimodal</td>
<td valign="top" align="center">45</td>
<td valign="top" align="left">Histopathology</td>
</tr>
<tr>
<td valign="top" align="left">Rewthamrongsris et al. (<xref ref-type="bibr" rid="B27">27</xref>)</td>
<td valign="top" align="left">GPT-4o, Gemini, Claude, LLaMA</td>
<td valign="top" align="left">OLP vs. non-OLP</td>
<td valign="top" align="left">Image</td>
<td valign="top" align="center">1,142</td>
<td valign="top" align="left">Histopathology</td>
</tr>
<tr>
<td valign="top" align="left">Pradhan (<xref ref-type="bibr" rid="B20">20</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4/4o; Gemini</td>
<td valign="top" align="left">OPMLs &#x0026; OSCC</td>
<td valign="top" align="left">Text; Multimodal</td>
<td valign="top" align="center">42</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">Maia-Lima et al. (<xref ref-type="bibr" rid="B21">21</xref>)</td>
<td valign="top" align="left">GPT-4 transformer</td>
<td valign="top" align="left">Syndromic orofacial presentations</td>
<td valign="top" align="left">Multimodal</td>
<td valign="top" align="center">26</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">Kaygisiz et al. (<xref ref-type="bibr" rid="B31">31</xref>)</td>
<td valign="top" align="left">GPT-4; DeepSeek-V3</td>
<td valign="top" align="left">Benign vs. malignant oral lesions</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">16</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">Yu et al. (<xref ref-type="bibr" rid="B28">28</xref>)</td>
<td valign="top" align="left">GPT-4o; Claude; Chat-Diagrams</td>
<td valign="top" align="left">OLP</td>
<td valign="top" align="left">Image</td>
<td valign="top" align="center">128</td>
<td valign="top" align="left">Histopathology</td>
</tr>
<tr>
<td valign="top" align="left">Tomo et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4; Human clinicians</td>
<td valign="top" align="left">11 oral lesion types</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">37</td>
<td valign="top" align="left">Composite clinical criteria</td>
</tr>
<tr>
<td valign="top" align="left">Tassoker (<xref ref-type="bibr" rid="B17">17</xref>)</td>
<td valign="top" align="left">ChatGPT-4o; Experts</td>
<td valign="top" align="left">Mixed oral mucosal lesions</td>
<td valign="top" align="left">Multimodal</td>
<td valign="top" align="center">123</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">Su&#x00E1;rez et al. (<xref ref-type="bibr" rid="B22">22</xref>)</td>
<td valign="top" align="left">ChatGPT-4o</td>
<td valign="top" align="left">Oral/labial mucosal lesions</td>
<td valign="top" align="left">Image</td>
<td valign="top" align="center">30</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">Hassanein et al. (<xref ref-type="bibr" rid="B25">25</xref>)</td>
<td valign="top" align="left">ChatGPT-4o; DeepSeek-V3; Experts</td>
<td valign="top" align="left">Mixed oral mucosal lesions</td>
<td valign="top" align="left">Multimodal</td>
<td valign="top" align="center">80</td>
<td valign="top" align="left">Histopathology</td>
</tr>
<tr>
<td valign="top" align="left">Diniz-Freitas et al. (<xref ref-type="bibr" rid="B23">23</xref>)</td>
<td valign="top" align="left">ChatGPT-4V</td>
<td valign="top" align="left">Oral diseases (NEJM&#x2009;&#x002B;&#x2009;Oral Dis.)</td>
<td valign="top" align="left">Image; Text; Multimodal</td>
<td valign="top" align="center">57 (36&#x2009;&#x002B;&#x2009;21)</td>
<td valign="top" align="left">Published case solutions</td>
</tr>
<tr>
<td valign="top" align="left">Diniz-Freitas et al. (<xref ref-type="bibr" rid="B19">19</xref>)</td>
<td valign="top" align="left">GPT-4o; DeepSeek-R1</td>
<td valign="top" align="left">Oral diseases (benchmarks)</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">36</td>
<td valign="top" align="left">Published case solutions</td>
</tr>
<tr>
<td valign="top" align="left">Danesh et al. (<xref ref-type="bibr" rid="B29">29</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4; o1-preview</td>
<td valign="top" align="left">Mixed dental/oral cases</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">50</td>
<td valign="top" align="left">Published case solutions</td>
</tr>
<tr>
<td valign="top" align="left">Danesh et al. (<xref ref-type="bibr" rid="B38">38</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4</td>
<td valign="top" align="left">Mixed dental/oral cases</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">50</td>
<td valign="top" align="left">Published case solutions</td>
</tr>
<tr>
<td valign="top" align="left">Cuevas-Nu&#x00F1;ez et al. (<xref ref-type="bibr" rid="B30">30</xref>)</td>
<td valign="top" align="left">ChatGPT-4</td>
<td valign="top" align="left">OMFP cases</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">102</td>
<td valign="top" align="left">Expert consensus</td>
</tr>
<tr>
<td valign="top" align="left">AlFarabi Ali et al. (<xref ref-type="bibr" rid="B26">26</xref>)</td>
<td valign="top" align="left">ChatGPT-4; Copilot; Experts</td>
<td valign="top" align="left">Oral medicine clinical scenarios</td>
<td valign="top" align="left">Text</td>
<td valign="top" align="center">50</td>
<td valign="top" align="left">Histopathology&#x2009;&#x002B;&#x2009;expert decision</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>OLP, oral lichen planus; OSCC, oral squamous cell carcinoma; OPMLs, oral potentially malignant lesions; OMFP, oral &#x0026; maxillofacial pathology; Multi, multimodal. Reference standards: Histopathology 35&#x0025; (6/17), Expert consensus 35&#x0025; (6/17), Published cases 24&#x0025; (4/17).</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Input modalities varied: text-only approaches were used in 11 studies, image-only in 5, and multimodal configurations in 6, with some studies evaluating more than one modality. Lesion focus ranged from single-entity cohorts, such as OLP, OPMLs, leukoplakia, and syndromic lesions, to broader mixed oral mucosal case sets. Reference standards comprised histopathology in five studies, expert consensus in six, published clinical solutions in four, and composite or hybrid standards in two. Sample sizes ranged from 16 to 1,142 cases (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>).</p>
</sec>
<sec id="s3c"><title>Diagnostic performance</title>
<p>Comparative performance statements reported in this section are restricted to within-study evaluations; cross-study differences are presented descriptively and should not be interpreted as direct model rankings. <xref ref-type="table" rid="T3">Table&#x00A0;3</xref> organizes outcomes into two distinct domains: diagnostic performance metrics (objective measures of accuracy and agreement) and subjective perception-based outcomes (plausibility and reasoning quality assessments). This separation prevents conceptual overlap and reduces the risk of overinterpretation of clinical relevance. Objective diagnostic performance varied substantially according to model family, input modality, lesion complexity, and study design (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>).</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Diagnostic performance and methodological characteristics of included studies.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Model(s)</th>
<th valign="top" align="center">Metric</th>
<th valign="top" align="center">Performance</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="4">A. Diagnostic accuracy</td>
</tr>
<tr>
<td valign="top" align="left">Silva et al. (<xref ref-type="bibr" rid="B16">16</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5</td>
<td valign="top" align="left">Top-1/Top-2/Top-3 accuracy</td>
<td valign="top" align="left">25&#x0025;/57&#x0025;/68&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Maia-Lima et al. (<xref ref-type="bibr" rid="B21">21</xref>)</td>
<td valign="top" align="left">GPT-4 multimodal</td>
<td valign="top" align="left">Top-1/Top-2 accuracy</td>
<td valign="top" align="left">81&#x0025;/96&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Tassoker (<xref ref-type="bibr" rid="B17">17</xref>)</td>
<td valign="top" align="left">ChatGPT-4o; Experts</td>
<td valign="top" align="left">Top-1 accuracy</td>
<td valign="top" align="left">GPT-4o: 78&#x0025;; Experts: 100&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Hassanein et al. (<xref ref-type="bibr" rid="B25">25</xref>)</td>
<td valign="top" align="left">GPT-4o; DeepSeek-V3; Experts</td>
<td valign="top" align="left">Top-1/Top-3/Top-5 accuracy</td>
<td valign="top" align="left">GPT-4o: 38/76/84&#x0025;; DeepSeek-V3: 51/71/83&#x0025;; Experts: 59/71/86&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Danesh et al. (<xref ref-type="bibr" rid="B29">29</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4; o1-preview</td>
<td valign="top" align="left">First correct diagnosis</td>
<td valign="top" align="left">GPT-3.5 40&#x0025;; GPT-4 62&#x0025;; o1-preview 80&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Rewthamrongsris et al. (<xref ref-type="bibr" rid="B27">27</xref>)</td>
<td valign="top" align="left">GPT-4o; Gemini; Claude; LLaMA</td>
<td valign="top" align="left">Accuracy (zero-shot vs. prompt-guided)</td>
<td valign="top" align="left">GPT-4o 67&#x0025;&#x2013;69&#x0025;; Gemini Flash 69.7&#x0025;&#x2013;80.5&#x0025;; Others 55&#x0025;&#x2013;65&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Pradhan (<xref ref-type="bibr" rid="B20">20</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4/4o; Gemini</td>
<td valign="top" align="left">Overall accuracy</td>
<td valign="top" align="left">36&#x0025;&#x2013;67&#x0025; (models); Experts 71&#x0025;&#x2013;74&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Yu et al. (<xref ref-type="bibr" rid="B28">28</xref>)</td>
<td valign="top" align="left">GPT-4o; Claude; Chat-Diagrams</td>
<td valign="top" align="left">Accuracy (trained vs. untrained)</td>
<td valign="top" align="left">GPT-4o: 59&#x0025;&#x2013;77&#x0025;; Claude 15&#x0025;&#x2013;50&#x0025;; Chat-Diagrams 64&#x0025;&#x2013;68&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Tomo et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4; Experts</td>
<td valign="top" align="left">Overall accuracy</td>
<td valign="top" align="left">GPT-3.5: 65&#x0025;; GPT-4: 80&#x0025;; Experts: 87&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Su&#x00E1;rez et al. (<xref ref-type="bibr" rid="B22">22</xref>)</td>
<td valign="top" align="left">ChatGPT-4o</td>
<td valign="top" align="left">Image-only accuracy</td>
<td valign="top" align="left">58&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Diniz-Freitas et al. (<xref ref-type="bibr" rid="B23">23</xref>)</td>
<td valign="top" align="left">ChatGPT-4V</td>
<td valign="top" align="left">Accuracy by modality</td>
<td valign="top" align="left">Text 52&#x0025;&#x2013;81&#x0025;; Image 29&#x0025;&#x2013;33&#x0025;; Multimodal up to 86&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Diniz-Freitas et al. (<xref ref-type="bibr" rid="B19">19</xref>)</td>
<td valign="top" align="left">GPT-4o; DeepSeek-R1</td>
<td valign="top" align="left">Overall accuracy</td>
<td valign="top" align="left">GPT-4o: 89&#x0025;; DeepSeek-R1: 92&#x0025;; Human baseline: 39&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Cuevas-Nu&#x00F1;ez et al. (<xref ref-type="bibr" rid="B30">30</xref>)</td>
<td valign="top" align="left">ChatGPT-4</td>
<td valign="top" align="left">Overall accuracy</td>
<td valign="top" align="left">60&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">AlFarabi Ali et al. (<xref ref-type="bibr" rid="B26">26</xref>)</td>
<td valign="top" align="left">ChatGPT-4; Copilot; Experts</td>
<td valign="top" align="left">FDx/DDx</td>
<td valign="top" align="left">GPT-4: 70&#x0025;/74&#x0025;; Copilot: 46&#x0025;/60&#x0025;; Experts: 80&#x0025;/60&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Schmidl et al. (<xref ref-type="bibr" rid="B24">24</xref>)</td>
<td valign="top" align="left">ChatGPT-4</td>
<td valign="top" align="left">Accuracy by modality</td>
<td valign="top" align="left">Image: 27&#x0025;&#x2013;87&#x0025;; Text: 20&#x0025;&#x2013;80&#x0025;; Multimodal: 73&#x0025;&#x2013;93&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="4">B. Agreement metrics</td>
</tr>
<tr>
<td valign="top" align="left">Tomo et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="left">ChatGPT-3.5/4; Experts</td>
<td valign="top" align="left">Cohen&#x0027;s &#x03BA;</td>
<td valign="top" align="left">Primary diagnosis: 0.532 (GPT-3.5), 0.533 (GPT-4); Alternative diagnosis: 0.337 (GPT-3.5), 0.367 (GPT-4)</td>
</tr>
<tr>
<td valign="top" align="left">Su&#x00E1;rez et al. (<xref ref-type="bibr" rid="B22">22</xref>)</td>
<td valign="top" align="left">ChatGPT-4o</td>
<td valign="top" align="left">Gwet&#x0027;s AC; Percent agreement (Diagnosis)</td>
<td valign="top" align="left">AC 0.834; PA 0.922</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="4">C. Subjective/Perception-Based Outcomes</td>
</tr>
<tr>
<td valign="top" align="left">Kaygisiz et al. (<xref ref-type="bibr" rid="B31">31</xref>)</td>
<td valign="top" align="left">GPT-4; DeepSeek-V3</td>
<td valign="top" align="left">Plausibility (Likert)</td>
<td valign="top" align="left">GPT-4: 3.1; DeepSeek-V3: 4.0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF2"><p>FDx, final diagnosis; DDx, differential diagnosis; Multi, multimodal. Outcomes are grouped by domain. Sections A and B report objective diagnostic performance and reliability metrics related to diagnostic validity relative to the reference standard. Section C reports subjective and perception-based evaluations of output quality and usability, which do not represent diagnostic accuracy or clinical validity<italic>.</italic></p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3d"><title>Diagnostic accuracy</title>
<p>Diagnostic accuracy varied substantially across models, input modalities, and prompting strategies (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>). Early-generation models such as ChatGPT-3.5 demonstrated modest Top-1 performance (25&#x0025;&#x2013;65&#x0025;), whereas newer models showed variable accuracy of 36&#x0025;&#x2013;89&#x0025; across studies and lesion types; ChatGPT-4 and ChatGPT-4o achieved peak performance of 78&#x0025;&#x2013;89&#x0025; in some clinical datasets, compared with 87&#x0025;&#x2013;100&#x0025; for expert comparators (<xref ref-type="bibr" rid="B16">16</xref>&#x2013;<xref ref-type="bibr" rid="B20">20</xref>).</p>
<p>Multimodal input was associated with the largest performance gains. GPT-4 multimodal achieved Top-1 and Top-2 accuracies of 81&#x0025; and 96&#x0025;, while multimodal implementations of ChatGPT-4 and ChatGPT-4V reached peak accuracies of 86&#x0025;&#x2013;93&#x0025;, exceeding performance observed under text-only (20&#x0025;&#x2013;81&#x0025;) and image-only (27&#x0025;&#x2013;87&#x0025;) conditions (<xref ref-type="bibr" rid="B21">21</xref>&#x2013;<xref ref-type="bibr" rid="B24">24</xref>).</p>
<p>Top-k analysis further demonstrated improved diagnostic coverage. In heterogeneous oral lesion cohorts, ChatGPT-4o achieved Top-1, Top-3, and Top-5 accuracies of 38&#x0025;, 76&#x0025;, and 84&#x0025;, with comparable performance reported for DeepSeek-V3 (<xref ref-type="bibr" rid="B25">25</xref>). Differential diagnosis tasks showed ChatGPT-4 outperforming Microsoft Copilot (FDx/DDx: 70&#x0025;/74&#x0025; vs. 46&#x0025;/60&#x0025;), while remaining below expert-level final diagnosis performance (<xref ref-type="bibr" rid="B26">26</xref>).</p>
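<p>As an illustrative aid, the Top-k computation used throughout these studies can be sketched as follows; the ranked differentials and reference diagnoses are hypothetical, and the snippet is an editorial illustration rather than code from any included study.</p>
<code language="python">
def top_k_accuracy(ranked_lists, truths, k):
    """Share of cases whose reference diagnosis appears in the model's top-k differentials."""
    hits = sum(truth in ranked[:k] for ranked, truth in zip(ranked_lists, truths))
    return hits / len(truths)

# Two hypothetical cases with model-ranked differentials and reference diagnoses:
ranked = [["lichen planus", "leukoplakia", "candidiasis"],
          ["squamous cell carcinoma", "traumatic ulcer", "leukoplakia"]]
truth = ["leukoplakia", "squamous cell carcinoma"]
print(top_k_accuracy(ranked, truth, 1))  # 0.5: only the second case is a Top-1 hit
print(top_k_accuracy(ranked, truth, 3))  # 1.0: both reference diagnoses fall within the Top-3
</code>
<p>This also illustrates why Top-3 and Top-5 values consistently exceed Top-1 values: widening the candidate list can only add hits.</p>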
<p>Prompt-guided and trained configurations improved accuracy across multiple models, with GPT-4o achieving 67&#x0025;&#x2013;69&#x0025; accuracy and Gemini Flash up to 80.5&#x0025; under example-guided prompting, and trained GPT-4o achieving 59&#x0025;&#x2013;77&#x0025; accuracy compared with lower performance for Claude (15&#x0025;&#x2013;50&#x0025;) and Chat-Diagrams (64&#x0025;&#x2013;68&#x0025;) (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>). Progressive performance gains across model generations were also observed, with correct first diagnosis rates increasing from 40&#x0025; (ChatGPT-3.5) to 62&#x0025; (ChatGPT-4) and 80&#x0025; (o1-preview) (<xref ref-type="bibr" rid="B29">29</xref>). Additional classification tasks reported approximately 60&#x0025; accuracy for ChatGPT-4 (<xref ref-type="bibr" rid="B30">30</xref>).</p>
</sec>
<sec id="s3e"><title>Agreement metrics</title>
<p>Only a limited number of studies formally evaluated diagnostic output consistency. Tomo et al. reported moderate repeatability for primary diagnostic hypotheses (Cohen&#x0027;s <italic>&#x03BA;</italic>&#x2009;&#x2248;&#x2009;0.53) and fair consistency for alternative diagnostic classifications (<xref ref-type="bibr" rid="B18">18</xref>). In a separate multimodal evaluation, Su&#x00E1;rez et al. demonstrated substantial diagnostic repeatability for ChatGPT-4o using Gwet&#x0027;s AC (AC&#x2009;&#x003D;&#x2009;0.834) across repeated runs (<xref ref-type="bibr" rid="B22">22</xref>). Collectively, these findings indicate that structured clinical context improves output stability; however, variability in final diagnostic classification remains evident.</p>
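<p>Because Gwet&#x0027;s AC1 is reported less often than &#x03BA;, a minimal sketch of its standard two-rater form may aid interpretation of the value above; the labels are hypothetical and the snippet is an editorial illustration, not code from the cited study.</p>
<code language="python">
def gwets_ac1(ratings_a, ratings_b):
    """Gwet's AC1 for two raters (or two repeated runs of the same model)."""
    n = len(ratings_a)
    cats = sorted(set(ratings_a) | set(ratings_b))
    # p_a: observed agreement between the two sets of labels
    p_a = sum(a == b for a, b in zip(ratings_a, ratings_b)) / n
    # pi_k: mean proportion of all ratings falling in category k
    pi = {c: (ratings_a.count(c) + ratings_b.count(c)) / (2 * n) for c in cats}
    # chance agreement under AC1: (1 / (K - 1)) * sum of pi_k * (1 - pi_k)
    p_e = sum(p * (1 - p) for p in pi.values()) / (len(cats) - 1)
    return (p_a - p_e) / (1 - p_e)

# Hypothetical labels from two repeated runs over the same six cases:
run1 = ["OLP", "OLP", "leukoplakia", "candidiasis", "OLP", "leukoplakia"]
run2 = ["OLP", "leukoplakia", "leukoplakia", "candidiasis", "OLP", "leukoplakia"]
print(round(gwets_ac1(run1, run2), 2))  # about 0.76 on this toy example
</code>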
</sec>
<sec id="s3f"><title>Subjective/perception-based outcomes</title>
<p>Subjective and perception-based evaluations primarily assessed output plausibility, reasoning coherence, and perceived usability rather than objective diagnostic performance. A single study incorporating expert Likert-scale scoring reported higher perceived plausibility and contextual grounding of diagnostic reasoning when multimodal inputs were used compared with image-only conditions (<xref ref-type="bibr" rid="B31">31</xref>). Importantly, these outcomes reflect expert perception of output quality and should not be interpreted as measures of diagnostic accuracy or clinical validity.</p>
</sec>
<sec id="s3g"><title>Clinical utility</title>
<p>Clinical utility was primarily described in terms of decision-support functionality rather than autonomous diagnostic use. Multimodal studies reported that LLM outputs assisted with structuring differential diagnoses, summarizing key diagnostic features, and organizing clinical information to support preliminary clinical reasoning and educational workflows (<xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B24">24</xref>). Output stability across repeated runs was considered favorable for training and triage-oriented applications (<xref ref-type="bibr" rid="B22">22</xref>). Expert plausibility scoring indicated moderate-to-high perceived usefulness of diagnostic reasoning outputs, with higher ratings observed for more contextually grounded responses (<xref ref-type="bibr" rid="B31">31</xref>). However, limitations related to prompt sensitivity, reduced performance in complex cases, and lower interpretability under image-only conditions were consistently highlighted (<xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B24">24</xref>). Overall, LLMs were positioned as adjunctive tools that may support clinical reasoning but should not replace expert clinical judgment.</p>
</sec>
<sec id="s3h"><title>Risk of bias</title>
<p>Based on the QUADAS-2 assessment, two studies were rated as high risk of bias, primarily due to non-representative or synthetic data sources. Six studies were rated as low risk, with clear case definitions, appropriate reference standards, and consistent test procedures. The remaining nine studies were judged to have some concerns, most commonly due to convenience sampling, non-uniform reference standards, or incomplete reporting. These findings reflect reasonable but variable methodological quality across the included literature (<xref ref-type="fig" rid="F2">Figures&#x00A0;2</xref>&#x2013;<xref ref-type="fig" rid="F3">3</xref>).</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Risk of bias traffic light plot (QUADAS-2) for individual studies.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-07-1748450-g002.tif"><alt-text content-type="machine-generated">Table summarizing the risk of bias assessment for fifteen studies across four domains: patient selection, index test, reference standard, and flow and timing. Most studies show yellow circles for some concerns, green circles for low risk, and red Xs for high risk, particularly in the Silva 2024 and Kaygisiz 2025 studies, which have high overall risk. A legend defines domain categories and color codes for judgement.</alt-text>
</graphic>
</fig>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>Risk of bias summary (QUADAS-2) across all included studies.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-07-1748450-g003.tif"><alt-text content-type="machine-generated">Stacked bar chart displaying risk of bias by category: most segments are green for low risk, yellow for some concerns, and a small red portion for high risk. Categories include patient selection, index test, reference standard, flow and timing, and overall risk of bias. A color key is provided.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><title>Discussion</title>
<p>This systematic review synthesizes recent evidence evaluating large language models (LLMs) for oral lesion diagnosis and identifies three consistent patterns: progressive performance improvement across model generations, substantial gains associated with multimodal input integration, and persistent performance gaps relative to expert clinicians in primary diagnostic accuracy. Although several models demonstrated high benchmark performance under controlled experimental conditions, these findings do not equate to clinical readiness. Instead, current evidence supports the role of LLMs as adjunctive decision-support tools that may enhance clinical reasoning and triage workflows when structured clinical context and imaging data are jointly available, rather than as autonomous diagnostic systems. Accordingly, objective diagnostic performance, reliability metrics, and subjective perception-based outcomes are interpreted separately to avoid conflation of technical performance with clinical utility.</p>
<p>Across included studies, diagnostic performance was strongly influenced by both model architecture and input modality. Earlier-generation models, such as ChatGPT-3.5, consistently demonstrated lower diagnostic accuracy, whereas newer iterations, including ChatGPT-4 and ChatGPT-4o, achieved improved performance, particularly in multimodal settings. However, performance gains were not uniform across tasks, with marked declines observed in image-only diagnostic scenarios. This pattern highlights that architectural improvements alone are insufficient to ensure reliable diagnostic performance and underscores the critical role of clinical context in supporting meaningful reasoning and output stability (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>).</p>
<p>A central and reproducible finding across studies was the superiority of multimodal input over unimodal approaches. Combining clinical history with visual data substantially improved diagnostic accuracy, agreement, and perceived output plausibility compared with text-only or image-only inputs. Agreement metrics, including Cohen&#x0027;s &#x03BA; and Gwet&#x0027;s AC1, further supported this trend by demonstrating improved diagnostic repeatability under multimodal conditions. These observations align with emerging evidence that multimodal foundation models more closely approximate human clinical reasoning by integrating visual pattern recognition with contextual symptom interpretation (<xref ref-type="bibr" rid="B34">34</xref>). Nevertheless, agreement metrics reflect output consistency rather than diagnostic correctness, and high repeatability should not be interpreted as evidence of clinical validity.</p>
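<p>For orientation, both coefficients rescale observed agreement by an estimate of chance agreement but differ in how the chance term is computed; the following is a brief formal sketch in standard notation, not a reproduction of values reported by the included studies. For two raters assigning cases to <italic>Q</italic> diagnostic categories, with observed agreement <italic>p</italic><sub>o</sub> and per-rater marginal proportions <italic>p</italic><sub>1<italic>q</italic></sub> and <italic>p</italic><sub>2<italic>q</italic></sub>:</p>
<disp-formula><tex-math><![CDATA[\kappa=\frac{p_o-p_e}{1-p_e},\quad p_e=\sum_{q=1}^{Q}p_{1q}\,p_{2q};\qquad \mathrm{AC1}=\frac{p_o-p_e^{(\gamma)}}{1-p_e^{(\gamma)}},\quad p_e^{(\gamma)}=\frac{1}{Q-1}\sum_{q=1}^{Q}\pi_q\,(1-\pi_q)]]></tex-math></disp-formula>
<p>where &#x03C0;<sub><italic>q</italic></sub> denotes the mean of the two raters&#x0027; marginal proportions for category <italic>q</italic>. Because the AC1 chance term remains small when a single category dominates, AC1 is less susceptible than &#x03BA; to paradoxically low values in prevalence-skewed samples, a common situation in curated lesion datasets in which a few diagnoses predominate.</p>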
<p>Several studies highlighted the potential clinical value of LLMs as adjunctive diagnostic aids, particularly in settings with limited access to specialist expertise. LLMs demonstrated strengths in structuring differential diagnoses, summarizing discriminative clinical features, and supporting triage-oriented decision-making. These capabilities suggest potential utility in preliminary assessment and educational contexts. However, reasoning depth and diagnostic reliability were often reduced in complex, ambiguous, or atypical cases. Sensitivity to prompt formulation and incomplete contextual interpretation further limit unsupervised deployment. Collectively, these findings reinforce that LLMs may augment clinician reasoning but cannot replace expert judgment.</p>
<p>The present findings extend prior reviews that focused on single-model evaluations or narrow clinical domains. While Panwar and Gupta (2024) examined the diagnostic role of ChatGPT in oral pathology (<xref ref-type="bibr" rid="B35">35</xref>) and Liu et al. (2025) focused on dentomaxillofacial radiology applications (<xref ref-type="bibr" rid="B34">34</xref>), this review systematically compared multiple LLM families across oral medicine, pathology, radiology, and syndromic diagnostic tasks. This broader scope enabled identification of cross-model performance trends and highlighted multimodal integration as a dominant driver of diagnostic performance, independent of specific model architecture.</p>
<p>Comparison with conventional deep learning approaches further contextualizes the role of LLMs in oral diagnostics. Convolutional neural network&#x2013;based classifiers have demonstrated high sensitivity and specificity for image-based oral lesion detection (<xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B37">37</xref>). However, such models primarily operate on visual pattern recognition. In contrast, LLMs provide complementary capabilities, including explanatory reasoning, clinical feature synthesis, and structured differential diagnosis generation. Rather than competing directly with image classifiers, LLMs may be better positioned as integrative reasoning layers that support broader diagnostic workflows and clinical decision-making.</p>
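<p>For context, the sensitivity and specificity figures reported for such classifiers follow the standard confusion-matrix definitions, with TP, FN, TN, and FP denoting true positives, false negatives, true negatives, and false positives, respectively:</p>
<disp-formula><tex-math><![CDATA[\mathrm{Sensitivity}=\frac{TP}{TP+FN},\qquad \mathrm{Specificity}=\frac{TN}{TN+FP}]]></tex-math></disp-formula>
<p>High values on both measures indicate strong discrimination on the image-classification task itself, but neither captures the explanatory reasoning or structured differential diagnosis generation that distinguishes LLM-based approaches.</p>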
<sec id="s4a"><title>Methodological limitations and bias</title>
<p>Despite encouraging trends, the current evidence base is constrained by substantial methodological heterogeneity and threats to validity. Many studies relied on curated benchmark datasets, synthetic case vignettes, or highly selected clinical examples, introducing spectrum bias and case enrichment effects that may inflate apparent diagnostic performance. Reference standards varied widely across studies, ranging from histopathology-confirmed diagnoses to expert consensus and published case solutions, introducing verification bias and subjective variability. Additional heterogeneity in prompting strategies, case presentation formats, lesion categorization schemes, and reported outcome definitions further limited cross-study comparability and precluded quantitative meta-analysis. These findings highlight the urgent need for standardized evaluation frameworks guided by emerging reporting standards such as STARD-AI and QUADAS-AI.</p>
</sec>
<sec id="s4b"><title>Real-world applicability and implementation challenges</title>
<p>Translation of experimental performance into real-world clinical deployment remains uncertain. Practical implementation barriers include workflow integration challenges, variable image quality, incomplete clinical histories, regulatory considerations, and the risk of model drift over time. Furthermore, routine oral diagnostic practice involves diagnostic uncertainty, multimorbidity, and atypical lesion presentations that are underrepresented in current evaluation datasets. As such, high benchmark accuracy should not be interpreted as evidence of chairside diagnostic reliability. Prospective, multicenter clinical validation studies will be essential to establish safety, generalizability, and clinical impact.</p>
</sec>
<sec id="s4c"><title>Clinical and educational implications</title>
<p>From a clinical and educational perspective, LLMs show promise as adjunctive decision-support tools, particularly in primary care and resource-limited settings where specialist access is constrained. They may assist clinicians in generating differential diagnoses, identifying red flags, and prioritizing further investigations. In dental education, LLMs may support diagnostic reasoning training and case-based learning. However, safeguards are required to mitigate automation bias, hallucination risk, and overreliance on model-generated output.</p>
</sec>
<sec id="s4d"><title>Future directions</title>
<p>Future research should prioritize prospective multicenter validation, blinded reader comparison studies, standardized benchmarking protocols, and the development of native multimodal architectures capable of jointly processing clinical text, photographs, radiographs, and histopathology. Additional emphasis should be placed on explainability, bias mitigation, and continuous external validation frameworks to support safe clinical translation.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions"><title>Conclusions</title>
<p>Large language models represent a rapidly evolving class of decision-support tools with emerging potential in oral lesion assessment, particularly when multimodal clinical and visual inputs are available. However, current evidence remains heterogeneous, highly context dependent, and methodologically limited by non-uniform reference standards, curated datasets, and variable evaluation designs. Across most head-to-head comparisons, LLMs do not consistently match expert performance in primary diagnostic accuracy and should not be considered autonomous diagnostic systems at this stage. Accordingly, their current role is best positioned as adjunctive tools to support clinical reasoning, triage, and educational applications rather than as replacements for expert judgment. Continued methodological standardization, prospective validation, and real-world clinical evaluation are essential to define their future clinical utility.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>FH: Supervision, Investigation, Data curation, Writing &#x2013; review &#x0026; editing, Conceptualization, Methodology, Formal analysis, Validation, Writing &#x2013; original draft. MA: Methodology, Writing &#x2013; original draft, Investigation, Formal analysis, Writing &#x2013; review &#x0026; editing. MT: Formal analysis, Methodology, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. YA: Visualization, Methodology, Supervision, Formal analysis, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. SA: Methodology, Visualization, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. AA-B: Methodology, Writing &#x2013; original draft, Formal analysis, Data curation, Investigation, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>The authors would like to thank all colleagues and institutions who provided support during the preparation of this review.</p>
</ack>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/froh.2026.1748450/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/froh.2026.1748450/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Table2.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Warnakulasuriya</surname> <given-names>S</given-names></name> <name><surname>Kujan</surname> <given-names>O</given-names></name> <name><surname>Aguirre-Urizar</surname> <given-names>JM</given-names></name> <name><surname>Bagan</surname> <given-names>JV</given-names></name> <name><surname>Gonz&#x00E1;lez-Moles</surname> <given-names>M</given-names></name> <name><surname>Kerr</surname> <given-names>AR</given-names></name><etal/></person-group> <article-title>Oral potentially malignant disorders: a consensus report from an international seminar on nomenclature and classification, convened by the WHO collaborating centre for oral cancer</article-title>. <source>Oral Dis</source>. (<year>2021</year>) <volume>27</volume>(<issue>8</issue>):<fpage>1862</fpage>&#x2013;<lpage>1880</lpage>. <pub-id pub-id-type="doi">10.1111/odi.13704</pub-id><pub-id pub-id-type="pmid">33128420</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hassanein</surname> <given-names>FEA</given-names></name> <name><surname>Ahmed</surname> <given-names>Y</given-names></name> <name><surname>Maher</surname> <given-names>S</given-names></name> <name><surname>Barbary</surname> <given-names>AE</given-names></name> <name><surname>Abou-Bakr</surname> <given-names>A</given-names></name></person-group>. <article-title>Prompt-dependent performance of multimodal AI model in oral diagnosis: a comprehensive analysis of accuracy, narrative quality, calibration, and latency versus human experts</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>(<issue>1</issue>):<fpage>37932</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-22979-z</pub-id><pub-id pub-id-type="pmid">41168327</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chi</surname> <given-names>AC</given-names></name> <name><surname>Day</surname> <given-names>TA</given-names></name> <name><surname>Neville</surname> <given-names>BW</given-names></name></person-group>. <article-title>Oral cavity and oropharyngeal squamous cell carcinoma&#x2013;an update</article-title>. <source>CA Cancer J Clin</source>. (<year>2015</year>) <volume>65</volume>(<issue>5</issue>):<fpage>401</fpage>&#x2013;<lpage>421</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21293</pub-id><pub-id pub-id-type="pmid">26215712</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shield</surname> <given-names>KD</given-names></name> <name><surname>Ferlay</surname> <given-names>J</given-names></name> <name><surname>Jemal</surname> <given-names>A</given-names></name> <name><surname>Sankaranarayanan</surname> <given-names>R</given-names></name> <name><surname>Chaturvedi</surname> <given-names>AK</given-names></name> <name><surname>Bray</surname> <given-names>F</given-names></name><etal/></person-group> <article-title>The global incidence of lip, oral cavity, and pharyngeal cancers by subsite in 2012</article-title>. <source>CA Cancer J Clin</source>. (<year>2017</year>) <volume>67</volume>(<issue>1</issue>):<fpage>51</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21384</pub-id><pub-id pub-id-type="pmid">28076666</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schwendicke</surname> <given-names>F</given-names></name> <name><surname>Samek</surname> <given-names>W</given-names></name> <name><surname>Krois</surname> <given-names>J</given-names></name></person-group>. <article-title>Artificial intelligence in dentistry: chances and challenges</article-title>. <source>J Dent Res</source>. (<year>2020</year>) <volume>99</volume>(<issue>7</issue>):<fpage>769</fpage>&#x2013;<lpage>774</lpage>. <pub-id pub-id-type="doi">10.1177/0022034520915714</pub-id><pub-id pub-id-type="pmid">32315260</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>C</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>S</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name></person-group>. <article-title>A review of deep learning in dentistry</article-title>. <source>Neurocomputing</source>. (<year>2023</year>) <volume>554</volume>:<fpage>126629</fpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2023.126629</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Warin</surname> <given-names>K</given-names></name> <name><surname>Limprasert</surname> <given-names>W</given-names></name> <name><surname>Suebnukarn</surname> <given-names>S</given-names></name> <name><surname>Jinaporntham</surname> <given-names>S</given-names></name> <name><surname>Jantana</surname> <given-names>P</given-names></name> <name><surname>Vicharueang</surname> <given-names>S</given-names></name></person-group>. <article-title>AI-based analysis of oral lesions using novel deep convolutional neural networks for early detection of oral cancer</article-title>. <source>PLoS One</source>. (<year>2022</year>) <volume>17</volume>(<issue>8</issue>):<fpage>e0273508</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0273508</pub-id><pub-id pub-id-type="pmid">36001628</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Warin</surname> <given-names>K</given-names></name> <name><surname>Suebnukarn</surname> <given-names>S</given-names></name></person-group>. <article-title>Deep learning in oral cancer- a systematic review</article-title>. <source>BMC Oral Health</source>. (<year>2024</year>) <volume>24</volume>(<issue>1</issue>):<fpage>212</fpage>. <pub-id pub-id-type="doi">10.1186/s12903-024-03993-5</pub-id><pub-id pub-id-type="pmid">38341571</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abou-Bakr</surname> <given-names>A</given-names></name> <name><surname>El Barbary</surname> <given-names>A</given-names></name> <name><surname>Hassanein</surname> <given-names>FEA</given-names></name></person-group>. <article-title>ChatGPT-5 vs oral medicine experts for rank-based differential diagnosis of oral lesions: a prospective, biopsy-validated comparison</article-title>. <source>Odontology</source>. (<year>2025</year>). <pub-id pub-id-type="doi">10.1007/s10266-025-01242-x</pub-id><pub-id pub-id-type="pmid">41247661</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hassanein</surname> <given-names>FEA</given-names></name> <name><surname>Hussein</surname> <given-names>RR</given-names></name> <name><surname>Almalahy</surname> <given-names>HG</given-names></name> <name><surname>Sarhan</surname> <given-names>S</given-names></name> <name><surname>Ahmed</surname> <given-names>Y</given-names></name> <name><surname>Abou-Bakr</surname> <given-names>A</given-names></name></person-group>. <article-title>Vision-based diagnostic gain of ChatGPT-5 and Gemini 2.5 pro compared with human experts in oral lesion assessment</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>:<fpage>43279</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-28862-1</pub-id><pub-id pub-id-type="pmid">41350570</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Patil</surname> <given-names>S</given-names></name> <name><surname>Albogami</surname> <given-names>S</given-names></name> <name><surname>Hosmani</surname> <given-names>J</given-names></name> <name><surname>Mujoo</surname> <given-names>S</given-names></name> <name><surname>Kamil</surname> <given-names>MA</given-names></name> <name><surname>Mansour</surname> <given-names>MA</given-names></name><etal/></person-group> <article-title>Artificial intelligence in the diagnosis of oral diseases: applications and pitfalls</article-title>. <source>Diagnostics (Basel)</source>. (<year>2022</year>) <volume>12</volume>(<issue>5</issue>):<fpage>1029</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics12051029</pub-id><pub-id pub-id-type="pmid">35626185</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mahmood</surname> <given-names>H</given-names></name> <name><surname>Shaban</surname> <given-names>M</given-names></name> <name><surname>Rajpoot</surname> <given-names>N</given-names></name> <name><surname>Khurram</surname> <given-names>SA</given-names></name></person-group>. <article-title>Artificial intelligence-based methods in head and neck cancer diagnosis: an overview</article-title>. <source>Br J Cancer</source>. (<year>2021</year>) <volume>124</volume>(<issue>12</issue>):<fpage>1934</fpage>&#x2013;<lpage>1940</lpage>. <pub-id pub-id-type="doi">10.1038/s41416-021-01386-x</pub-id><pub-id pub-id-type="pmid">33875821</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moor</surname> <given-names>M</given-names></name> <name><surname>Banerjee</surname> <given-names>O</given-names></name> <name><surname>Abad</surname> <given-names>ZSH</given-names></name> <name><surname>Krumholz</surname> <given-names>HM</given-names></name> <name><surname>Leskovec</surname> <given-names>J</given-names></name> <name><surname>Topol</surname> <given-names>EJ</given-names></name><etal/></person-group> <article-title>Foundation models for generalist medical artificial intelligence</article-title>. <source>Nature</source>. (<year>2023</year>) <volume>616</volume>(<issue>7956</issue>):<fpage>259</fpage>&#x2013;<lpage>265</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-023-05881-4</pub-id><pub-id pub-id-type="pmid">37045921</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Thirunavukarasu</surname> <given-names>AJ</given-names></name> <name><surname>Ting</surname> <given-names>DSJ</given-names></name> <name><surname>Elangovan</surname> <given-names>K</given-names></name> <name><surname>Gutierrez</surname> <given-names>L</given-names></name> <name><surname>Tan</surname> <given-names>TF</given-names></name> <name><surname>Ting</surname> <given-names>DSW</given-names></name></person-group>. <article-title>Large language models in medicine</article-title>. <source>Nat Med</source>. (<year>2023</year>) <volume>29</volume>(<issue>8</issue>):<fpage>1930</fpage>&#x2013;<lpage>1940</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="pmid">37460753</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bonny</surname> <given-names>T</given-names></name> <name><surname>Al Nassan</surname> <given-names>W</given-names></name> <name><surname>Obaideen</surname> <given-names>K</given-names></name> <name><surname>Al Mallahi</surname> <given-names>MN</given-names></name> <name><surname>Mohammad</surname> <given-names>Y</given-names></name> <name><surname>El-Damanhoury</surname> <given-names>HM</given-names></name></person-group>. <article-title>Contemporary role and applications of artificial intelligence in dentistry</article-title>. <source>F1000Res</source>. (<year>2023</year>) <volume>12</volume>:<fpage>1179</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.140204.1</pub-id><pub-id pub-id-type="pmid">37942018</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Silva</surname> <given-names>TP</given-names></name> <name><surname>Andrade-Bortoletto</surname> <given-names>MFS</given-names></name> <name><surname>Ocampo</surname> <given-names>TSC</given-names></name> <name><surname>Alencar-Palha</surname> <given-names>C</given-names></name> <name><surname>Bornstein</surname> <given-names>MM</given-names></name> <name><surname>Oliveira-Santos</surname> <given-names>C</given-names></name><etal/></person-group> <article-title>Performance of a commercially available generative pre-trained transformer (GPT) in describing radiolucent lesions in panoramic radiographs and establishing differential diagnoses</article-title>. <source>Clin Oral Investig</source>. (<year>2024</year>) <volume>28</volume>(<issue>3</issue>):<fpage>204</fpage>. <pub-id pub-id-type="doi">10.1007/s00784-024-05587-5</pub-id><pub-id pub-id-type="pmid">38459362</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tassoker</surname> <given-names>M</given-names></name></person-group>. <article-title>Exploring ChatGPT&#x2019;s potential in diagnosing oral and maxillofacial pathologies: a study of 123 challenging cases</article-title>. <source>BMC Oral Health</source>. (<year>2025</year>) <volume>25</volume>(<issue>1</issue>):<fpage>1187</fpage>. <pub-id pub-id-type="doi">10.1186/s12903-025-06444-x</pub-id><pub-id pub-id-type="pmid">40676533</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tomo</surname> <given-names>S</given-names></name> <name><surname>Lechien</surname> <given-names>JR</given-names></name> <name><surname>Bueno</surname> <given-names>HS</given-names></name> <name><surname>Cantieri-Debortoli</surname> <given-names>DF</given-names></name> <name><surname>Simonato</surname> <given-names>LE</given-names></name></person-group>. <article-title>Accuracy and consistency of ChatGPT-3.5 and -4 in providing differential diagnoses in oral and maxillofacial diseases: a comparative diagnostic performance analysis</article-title>. <source>Clin Oral Investig</source>. (<year>2024</year>) <volume>28</volume>(<issue>10</issue>):<fpage>544</fpage>. <pub-id pub-id-type="doi">10.1007/s00784-024-05939-1</pub-id><pub-id pub-id-type="pmid">39316174</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Diniz-Freitas</surname> <given-names>M</given-names></name> <name><surname>Diz-Dios</surname> <given-names>P</given-names></name></person-group>. <article-title>Deepseek: another step forward in the diagnosis of oral lesions</article-title>. <source>J Dent Sci</source>. (<year>2025</year>) <volume>20</volume>:<fpage>1904</fpage>&#x2013;<lpage>1907</lpage>. <pub-id pub-id-type="doi">10.1016/j.jds.2025.02.023</pub-id><pub-id pub-id-type="pmid">40654453</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pradhan</surname> <given-names>P</given-names></name></person-group>. <article-title>Accuracy of ChatGPT 3.5, 4.0, 4o and Gemini in diagnosing oral potentially malignant lesions based on clinical case reports and image recognition</article-title>. <source>Med Oral Patol Oral Cir Bucal</source>. (<year>2025</year>) <volume>30</volume>(<issue>2</issue>):<fpage>e224</fpage>&#x2013;<lpage>e231</lpage>. <pub-id pub-id-type="doi">10.4317/medoral.26824</pub-id><pub-id pub-id-type="pmid">39864088</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maia-Lima</surname> <given-names>MP</given-names></name> <name><surname>de Medeiros Carvalho</surname> <given-names>LI</given-names></name> <name><surname>de Ara&#x00FA;jo</surname> <given-names>EG</given-names></name> <name><surname>Martins</surname> <given-names>HD</given-names></name> <name><surname>Machado</surname> <given-names>RA</given-names></name> <name><surname>Sobrinho</surname> <given-names>LM</given-names></name><etal/></person-group> <article-title>Performance of a virtual assistant based on ChatGPT-4 in the diagnosis of syndromes with orofacial manifestations</article-title>. <source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>. (<year>2025</year>) <volume>140</volume>(<issue>3</issue>):<fpage>322</fpage>&#x2013;<lpage>329</lpage>. <pub-id pub-id-type="doi">10.1016/j.oooo.2025.04.002</pub-id><pub-id pub-id-type="pmid">40340214</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Su&#x00E1;rez</surname> <given-names>A</given-names></name> <name><surname>Freire</surname> <given-names>Y</given-names></name> <name><surname>Su&#x00E1;rez</surname> <given-names>M</given-names></name> <name><surname>D&#x00ED;az-Flores Garc&#x00ED;a</surname> <given-names>V</given-names></name> <name><surname>Andreu-V&#x00E1;zquez</surname> <given-names>C</given-names></name> <name><surname>Thuissard Vasallo</surname> <given-names>IJ</given-names></name><etal/></person-group> <article-title>Diagnostic performance of multimodal large language models in the analysis of oral pathology</article-title>. <source>Oral Dis</source>. (<year>2025</year>) <volume>140</volume>:<fpage>322</fpage>&#x2013;<lpage>329</lpage>. <pub-id pub-id-type="doi">10.1111/odi.70009</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Diniz-Freitas</surname> <given-names>M</given-names></name> <name><surname>Lago-M&#x00E9;ndez</surname> <given-names>L</given-names></name> <name><surname>Limeres-Posse</surname> <given-names>J</given-names></name> <name><surname>Diz-Dios</surname> <given-names>P</given-names></name></person-group>. <article-title>Challenging ChatGPT-4V for the diagnosis of oral diseases and conditions</article-title>. <source>Oral Dis</source>. (<year>2024</year>) <volume>31</volume>(<issue>2</issue>):<fpage>701</fpage>&#x2013;<lpage>706</lpage>. <pub-id pub-id-type="doi">10.1111/odi.15169</pub-id><pub-id pub-id-type="pmid">39450689</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schmidl</surname> <given-names>B</given-names></name> <name><surname>H&#x00FC;tten</surname> <given-names>T</given-names></name> <name><surname>Pigorsch</surname> <given-names>S</given-names></name> <name><surname>St&#x00F6;gbauer</surname> <given-names>F</given-names></name> <name><surname>Hoch</surname> <given-names>CC</given-names></name> <name><surname>Hussain</surname> <given-names>T</given-names></name><etal/></person-group> <article-title>Artificial intelligence for image recognition in diagnosing oral and oropharyngeal cancer and leukoplakia</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>(<issue>1</issue>):<fpage>3625</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-85920-4</pub-id><pub-id pub-id-type="pmid">39880876</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hassanein</surname> <given-names>FEA</given-names></name> <name><surname>El Barbary</surname> <given-names>A</given-names></name> <name><surname>Hussein</surname> <given-names>RR</given-names></name> <name><surname>Ahmed</surname> <given-names>Y</given-names></name> <name><surname>El-Guindy</surname> <given-names>J</given-names></name> <name><surname>Sarhan</surname> <given-names>S</given-names></name><etal/></person-group> <article-title>Diagnostic performance of ChatGPT-4o and DeepSeek-3 differential diagnosis of complex oral lesions: a multimodal imaging and case difficulty analysis</article-title>. <source>Oral Dis</source>. (<year>2025</year>). <pub-id pub-id-type="doi">10.1111/odi.70007</pub-id><pub-id pub-id-type="pmid">40589366</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>AlFarabi Ali</surname> <given-names>S</given-names></name> <name><surname>AlDehlawi</surname> <given-names>H</given-names></name> <name><surname>Jazzar</surname> <given-names>A</given-names></name> <name><surname>Ashi</surname> <given-names>H</given-names></name> <name><surname>Esam Abuzinadah</surname> <given-names>N</given-names></name> <name><surname>AlOtaibi</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>The diagnostic performance of large language models and oral medicine consultants for identifying oral lesions in text-based clinical scenarios: prospective comparative study</article-title>. <source>JMIR AI</source>. (<year>2025</year>) <volume>4</volume>(<issue>1</issue>):<fpage>e70566</fpage>. <pub-id pub-id-type="doi">10.2196/70566</pub-id><pub-id pub-id-type="pmid">40605790</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rewthamrongsris</surname> <given-names>P</given-names></name> <name><surname>Burapacheep</surname> <given-names>J</given-names></name> <name><surname>Phattarataratip</surname> <given-names>E</given-names></name> <name><surname>Kulthanaamondhita</surname> <given-names>P</given-names></name> <name><surname>Tichy</surname> <given-names>A</given-names></name> <name><surname>Schwendicke</surname> <given-names>F</given-names></name><etal/></person-group> <article-title>Image-based diagnostic performance of LLMs vs CNNs for oral lichen planus: example-guided and differential diagnosis</article-title>. <source>Int Dent J</source>. (<year>2025</year>) <volume>75</volume>(<issue>4</issue>):<fpage>100848</fpage>. <pub-id pub-id-type="doi">10.1016/j.identj.2025.100848</pub-id><pub-id pub-id-type="pmid">40482575</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>S</given-names></name> <name><surname>Sun</surname> <given-names>W</given-names></name> <name><surname>Mi</surname> <given-names>D</given-names></name> <name><surname>Jin</surname> <given-names>S</given-names></name> <name><surname>Wu</surname> <given-names>X</given-names></name> <name><surname>Xin</surname> <given-names>B</given-names></name><etal/></person-group> <article-title>Artificial intelligence diagnosing of oral lichen planus: a comparative study</article-title>. <source>Bioengineering</source>. (<year>2024</year>) <volume>11</volume>:<fpage>1159</fpage>. <pub-id pub-id-type="doi">10.3390/bioengineering11111159</pub-id><pub-id pub-id-type="pmid">39593819</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Danesh</surname> <given-names>A</given-names></name> <name><surname>Danesh</surname> <given-names>A</given-names></name> <name><surname>Danesh</surname> <given-names>F</given-names></name></person-group>. <article-title>Advancing dental diagnostics with OpenAI&#x2019;s o1-preview: a follow-up evaluation of ChatGPT&#x2019;s performance on diagnostic challenges</article-title>. <source>J Am Dent Assoc</source>. (<year>2025</year>) <volume>156</volume>(<issue>7</issue>):<fpage>555</fpage>&#x2013;<lpage>562.e553</lpage>. <pub-id pub-id-type="doi">10.1016/j.adaj.2025.04.003</pub-id><pub-id pub-id-type="pmid">40633994</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cuevas-Nunez</surname> <given-names>M</given-names></name> <name><surname>Silberberg</surname> <given-names>VIA</given-names></name> <name><surname>Arregui</surname> <given-names>M</given-names></name> <name><surname>Jham</surname> <given-names>BC</given-names></name> <name><surname>Ballester-Victoria</surname> <given-names>R</given-names></name> <name><surname>Koptseva</surname> <given-names>I</given-names></name><etal/></person-group> <article-title>Diagnostic performance of ChatGPT-4.0 in histopathological description analysis of oral and maxillofacial lesions: a comparative study with pathologists</article-title>. <source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>. (<year>2025</year>) <volume>139</volume>(<issue>4</issue>):<fpage>453</fpage>&#x2013;<lpage>461</lpage>. <pub-id pub-id-type="doi">10.1016/j.oooo.2024.11.087</pub-id><pub-id pub-id-type="pmid">39709300</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaygisiz</surname> <given-names>&#x00D6;F</given-names></name> <name><surname>Teke</surname> <given-names>MT</given-names></name></person-group>. <article-title>Can DeepSeek and ChatGPT be used in the diagnosis of oral pathologies?</article-title> <source>BMC Oral Health</source>. (<year>2025</year>) <volume>25</volume>(<issue>1</issue>):<fpage>638</fpage>. <pub-id pub-id-type="doi">10.1186/s12903-025-06034-x</pub-id><pub-id pub-id-type="pmid">40281436</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J</given-names></name> <name><surname>Liang</surname> <given-names>X</given-names></name> <name><surname>Fang</surname> <given-names>D</given-names></name> <name><surname>Zheng</surname> <given-names>J</given-names></name> <name><surname>Yin</surname> <given-names>C</given-names></name> <name><surname>Xie</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>The diagnostic ability of GPT-3.5 and GPT-4.0 in surgery: comparative analysis</article-title>. <source>J Med Internet Res</source>. (<year>2024</year>) <volume>26</volume>:<fpage>e54985</fpage>. <pub-id pub-id-type="doi">10.2196/54985</pub-id><pub-id pub-id-type="pmid">39255016</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Currie</surname> <given-names>G</given-names></name> <name><surname>Robbie</surname> <given-names>S</given-names></name> <name><surname>Tually</surname> <given-names>P</given-names></name></person-group>. <article-title>Chatgpt and patient information in nuclear medicine: GPT-3.5 versus GPT-4</article-title>. <source>J Nucl Med Technol</source>. (<year>2023</year>) <volume>51</volume>(<issue>4</issue>):<fpage>307</fpage>&#x2013;<lpage>313</lpage>. <pub-id pub-id-type="doi">10.2967/jnmt.123.266151</pub-id><pub-id pub-id-type="pmid">37699647</pub-id></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Nalley</surname> <given-names>A</given-names></name> <name><surname>Hao</surname> <given-names>J</given-names></name> <name><surname>Ai</surname> <given-names>QY</given-names></name> <name><surname>Yeung</surname> <given-names>AW</given-names></name> <name><surname>Tanaka</surname> <given-names>R</given-names></name><etal/></person-group> <article-title>The performance of large language models in dentomaxillofacial radiology: a systematic review</article-title>. <source>Dentomaxillofac Radiol</source>. (<year>2025</year>) <volume>54</volume>:<fpage>613</fpage>&#x2013;<lpage>631</lpage>. <pub-id pub-id-type="doi">10.1093/dmfr/twaf060</pub-id><pub-id pub-id-type="pmid">40796316</pub-id></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Panwar</surname> <given-names>P</given-names></name> <name><surname>Gupta</surname> <given-names>S</given-names></name></person-group>. <article-title>A review: exploring the role of ChatGPT in the diagnosis and treatment of oral pathologies</article-title>. <source>Oral Oncol Rep</source>. (<year>2024</year>) <volume>10</volume>:<fpage>100225</fpage>. <pub-id pub-id-type="doi">10.1016/j.oor.2024.100225</pub-id></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rokhshad</surname> <given-names>R</given-names></name> <name><surname>Mohammad-Rahimi</surname> <given-names>H</given-names></name> <name><surname>Price</surname> <given-names>JB</given-names></name> <name><surname>Shoorgashti</surname> <given-names>R</given-names></name> <name><surname>Abbasiparashkouh</surname> <given-names>Z</given-names></name> <name><surname>Esmaeili</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Artificial intelligence for classification and detection of oral mucosa lesions on photographs: a systematic review and meta-analysis</article-title>. <source>Clin Oral Investig</source>. (<year>2024</year>) <volume>28</volume>(<issue>1</issue>):<fpage>88</fpage>. <pub-id pub-id-type="doi">10.1007/s00784-023-05475-4</pub-id><pub-id pub-id-type="pmid">38217733</pub-id></mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Di Fede</surname> <given-names>O</given-names></name> <name><surname>La Mantia</surname> <given-names>G</given-names></name> <name><surname>Parola</surname> <given-names>M</given-names></name> <name><surname>Maniscalco</surname> <given-names>L</given-names></name> <name><surname>Matranga</surname> <given-names>D</given-names></name> <name><surname>Tozzo</surname> <given-names>P</given-names></name><etal/></person-group> <article-title>Automated detection of oral malignant lesions using deep learning: scoping review and meta-analysis</article-title>. <source>Oral Dis</source>. (<year>2025</year>) <volume>31</volume>(<issue>4</issue>):<fpage>1054</fpage>&#x2013;<lpage>1064</lpage>. <pub-id pub-id-type="doi">10.1111/odi.15188</pub-id><pub-id pub-id-type="pmid">39489724</pub-id></mixed-citation></ref>
<ref id="B38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Danesh</surname> <given-names>A</given-names></name> <name><surname>Danesh</surname> <given-names>A</given-names></name> <name><surname>Danesh</surname> <given-names>F</given-names></name></person-group>. <article-title>Innovating dental diagnostics: ChatGPT&#x0027;s accuracy on diagnostic challenges</article-title>. <source>Oral Dis</source>. (<year>2024</year>) <volume>31</volume>(<issue>3</issue>):<fpage>911</fpage>&#x2013;<lpage>917</lpage>. <pub-id pub-id-type="doi">10.1111/odi.15082</pub-id><pub-id pub-id-type="pmid">39039720</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2845235/overview">Rodrigo Resende</ext-link>, Fluminense Federal University, Brazil</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/742655/overview">Ali-Farid Safi</ext-link>, Craniologicum&#x2014;Center for Craniomaxillofacial Surgery, Switzerland</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3006106/overview">Shaul Hameed Kolarkodi</ext-link>, Qassim University, Saudi Arabia</p></fn>
</fn-group>
</back>
</article>