<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oral Health</journal-id><journal-title-group>
<journal-title>Frontiers in Oral Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oral Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-4842</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/froh.2025.1737114</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Performance of five free large language models in dental trauma: a 30-day longitudinal benchmark study</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Lisboa</surname><given-names>Rafaela Mancini</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Braido</surname><given-names>Arian</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>de-Jesus-Soares</surname><given-names>Adriana</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Tewari</surname><given-names>Nitesh</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1827777/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Soares</surname><given-names>Carlos Jos&#x00E9;</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Paranhos</surname><given-names>Luiz Renato</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/996053/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Vieira</surname><given-names>Walbert A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1633967/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; 
editing</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Dentistry, Centro Universit&#x00E1;rio das Faculdades Associadas de Ensino &#x2013; UNIFAE</institution>, <city>S&#x00E3;o Jo&#x00E3;o da Boa Vista</city>, <country country="br">Brazil</country></aff>
<aff id="aff2"><label>2</label><institution>Division of Endodontics, Department of Restorative Dentistry, Piracicaba Dental School, Universidade Estadual de Campinas - UNICAMP</institution>, <city>Piracicaba</city>, <country country="br">Brazil</country></aff>
<aff id="aff3"><label>3</label><institution>Division of Pediatric and Preventive Dentistry, Centre for Dental Education and Research, All India Institute of Medical Sciences</institution>, <city>Delhi</city>, <country country="in">India</country></aff>
<aff id="aff4"><label>4</label><institution>Department of Operative Dentistry and Dental Materials, School of Dentistry, Universidade Federal de Uberl&#x00E2;ndia</institution>, <city>Uberl&#x00E2;ndia</city>, <country country="br">Brazil</country></aff>
<aff id="aff5"><label>5</label><institution>Department of Orthodontics, Universidade Federal de Uberl&#x00E2;ndia</institution>, <city>Uberl&#x00E2;ndia</city>, <country country="br">Brazil</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Luiz Renato Paranhos <email xlink:href="mailto:paranhos.lrp@gmail.com">paranhos.lrp@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-12-15"><day>15</day><month>12</month><year>2025</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2025</year></pub-date>
<volume>6</volume><elocation-id>1737114</elocation-id>
<history>
<date date-type="received"><day>01</day><month>11</month><year>2025</year></date>
<date date-type="rev-recd"><day>28</day><month>11</month><year>2025</year></date>
<date date-type="accepted"><day>30</day><month>11</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Lisboa, Braido, de-Jesus-Soares, Tewari, Soares, Paranhos and Vieira.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Lisboa, Braido, de-Jesus-Soares, Tewari, Soares, Paranhos and Vieira</copyright-holder><license><ali:license_ref start_date="2025-12-15">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Objective</title>
<p>To compare the accuracy and consistency of five large language models (LLMs) in generating responses about dental trauma.</p>
</sec><sec><title>Materials and methods</title>
<p>Sixty dichotomous (true/false) questions were submitted daily to each LLM (ChatGPT, Google Gemini, Microsoft Copilot, DeepSeek, and Meta AI) for 30 days, totaling 18,000 responses. All interactions were performed under two prompting conditions (zero-shot and zero-shot with context). LLM responses were compared against the International Association of Dental Traumatology (IADT) guidelines. Statistical analysis was conducted using a generalized linear mixed model (GLMM) with a binomial distribution (<italic>&#x03B1;</italic>&#x2009;&#x003D;&#x2009;0.05), alongside calculation of sensitivity, specificity, accuracy, and area under the ROC curve (AUC) based on the 60-item set. Temporal stability was assessed using the intraclass correlation coefficient (ICC).</p>
</sec><sec><title>Results</title>
<p>All LLMs achieved accuracy above 85&#x0025;, with Microsoft Copilot (91.1&#x0025;) and DeepSeek (90&#x0025;) performing best; no significant difference was observed between them (<italic>p</italic>&#x2009;&#x003E;&#x2009;0.05), but both outperformed the other models (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.05). DeepSeek and Microsoft Copilot also showed the highest consistency over 30 days (ICC&#x2009;&#x003E;&#x2009;0.90).</p>
</sec><sec><title>Conclusion</title>
<p>All evaluated LLMs, particularly Copilot and DeepSeek, demonstrated high accuracy in providing information on dental trauma, with stable performance over time. The use of a context prompt did not significantly affect accuracy or stability.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>chatbot</kwd>
<kwd>dental trauma</kwd>
<kwd>large language models</kwd>
<kwd>traumatic dental injuries</kwd>
</kwd-group><funding-group><award-group id="gs1"><funding-source id="sp1"><institution-wrap><institution>Conselho Nacional de Desenvolvimento Cient&#x00ED;fico e Tecnol&#x00F3;gico</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100003593</institution-id></institution-wrap></funding-source></award-group><award-group id="gs2"><funding-source id="sp2"><institution-wrap><institution>Funda&#x00E7;&#x00E3;o de Amparo &#x00E0; Pesquisa do Estado de Minas Gerais</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100004901</institution-id></institution-wrap></funding-source></award-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This study was financed in part by the Coordena&#x00E7;&#x00E3;o de Aperfei&#x00E7;oamento de Pessoal de N&#x00ED;vel Superior - Brazil (CAPES) - Finance Code 001. We are thankful for the support of Conselho Nacional de Desenvolvimento Cient&#x00ED;fico e Tecnol&#x00F3;gico - Brazil (CNPq, INCT 406840/2022-9 and CNPq grant number 305356/2024-0) and of Funda&#x00E7;&#x00E3;o de Amparo &#x00E0; Pesquisa do Estado de Minas Gerais - Brazil (FAPEMIG, APQ-02105-18 and RED-00204-23).</funding-statement></funding-group><counts>
<fig-count count="3"/>
<table-count count="2"/><equation-count count="0"/><ref-count count="54"/><page-count count="9"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Oral Health Promotion</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Large language models (LLMs) are AI systems that generate human-like responses using natural language processing and deep neural networks (<xref ref-type="bibr" rid="B1">1</xref>). Unlike search engines, they deliver information conversationally, aiding understanding of complex topics (<xref ref-type="bibr" rid="B2">2</xref>).</p>
<p>The popularization of LLMs began in late 2022 with the public release of ChatGPT by OpenAI (<xref ref-type="bibr" rid="B3">3</xref>). Since then, their use as accessible sources of medical guidance has been considered promising (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B7">7</xref>). However, limitations such as hallucinations (incorrect but convincing answers) and bias in training data remain significant challenges (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>Dental trauma accounts for a considerable proportion of dental emergencies (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>) and is recognized as a global public health problem (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>). Its severity varies, causing pain, functional, phonetic, and esthetic impairment, and may ultimately lead to tooth loss (<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B14">14</xref>). Immediate management is critical for prognosis (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>), yet misinformation among both professionals and laypeople remains a barrier (<xref ref-type="bibr" rid="B17">17</xref>&#x2013;<xref ref-type="bibr" rid="B19">19</xref>). Limited clinical exposure during undergraduate training, combined with controversial and weakly evidence-based recommendations, further increases the risk of mismanagement (<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B21">21</xref>).</p>
<p>To address these issues, treatment guidelines and professional orientation platforms have been developed to disseminate knowledge and standardize management worldwide (<xref ref-type="bibr" rid="B22">22</xref>&#x2013;<xref ref-type="bibr" rid="B25">25</xref>). Given the potential of LLMs as tools for health information dissemination, several studies have already evaluated their accuracy in the context of dental trauma (<xref ref-type="bibr" rid="B26">26</xref>) and general dentistry (<xref ref-type="bibr" rid="B27">27</xref>&#x2013;<xref ref-type="bibr" rid="B31">31</xref>). In the past 2 years, several research groups (<xref ref-type="bibr" rid="B32">32</xref>&#x2013;<xref ref-type="bibr" rid="B36">36</xref>) have tested the adequacy of responses of different LLMs with variability in the number and types of questions asked, the LLM models tested, and the outcome variables assessed. One study assessed Gemini using questions derived from the European Society of Endodontology (ESE) guidelines, reporting an accuracy of 80.8&#x0025; (<xref ref-type="bibr" rid="B35">35</xref>). Another compared six chatbots&#x2014;ChatGPT 3.5, ChatGPT 4.0, Gemini, Copilot, Perplexity, and ChatGPT 4.0 Plus&#x2014;across 972 interactions with 18 questions on tooth avulsion (<xref ref-type="bibr" rid="B33">33</xref>). ChatGPT 4.0 Plus achieved the highest accuracy (95.6&#x0025;), whereas Perplexity showed the lowest (67.2&#x0025;) (<xref ref-type="bibr" rid="B33">33</xref>). Despite their relevance, these studies did not assess the longitudinal consistency of responses and excluded newer models such as ChatGPT-4o, DeepSeek 3.0, and MetaAI (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B35">35</xref>).</p>
<p>With the rapid evolution of LLM algorithms and architectures, continuous updating of these data is essential, incorporating more recent and robust models. Therefore, the primary objective of this study was to compare the accuracy and longitudinal consistency (30 days) of five modern, freely accessible LLMs (Copilot, DeepSeek, ChatGPT-4o, Gemini, and MetaAI) as sources of information on dental trauma, using IADT guidelines as the gold standard. The secondary objective was to assess whether interaction strategies (zero-shot vs. few-shot learning) influence their performance.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Materials and methods</title>
<p>This observational longitudinal study did not involve human subjects or identifiable data. Ethics approval was therefore not required. Reporting followed TRIPOD-LLM guidelines (<xref ref-type="bibr" rid="B37">37</xref>) (<xref ref-type="sec" rid="s11">Supplementary Material 1</xref>) and the CHART statement (<xref ref-type="bibr" rid="B38">38</xref>).</p>
<sec id="s2a"><label>2.1</label><title>Benchmark development</title>
<p>Two endodontists (WAV and AB) with five years&#x0027; dental trauma experience developed a 60-question true/false benchmark. Items covered diagnosis and management of traumatic dental injuries in primary and permanent teeth, based on the latest IADT guidelines (<xref ref-type="sec" rid="s11">Supplementary Material 2</xref>). <italic>Ground-truth</italic> answers were set by consensus between the two dentists.</p>
<p>The IADT guidelines were selected as the primary reference because they represent expert consensus and evidence-based recommendations in dental trauma. Three external experts (two endodontists, one pediatric dentist; &#x003E;5 years of experience) validated the benchmark questions for clarity and objectivity. Questions targeted key clinical decisions, differential diagnoses, complications, and emergency care. The dichotomous format minimized ambiguity and allowed objective accuracy measurement. The benchmark enabled both cross-sectional and longitudinal comparisons.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Evaluation protocols</title>
<p>For each interaction, new accounts were created for the five different LLMs included in this study, selected for their wide free availability and diverse architectures (<xref ref-type="table" rid="T1">Table&#x00A0;1</xref>). All models were accessed through the Google Chrome browser (no active history, cache, or logged-in chat history) to prevent any influence from prior interactions. All interactions were conducted in Portuguese to ensure linguistic consistency and to assess performance in a non-English clinical context. Testing took place between February and March 2025, and no frozen or version-locked API endpoints were available during data collection; responses therefore reflect the publicly deployed model versions at that time.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Specifications of the LLMs used in this study.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Platform</th>
<th valign="top" align="center">Access type</th>
<th valign="top" align="center">Architecture</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">ChatGPT-4o</td>
<td valign="top" align="left">OpenAI (<ext-link ext-link-type="uri" xlink:href="https://www.chat.openai.com">https://www.chat.openai.com</ext-link>)</td>
<td valign="top" align="left">Free/Proprietary</td>
<td valign="top" align="left">Transformer, Instruct-tuned</td>
</tr>
<tr>
<td valign="top" align="left">Gemini 2.5 Flash</td>
<td valign="top" align="left">Google (<ext-link ext-link-type="uri" xlink:href="https://www.gemini.google.com">https://www.gemini.google.com</ext-link>)</td>
<td valign="top" align="left">Free/Proprietary</td>
<td valign="top" align="left">Transformer multimodal</td>
</tr>
<tr>
<td valign="top" align="left">Microsoft Copilot</td>
<td valign="top" align="left">Microsoft (<ext-link ext-link-type="uri" xlink:href="https://www.copilot.microsoft.com">https://www.copilot.microsoft.com</ext-link>)</td>
<td valign="top" align="left">Free/Integrated</td>
<td valign="top" align="left">GPT-4 based</td>
</tr>
<tr>
<td valign="top" align="left">DeepSeek 3.0</td>
<td valign="top" align="left">DeepSeek (<ext-link ext-link-type="uri" xlink:href="https://www.deepseek.com">https://www.deepseek.com</ext-link>)</td>
<td valign="top" align="left">Free/Open API</td>
<td valign="top" align="left">Chinese LLM (bilingual)</td>
</tr>
<tr>
<td valign="top" align="left">Meta AI (LLaMA 4)</td>
<td valign="top" align="left">Meta (<ext-link ext-link-type="uri" xlink:href="https://www.meta.ai">https://www.meta.ai</ext-link>)</td>
<td valign="top" align="left">Free/Proprietary</td>
<td valign="top" align="left">LLaMA-based, fine-tuned</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A single trained researcher (RML) generated 60 responses daily for 30 consecutive days, always in the late afternoon (4&#x2013;5 p.m.), to minimize potential performance variability due to server load or update cycles. A fresh conversation window was initiated before each testing block, and session data were cleared to ensure that no memory from previous interactions was retained. Models were tested under two conditions:
<list list-type="simple">
<list-item>
<p>Zero-shot: questions presented directly, without context (e.g., &#x201C;An avulsed primary tooth should never be replanted. True or false?&#x201D;).</p></list-item>
<list-item>
<p>Zero-shot with context: questions preceded by the instruction &#x201C;Answer as a dentist, following the most recent IADT guidelines. An avulsed primary tooth should never be replanted. True or false?&#x201D;</p></list-item>
</list>To fully isolate the two conditions, the contextual interactions were always conducted in a new chat, independent from the zero-shot session of the same day.</p>
<p>This approach simulated two usage scenarios: a lay user and a professional familiar with prompt engineering. It allowed us to observe the impact of calibration on model behavior (<italic>prompt conditioning</italic>). In total, 18,000 interactions were collected (60 questions&#x2009;&#x00D7;&#x2009;5 models&#x2009;&#x00D7;&#x2009;2 conditions&#x2009;&#x00D7;&#x2009;30 days). Responses were stored in a purpose-built Excel spreadsheet (Microsoft, Redmond, USA) and compared with the reference key. The same researcher who generated the responses also tabulated the data. A second researcher, blinded to the chatbot and not involved in data collection, evaluated each response as &#x201C;correct&#x201D; or &#x201C;incorrect&#x201D; based on the predefined <italic>ground truth</italic>.</p>
</sec>
<sec id="s2c"><label>2.3</label><title>Statistical analysis</title>
<p>Data were analyzed in R (v4.5, R Core Team, Vienna, Austria). Sensitivity, specificity, and AUC were computed from the pooled binary responses obtained across all testing days. This aggregation was used to represent the overall diagnostic performance of each model. Accuracy and 95&#x0025; confidence intervals (95&#x0025; CI) were estimated using the 60 unique items as the unit of analysis. For each Model&#x2009;&#x00D7;&#x2009;Prompt cell, accuracy was first computed at the item level (proportion of correct responses for each of the 60 items across the 30 repeated interactions). The overall accuracy for each cell was then obtained as the means of these 60 item-level accuracies. Precision and 95&#x0025; CI were quantified using the standard error of a proportion based on 60 items.</p>
<p>Chatbot accuracy was compared using a Generalized Linear Mixed Model (GLMM) with binomial distribution and logistic link. Fixed effects included <italic>LLM</italic> (five levels) and <italic>Prompt</italic> (two levels), as well as their interaction (<italic>LLM</italic>&#x2009;<italic>&#x00D7;</italic>&#x2009;<italic>Prompt</italic>), forming a 5&#x2009;&#x00D7;&#x2009;2 factorial design. Random effects included questions (to account for variation in topics) and day (to account for temporal variation). Differences were tested with Wald &#x03C7;<sup>2</sup> and likelihood ratio tests (LRT). <italic>Post hoc</italic> pairwise comparisons were conducted using Tukey-adjusted contrasts, and results were reported as odds ratios (OR) with 95&#x0025; CIs. Effect sizes were computed using Cohen&#x0027;s h, appropriate for proportional data, and interpreted as small (0.20), medium (0.50), or large (0.80) according to Cohen (<xref ref-type="bibr" rid="B39">39</xref>). Finally, A <italic>post hoc</italic> power analysis was performed using the standard errors of the GLMM fixed effects to estimate the minimum effect detectable with 80&#x0025; power at <italic>&#x03B1;</italic>&#x2009;&#x003D;&#x2009;0.05. For each fixed effect (<italic>Model, Prompt, and Model</italic>&#x2009;<italic>&#x00D7;</italic>&#x2009;<italic>Prompt</italic>), the minimum detectable log-odds difference was computed as 2.8&#x2009;&#x00D7;&#x2009;SE and converted to odds ratios. These values were compared with the magnitude of observed effects to determine whether the study had sufficient power to detect realistic differences among LLMs.</p>
<p>Day-to-day agreement was evaluated using the intraclass correlation coefficient (ICC) treating items as targets and days as raters (two-way random effects, absolute agreement, single measures) and its 95&#x0025; CI. ICC values &#x2265;0.75 were interpreted as good reliability, 0.5&#x2013;0.75 as moderate, and &#x003C;0.5 as poor stability.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<p>A total of 18,000 responses were generated, and none was excluded from analysis. Copilot showed the highest accuracy [zero-shot: 0.91 (95&#x0025; CI: 0.84;0.98); with context: 0.91 (95&#x0025; CI: 0.83;0.98)] followed by DeepSeek [zero-shot: 0.90 (95&#x0025; CI: 0.82; 0.98); with context: 0.90 (95&#x0025; CI: 0.82; 0.97)], MetaAI [zero-shot: 0.88 (95&#x0025; CI: 0.80;0.96); with context: 0.88 (95&#x0025; CI: 0.80;0.96)], ChatGPT [zero-shot: 0.88 (95&#x0025; CI: 0.79; 0.96); with context: 0.87 (95&#x0025; CI: 0.78; 0.95)], and Gemini [zero-shot: 0.86 (95&#x0025; CI: 0.77;0.95); with context: 0.86 (95&#x0025; CI: 0.77;0.95)] (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>). Performance metrics are detailed in <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>. Copilot achieved the highest sensitivity (0.93) and AUC (0.90), while Meta AI showed the best specificity (0.92&#x2013;0.91).</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Proportion of correct and incorrect responses for each LLM under both prompting conditions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-06-1737114-g001.tif"><alt-text content-type="machine-generated">Bar chart comparing response accuracy of AI models: Copilot, DeepSeek, Meta AI, ChatGPT, and Gemini. Colors indicate response types: incorrect few-shot (pink), incorrect zero-shot (yellow), correct few-shot (teal), and correct zero-shot (dark green). Copilot: few-shot 1629, zero-shot 1640; DeepSeek: few-shot 1618, zero-shot 1620; Meta AI: both few-shot and zero-shot 1586; ChatGPT: few-shot 1565, zero-shot 1576; Gemini: few-shot 1543, zero-shot 1544. Incorrect responses are fewer than correct in all models.</alt-text>
</graphic>
</fig>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Confusion matrices, performance metrics, and ROC curve for each LLM. ACC, accuracy; Sens, sensitivity; Spec, specificity; CP, with prompt; SP, without prompt.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-06-1737114-g002.tif"><alt-text content-type="machine-generated">Six confusion matrices compare chatbot responses&#x2014;Copilot, ChatGPT, Meta AI, DeepSeek, and Gemini&#x2014;on true and false outcomes with few-shot and zero-shot methods. Metrics include accuracy, sensitivity, and specificity. A ROC curve shows different models' sensitivity versus specificity. Legends indicate chat models' AUC values, with Copilot achieving the highest at zero-shot. Color gradient from light to dark green represents count.</alt-text>
</graphic>
</fig>
<p>The factorial GLMM significantly improved model fit compared to the null model [&#x03C7;<sup>2</sup>(9)&#x2009;&#x003D;&#x2009;148.6, <italic>p</italic>&#x2009;&#x003C;&#x2009;0.0001]. The main effect of LLMs was significant (&#x03C7;<sup>2</sup>&#x2009;&#x003D;&#x2009;75.66, <italic>p</italic>&#x2009;&#x003C;&#x2009;0.001), whereas Prompt (&#x03C7;<sup>2</sup>&#x2009;&#x003D;&#x2009;0.76, <italic>p</italic>&#x2009;&#x003D;&#x2009;0.38) and the LLMs&#x2009;&#x00D7;&#x2009;Prompt interaction (&#x03C7;<sup>2</sup>&#x2009;&#x003D;&#x2009;0.99, <italic>p</italic>&#x2009;&#x003D;&#x2009;0.91) were not. Random effect variances indicated substantial question-level heterogeneity (<italic>&#x03C3;</italic><sup>2</sup>_question&#x2009;&#x003D;&#x2009;23.12; SD&#x2009;&#x003D;&#x2009;4.81) and minimal day-to-day variability (<italic>&#x03C3;</italic><sup>2</sup>_day&#x2009;&#x003D;&#x2009;0.10; SD&#x2009;&#x003D;&#x2009;0.32).</p>
<p>Pairwise Tukey-adjusted comparisons revealed that Copilot and DeepSeek were superior to ChatGPT and Gemini (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.01) (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>). Interactions with context showed similar results (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>). Effect size analysis indicated that differences between models were small (Cohen&#x0027;s <italic>d</italic> range: 0.02&#x2013;0.17). The <italic>post hoc</italic> power analysis indicated that the study had &#x003E;80&#x0025; power to detect main effects of <italic>LLMs</italic>, <italic>Prompt</italic>, and <italic>Model</italic>&#x2009;<italic>&#x00D7;</italic>&#x2009;<italic>Prompt</italic> interaction effects, confirming that the sample size was sufficient to detect effects of realistic magnitude (<xref ref-type="sec" rid="s11">Supplementary Material 3</xref>).</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Pairwise comparisons between different types of chatbots with Tukey adjustment and Cohen&#x0027;s h effect size.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Chatbot</th>
<th valign="top" align="center" colspan="3">Zero-shot</th>
<th valign="top" align="center" colspan="3">Zero-shot with context</th>
</tr>
<tr>
<th valign="top" align="center">OR (95&#x0025; CI)</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
<th valign="top" align="center">Cohen&#x0027;s h</th>
<th valign="top" align="center">OR (95&#x0025; CI)</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
<th valign="top" align="center">Cohen&#x0027;s h</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="7">Reference: ChatGPT</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Copilot</td>
<td valign="top" align="center">2.64 (1.51; 4.62)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.115</td>
<td valign="top" align="center">2.50 (1.45; 4.30)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.113</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;DeepSeek</td>
<td valign="top" align="center">1.88 (1.09; 3.22)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.077</td>
<td valign="top" align="center">2.09 (1.23; 3.55)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.092</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Gemini</td>
<td valign="top" align="center">0.68 (0.41; 1.11)</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">0.052</td>
<td valign="top" align="center">0.77 (0.47;1.25)</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">0.035</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;MetaAI</td>
<td valign="top" align="center">1.14 (0.68; 1.90)</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">1.31 (0.79; 2.18)</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.035</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="7">Reference: Copilot</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;DeepSeek</td>
<td valign="top" align="center">0.71 (0.40; 1.28)</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center">0.038</td>
<td valign="top" align="center">0.83 (0.47; 1.48)</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.021</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Gemini</td>
<td valign="top" align="center">0.26 (0.15; 0.45)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.167</td>
<td valign="top" align="center">0.31 (0.18; 0.53)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.149</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;MetaAI</td>
<td valign="top" align="center">0.43 (0.25; 0.76)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.099</td>
<td valign="top" align="center">0.53 (0.30; 0.91)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.077</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="7">Reference: DeepSeek</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Gemini</td>
<td valign="top" align="center">0.36 (0.21; 0.61)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.130</td>
<td valign="top" align="center">0.37 (0.22; 0.62)</td>
<td valign="top" align="center">&#x003C;.01</td>
<td valign="top" align="center">0.128</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;MetaAI</td>
<td valign="top" align="center">0.61 (0.35; 1.05)</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">0.061</td>
<td valign="top" align="center">0.62 (0.36; 1.08)</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">0.057</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="7">Reference: Gemini</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;MetaAI</td>
<td valign="top" align="center">1.68 (1.02; 2.77)</td>
<td valign="top" align="center">0.03</td>
<td valign="top" align="center">0.069</td>
<td valign="top" align="center">1.70 (1.03; 2.80)</td>
<td valign="top" align="center">0.03</td>
<td valign="top" align="center">0.071</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>OR, odds ratio; CI, confidence interval.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>All models showed good consistency over the 30 days, with DeepSeek (zero-shot) (ICC: 0.95; 95&#x0025; CI: 0.94&#x2013;0.97) and Copilot (with context) (ICC: 0.94; 95&#x0025; CI: 0.91&#x2013;0.96) standing out (<xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>). The use of the prompt did not significantly alter the consistency of any LLM.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>Mean responses (0 &#x2013; false; 1 &#x2013; true) of each LLM with zero-shot or with context prompt. Day-to-day agreement was evaluated using the intraclass correlation coefficient (ICC). ZS, zero shot; WC, with context.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="froh-06-1737114-g003.tif"><alt-text content-type="machine-generated">Line graph comparing the mean responses of different chatbots over thirty days. The y-axis represents the mean response, while the x-axis represents the day. Various colored lines denote different chatbots and prompts: Copilot, Deepseek, Gemini, ChatGPT, and MetaAI, each with with-context (WC) and zero-shot (ZS) conditions. Each chatbot&#x2019;s intraclass correlation coefficient (ICC) and confidence intervals are listed in a legend on the right. The graph shows similar trends for all lines, indicating consistency in performance across days.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>This study evaluated the performance of five AI-based chatbots as information sources for dental trauma decision-making, using two interaction modes. All models showed high accuracy, with Microsoft Copilot and DeepSeek performing best. The use of context prompts did not significantly affect accuracy.</p>
<p>All models achieved accuracy above 85&#x0025;, indicating that current LLMs are reliable sources for dental trauma information. Previous studies reported wider variability, with accuracy starting from 40&#x0025; (<xref ref-type="bibr" rid="B32">32</xref>&#x2013;<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>). Our results suggest an upward trend in chatbot accuracy compared to earlier versions (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>), likely due to continuous model updates (<xref ref-type="bibr" rid="B41">41</xref>). In comparison with other fields of dentistry, our study showed accuracy levels consistent with those reported in prior research (<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>).</p>
<p>The present study showed that Microsoft Copilot achieved the best performance across multiple evaluation metrics. This result aligns with previous research (<xref ref-type="bibr" rid="B30">30</xref>), which demonstrated Copilot&#x0027;s superiority as an information source in answering multiple choice questions in dentistry. Another study reported higher accuracy (94.4&#x0025;) for Microsoft Copilot in resolving complex osteoarticular infection cases compared with ChatGPT-4o (85.7&#x0025;) and Gemini 2.0 Flash (86.5&#x0025;) (<xref ref-type="bibr" rid="B42">42</xref>). In the context of dental trauma, our results are consistent with Mustulo&#x011F;lu et al. (<xref ref-type="bibr" rid="B33">33</xref>), where Microsoft Copilot (80.8&#x0025;) outperformed the free version of ChatGPT-4 (79.6&#x0025;) and Google Gemini (78.3&#x0025;). However, in the same study, Copilot performed worse than the paid version of ChatGPT-4o (95.6&#x0025;). This performance may be explained by recent technical updates to Copilot, including access to medical information sources and refined embeddings for healthcare terminology (<xref ref-type="bibr" rid="B42">42</xref>).</p>
<p>Our study also found that DeepSeek achieved significantly better results than ChatGPT-4o and Gemini 2.5 Flash. DeepSeek uses a transformer architecture, is trained on a specialized multilingual corpus, and incorporates information sources in English and Chinese, expanding its training for clinical scenarios (128k tokens) (<xref ref-type="bibr" rid="B43">43</xref>). DeepSeek-R1 has shown higher accuracy (86.2&#x0025;) in complex ophthalmology cases compared with Gemini 2.0 Pro (71.5&#x0025;) and ChatGPT (69.2&#x0025;) (<xref ref-type="bibr" rid="B44">44</xref>). In dentistry, ChatGPT and DeepSeek have demonstrated superior accuracy compared with Gemini for multiple-choice questions on fixed prosthodontics (<xref ref-type="bibr" rid="B45">45</xref>). This is the first study to assess DeepSeek for dental trauma. Further research should use varied Q&#x0026;A methods to verify its effectiveness and review response quality and readability (<xref ref-type="bibr" rid="B46">46</xref>).</p>
<p>In this study, specificity was calculated to evaluate the models&#x0027; ability to identify false alternatives. This metric is important because widely used chatbots are trained on internet data, making them susceptible to disseminating incorrect information (<xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B47">47</xref>). All models achieved specificity above 80&#x0025;, with Meta AI reaching 91.8&#x0025;. These findings suggest that chatbots can effectively identify and classify false information, showing promise in reducing the spread of misinformation on clinical practices in dental trauma. Previous research has highlighted the relatively low accuracy of Llama-based chatbots compared to ChatGPT when addressing multiple-choice questions in prosthodontic and restorative dentistry (<xref ref-type="bibr" rid="B28">28</xref>). These findings emphasize the variability in performance among different LLMs depending on the dental specialty and question format. Notably, this study is the first to apply dichotomous question evaluation metrics to LLMs in the context of dental trauma. By employing this approach, the study provides a new perspective on chatbot performance within this specific clinical scenario. Further research is recommended to validate these results and extend the assessment to other dental specialties, ensuring comprehensive understanding of LLM effectiveness across various contexts in dentistry.</p>
<p>A key contribution of this study was the longitudinal analysis of response consistency over 30 consecutive days. Models such as Microsoft Copilot and DeepSeek showed high stability throughout the period, consistent with previous reports (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B41">41</xref>). This outcome is crucial as it indicates that these models provide consistent information in routine clinical situations, ensuring reliability in dental trauma guidance.</p>
<p>Although calibration prompts improved consistency for some LLMs, they did not significantly enhance accuracy for any chatbot, contrary to findings in other medical studies (<xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B49">49</xref>). This result may be due to technical factors, such as variable contextualization capacity or training data bias, or methodological factors, such as insufficient prompt specificity or model adaptation (<xref ref-type="bibr" rid="B50">50</xref>). Future research should investigate optimized and adaptive prompt engineering for each LLM architecture to improve accuracy in complex clinical contexts, including dental trauma (<xref ref-type="bibr" rid="B51">51</xref>, <xref ref-type="bibr" rid="B52">52</xref>).</p>
<p>This study has limitations. First, we evaluated only free, web-accessible chatbot versions, so future studies should include a wider range of LLMs to enhance generalizability. The dichotomous question-and-answer format may not capture the full complexity of clinical reasoning, especially when multiple management options exist (<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>), and may inflate apparent accuracy. A recent systematic review with network meta-analysis highlighted substantial variability in LLM accuracy depending on question type: ChatGPT-4o (SUCRA&#x2009;&#x003D;&#x2009;0.9207) demonstrated strong performance in terms of accuracy for objective questions, while ChatGPT-4 (SUCRA&#x2009;&#x003D;&#x2009;0.8708), followed by Claude 2.1 (SUCRA&#x2009;&#x003D;&#x2009;0.7796), excelled at answering open-ended questions (<xref ref-type="bibr" rid="B41">41</xref>). Therefore, future studies should include multiple-choice questions and simulated clinical cases (image and text) to broaden applicability (<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B55">55</xref>), especially in the dental trauma context (<xref ref-type="bibr" rid="B32">32</xref>).</p>
<p>Additionally, results were limited to Portuguese interactions. The choice to conduct this study exclusively in Portuguese was made to accurately reflect typical user interactions in Brazil, where large language models are frequently accessed in the native language. A previous study observed that while accuracy appears similar in Portuguese (63.1&#x0025;) and English (60.2&#x0025;), interaction patterns differed noticeably between languages (<xref ref-type="bibr" rid="B53">53</xref>). This observation underscores the need for further multilingual research to evaluate chatbot performance across diverse cultural and linguistic contexts.</p>
<p>Lastly, although this study performed 18,000 interactions over 30 consecutive days, accuracy estimates were ultimately bounded by the 60 unique items included in the benchmark. To address this limitation, future iterations of the benchmark will include a larger and more varied selection of items. This expansion aims to improve the breadth of content coverage while still allowing for meaningful longitudinal comparisons (<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>). Nonetheless, the <italic>post hoc</italic> power analysis conducted for our study indicated that the current sample size was sufficient to achieve greater than 80&#x0025; statistical power for detecting significant differences within the GLMM framework. This finding supports the robustness of the study&#x0027;s conclusions despite the limited number of benchmark items.</p>
<p>On the other hand, this study is the first to evaluate the performance of new LLMs, such as DeepSeek and MetaAI, in the comprehensive context of dental trauma. The 30-day longitudinal evaluation captured algorithmic variations and updates, as well as temporal stability, a dimension absent from prior single-shot evaluations. This study also pioneered the assessment of calibration prompt influence on LLM accuracy and consistency in dental trauma. Finally, robust statistical analysis, using multiple performance metrics and generalized linear mixed models, provided a thorough understanding of the findings.</p>
<p>Clinically, the tested chatbots show promise as supplementary sources for dental trauma. They may be especially useful in urgent situations for professionals in public health networks who require immediate guidance, particularly in areas with limited specialist access, by providing rapid recommendations based on international guidelines such as IADT. They also have potential in educational settings, assisting parents, teachers, and students in adopting appropriate initial measures (<xref ref-type="bibr" rid="B21">21</xref>). Future applications will require integration with specialized databases and prospective validation. A multimodal system combining natural language processing and image analysis could overcome current accuracy limitations.</p>
<p>The rapid advancement and continual refinement of LLMs necessitate careful consideration of complex ethical issues, particularly concerning data privacy and data provenance in healthcare environments (<xref ref-type="bibr" rid="B54">54</xref>). It is essential for clinicians, patients, and developers to remain vigilant regarding the rights associated with personal data. These include the right to data ownership, the right not to be subjected to decisions based solely on automated processing, and the right to restrict the processing of their information. To address these challenges, governments and scientific societies must develop effective strategies that promote robust data protection measures. At the same time, these measures should facilitate the safe and ethical integration of LLMs into medical practice, ensuring that advancements in AI support rather than compromise patient rights and data security.</p>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>Microsoft Copilot and DeepSeek-V3 showed the highest accuracy and consistency over 30 days. Calibration prompts did not significantly increase accuracy or consistency of any model. Free LLMs currently available have potential as complementary tools for disseminating dental trauma information, provided their use is guided by reliable scientific sources and professional supervision.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>RL: Conceptualization, Formal analysis, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. AB: Investigation, Methodology, Writing &#x2013; review &#x0026; editing. AJ-S: Methodology, Validation, Writing &#x2013; review &#x0026; editing. NT: Validation, Writing &#x2013; review &#x0026; editing. CS: Funding acquisition, Supervision, Writing &#x2013; review &#x0026; editing. LP: Data curation, Formal analysis, Funding acquisition, Supervision, Writing &#x2013; review &#x0026; editing. WV: Conceptualization, Data curation, Formal analysis, Methodology, Project administration, Supervision, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. ChatGPT was used for grammar checking.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/froh.2025.1737114/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/froh.2025.1737114/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chiarello</surname> <given-names>F</given-names></name> <name><surname>Giordano</surname> <given-names>V</given-names></name> <name><surname>Spada</surname> <given-names>I</given-names></name> <name><surname>Barandoni</surname> <given-names>S</given-names></name> <name><surname>Fantoni</surname> <given-names>G</given-names></name></person-group>. <article-title>Future applications of generative large language models: a data-driven case study on ChatGPT</article-title>. <source>Technovation</source>. (<year>2024</year>) <volume>133</volume>:<fpage>103002</fpage>. <pub-id pub-id-type="doi">10.1016/j.technovation.2024.103002</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fern&#x00E1;ndez-Pichel</surname> <given-names>M</given-names></name> <name><surname>Pichel</surname> <given-names>JC</given-names></name> <name><surname>Losada</surname> <given-names>DE</given-names></name></person-group>. <article-title>Evaluating search engines and large language models for answering health questions</article-title>. <source>NPJ Digit Med</source>. (<year>2025</year>) <volume>8</volume>(<issue>1</issue>):<fpage>153</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-025-01546-w</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ray</surname> <given-names>PP</given-names></name></person-group>. <article-title>ChatGPT: a comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope</article-title>. <source>Internet Things Cyber Phys Syst</source>. (<year>2023</year>) <volume>3</volume>:<fpage>121</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1016/j.iotcps.2023.04.003</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Agaronnik</surname> <given-names>ND</given-names></name> <name><surname>Davis</surname> <given-names>J</given-names></name> <name><surname>Manz</surname> <given-names>CR</given-names></name> <name><surname>Tulsky</surname> <given-names>JA</given-names></name> <name><surname>Lindvall</surname> <given-names>C</given-names></name></person-group>. <article-title>Large language models to identify advance care planning in patients with advanced cancer</article-title>. <source>J Pain Symptom Manage</source>. (<year>2025</year>) <volume>69</volume>(<issue>3</issue>):<fpage>243</fpage>&#x2013;<lpage>50.e1</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpainsymman.2024.11.016</pub-id><pub-id pub-id-type="pmid">39586429</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaiser</surname> <given-names>KN</given-names></name> <name><surname>Hughes</surname> <given-names>AJ</given-names></name> <name><surname>Yang</surname> <given-names>AD</given-names></name> <name><surname>Mohanty</surname> <given-names>S</given-names></name> <name><surname>Maatman</surname> <given-names>TK</given-names></name> <name><surname>Gonzalez</surname> <given-names>AA</given-names></name><etal/></person-group> <article-title>Use of large language models as clinical decision support tools for management pancreatic adenocarcinoma using national comprehensive cancer network guidelines</article-title>. <source>Surgery</source>. (<year>2025</year>) <volume>182</volume>:<fpage>109267</fpage>. <pub-id pub-id-type="doi">10.1016/j.surg.2025.109267</pub-id><pub-id pub-id-type="pmid">40055080</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>J</given-names></name> <name><surname>Chang</surname> <given-names>C</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Cui</surname> <given-names>S</given-names></name> <name><surname>Yuan</surname> <given-names>F</given-names></name> <name><surname>Li</surname> <given-names>Z</given-names></name><etal/></person-group> <article-title>Large language Models&#x2019; responses to spinal cord injury: a comparative study of performance</article-title>. <source>J Med Syst</source>. (<year>2025</year>) <volume>49</volume>(<issue>1</issue>):<fpage>39</fpage>. <pub-id pub-id-type="doi">10.1007/s10916-025-02170-7</pub-id><pub-id pub-id-type="pmid">40128385</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z</given-names></name> <name><surname>Yan</surname> <given-names>C</given-names></name> <name><surname>Cao</surname> <given-names>Y</given-names></name> <name><surname>Gong</surname> <given-names>A</given-names></name> <name><surname>Li</surname> <given-names>F</given-names></name> <name><surname>Zeng</surname> <given-names>R</given-names></name></person-group>. <article-title>Evaluating performance of large language models for atrial fibrillation management using different prompting strategies and languages</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>(<issue>1</issue>):<fpage>19028</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-04309-5</pub-id><pub-id pub-id-type="pmid">40447746</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ourang</surname> <given-names>SA</given-names></name> <name><surname>Sohrabniya</surname> <given-names>F</given-names></name> <name><surname>Mohammad-Rahimi</surname> <given-names>H</given-names></name> <name><surname>Dianat</surname> <given-names>O</given-names></name> <name><surname>Aminoshariae</surname> <given-names>A</given-names></name> <name><surname>Nagendrababu</surname> <given-names>V</given-names></name><etal/></person-group> <article-title>Artificial intelligence in endodontics: fundamental principles, workflow, and tasks</article-title>. <source>Int Endodontic J</source>. (<year>2024</year>) <volume>57</volume>(<issue>11</issue>):<fpage>1546</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1111/iej.14127</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amilcar</surname> <given-names>ALL</given-names></name> <name><surname>Vieira</surname> <given-names>WA</given-names></name> <name><surname>Matta</surname> <given-names>ACG</given-names></name> <name><surname>De Almeida Gomes</surname> <given-names>BPF</given-names></name> <name><surname>Da Silva</surname> <given-names>MAM</given-names></name> <name><surname>De Almeida</surname> <given-names>JFA</given-names></name><etal/></person-group> <article-title>Epidemiological profile of luxations injuries with or without dental fractures in permanent teeth: a 10-years retrospective study</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>5</issue>):<fpage>530</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12953</pub-id><pub-id pub-id-type="pmid">38576382</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vieira</surname> <given-names>WA</given-names></name> <name><surname>Pereira</surname> <given-names>AC</given-names></name> <name><surname>Lazzari</surname> <given-names>J</given-names></name> <name><surname>Pecorari</surname> <given-names>VGA</given-names></name> <name><surname>Gomes</surname> <given-names>BPFA</given-names></name> <name><surname>Almeida</surname> <given-names>JFA</given-names></name><etal/></person-group> <article-title>Epidemiology and severity of traumatic dental injuries in permanent teeth: a 20-year retrospective study</article-title>. <source>Braz Dent J</source>. (<year>2023</year>) <volume>34</volume>(<issue>3</issue>):<fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1590/0103-6440202305257</pub-id><pub-id pub-id-type="pmid">37466516</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Petti</surname> <given-names>S</given-names></name> <name><surname>Glendor</surname> <given-names>U</given-names></name> <name><surname>Andersson</surname> <given-names>L</given-names></name></person-group>. <article-title>World traumatic dental injury prevalence and incidence, a meta-analysis&#x2014;one billion living people have had traumatic dental injuries</article-title>. <source>Dent Traumatol</source>. (<year>2018</year>) <volume>34</volume>(<issue>2</issue>):<fpage>71</fpage>&#x2013;<lpage>86</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12389</pub-id><pub-id pub-id-type="pmid">29455471</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vieira</surname> <given-names>WA</given-names></name> <name><surname>Pecorari</surname> <given-names>VGA</given-names></name> <name><surname>Figueiredo-de-Almeida</surname> <given-names>R</given-names></name> <name><surname>Carvas Junior</surname> <given-names>N</given-names></name> <name><surname>Vargas-Neto</surname> <given-names>J</given-names></name> <name><surname>Santos</surname> <given-names>ECA</given-names></name><etal/></person-group> <article-title>Prevalence of dental trauma in Brazilian children and adolescents: a systematic review and meta-analysis</article-title>. <source>Cad Saude Publica</source>. (<year>2021</year>) <volume>37</volume>(<issue>12</issue>):<fpage>e00015920</fpage>. <pub-id pub-id-type="doi">10.1590/0102-311x00015920</pub-id><pub-id pub-id-type="pmid">34909926</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jogezai</surname> <given-names>U</given-names></name> <name><surname>Kalsi</surname> <given-names>A</given-names></name></person-group>. <article-title>Long-term complications and management of dental trauma in the adult patient&#x2014;part 1: fractured teeth, pulpal complications and resorption</article-title>. <source>Br Dent J</source>. (<year>2024</year>) <volume>237</volume>(<issue>2</issue>):<fpage>95</fpage>&#x2013;<lpage>105</lpage>. <pub-id pub-id-type="doi">10.1038/s41415-024-7641-z</pub-id><pub-id pub-id-type="pmid">39060587</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fernandez</surname> <given-names>MS</given-names></name> <name><surname>Pauli</surname> <given-names>LA</given-names></name> <name><surname>Ara&#x00FA;jo</surname> <given-names>ABG</given-names></name> <name><surname>Demarco</surname> <given-names>GT</given-names></name> <name><surname>Azevedo</surname> <given-names>MS</given-names></name> <name><surname>Da Costa</surname> <given-names>VPP</given-names></name><etal/></person-group> <article-title>Dental pain in Brazilian preschool children: association with the severity of dental caries and impact on oral health-related quality of life</article-title>. <source>Eur Arch Paediatr Dent</source>. (<year>2024</year>) <volume>25</volume>(<issue>4</issue>):<fpage>481</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1007/s40368-024-00906-6</pub-id><pub-id pub-id-type="pmid">38761358</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brandini</surname> <given-names>DA</given-names></name> <name><surname>Carvalho De Souza Cant&#x00E3;o</surname> <given-names>AB</given-names></name> <name><surname>Levin</surname> <given-names>L</given-names></name></person-group>. <article-title>Public health policies in dental traumatology: a call for action!</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>6</issue>):<fpage>612</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12967</pub-id><pub-id pub-id-type="pmid">38742753</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Budak</surname> <given-names>L</given-names></name> <name><surname>Levin</surname> <given-names>L</given-names></name></person-group>. <article-title>The importance of immediate dental trauma care: comprehensive education, treatment approaches, and their profound impact on patients&#x2019; quality of life</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>5</issue>):<fpage>477</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12987</pub-id><pub-id pub-id-type="pmid">39559841</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Goel</surname> <given-names>S</given-names></name> <name><surname>Srivastav</surname> <given-names>S</given-names></name> <name><surname>Mathur</surname> <given-names>VP</given-names></name> <name><surname>Rahul</surname> <given-names>M</given-names></name> <name><surname>Haldar</surname> <given-names>P</given-names></name><etal/></person-group> <article-title>Global status of knowledge of parents for emergency management of traumatic dental injuries: a systematic review and meta-analysis</article-title>. <source>Evid Based Dent</source>. (<year>2023</year>) <volume>24</volume>(<issue>2</issue>):<fpage>91</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1038/s41432-023-00883-7</pub-id><pub-id pub-id-type="pmid">37188922</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Jonna</surname> <given-names>I</given-names></name> <name><surname>Mathur</surname> <given-names>VP</given-names></name> <name><surname>Goel</surname> <given-names>S</given-names></name> <name><surname>Ritwik</surname> <given-names>P</given-names></name> <name><surname>Rahul</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Global status of knowledge for the prevention and emergency management of traumatic dental injuries among non-dental healthcare professionals: a systematic review and meta-analysis</article-title>. <source>Injury</source>. (<year>2021</year>) <volume>52</volume>(<issue>8</issue>):<fpage>2025</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/j.injury.2021.06.006</pub-id><pub-id pub-id-type="pmid">34176635</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Sultan</surname> <given-names>F</given-names></name> <name><surname>Mathur</surname> <given-names>VP</given-names></name> <name><surname>Rahul</surname> <given-names>M</given-names></name> <name><surname>Goel</surname> <given-names>S</given-names></name> <name><surname>Bansal</surname> <given-names>K</given-names></name><etal/></person-group> <article-title>Global status of knowledge for prevention and emergency management of traumatic dental injuries in dental professionals: systematic review and meta-analysis</article-title>. <source>Dent Traumatol</source>. (<year>2021</year>) <volume>37</volume>(<issue>2</issue>):<fpage>161</fpage>&#x2013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12621</pub-id><pub-id pub-id-type="pmid">33180997</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andreasen</surname> <given-names>JO</given-names></name> <name><surname>Lauridsen</surname> <given-names>E</given-names></name> <name><surname>Andreasen</surname> <given-names>FM</given-names></name></person-group>. <article-title>Contradictions in the treatment of traumatic dental injuries and ways to proceed in dental trauma research&#x002A;</article-title>. <source>Dent Traumatol</source>. (<year>2010</year>) <volume>26</volume>(<issue>1</issue>):<fpage>16</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1111/j.1600-9657.2009.00818.x</pub-id><pub-id pub-id-type="pmid">19811511</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>de Gregorio</surname> <given-names>C</given-names></name> <name><surname>Tewari</surname> <given-names>N</given-names></name></person-group>. <article-title>Management of complications in dental traumatology</article-title>. <source>Dent Traumatol</source>. (<year>2025</year>) <volume>41</volume>(<issue>Suppl 1</issue>):<fpage>64</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1111/edt.13018</pub-id><pub-id pub-id-type="pmid">39578670</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abbott</surname> <given-names>PV</given-names></name> <name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Mills</surname> <given-names>SC</given-names></name> <name><surname>Stasiuk</surname> <given-names>H</given-names></name> <name><surname>Roettger</surname> <given-names>M</given-names></name> <name><surname>O&#x2019;Connell</surname> <given-names>AC</given-names></name><etal/></person-group> <article-title>The international association of dental traumatology (IADT) and the academy for sports dentistry (ASD) guidelines for prevention of traumatic dental injuries: part 7: orthodontics for the prevention of dental and oral trauma</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>S1</issue>):<fpage>16</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12927</pub-id><pub-id pub-id-type="pmid">38363699</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abbott</surname> <given-names>PV</given-names></name> <name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>O&#x2019;Connell</surname> <given-names>AC</given-names></name> <name><surname>Mills</surname> <given-names>SC</given-names></name> <name><surname>Stasiuk</surname> <given-names>H</given-names></name> <name><surname>Roettger</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>The international association of dental traumatology (IADT) and the academy for sports dentistry (ASD) guidelines for prevention of traumatic dental injuries: part 3: mouthguards for the prevention of dental and oral trauma</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>S1</issue>):<fpage>7</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12925</pub-id><pub-id pub-id-type="pmid">38363704</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>O&#x2019;Connell</surname> <given-names>AC</given-names></name> <name><surname>Abbott</surname> <given-names>PV</given-names></name> <name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Mills</surname> <given-names>SC</given-names></name> <name><surname>Stasiuk</surname> <given-names>H</given-names></name> <name><surname>Roettger</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>The international association of dental traumatology (IADT) and the academy for sports dentistry (ASD) guidelines for prevention of traumatic dental injuries: part 2: primary prevention of dental trauma across the life course</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>S1</issue>):<fpage>4</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12924</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tewari</surname> <given-names>N</given-names></name> <name><surname>Abbott</surname> <given-names>PV</given-names></name> <name><surname>O&#x2019;Connell</surname> <given-names>AC</given-names></name> <name><surname>Mills</surname> <given-names>SC</given-names></name> <name><surname>Stasiuk</surname> <given-names>H</given-names></name> <name><surname>Roettger</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>The international association of dental traumatology (IADT) and the academy for sports dentistry (ASD) guidelines for prevention of traumatic dental injuries: part 5: secondary prevention of dental injuries</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>S1</issue>):<fpage>12</fpage>&#x2013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12929</pub-id><pub-id pub-id-type="pmid">38363700</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bubna</surname> <given-names>DP</given-names></name> <name><surname>Felipe De Jesus Freitas</surname> <given-names>P</given-names></name> <name><surname>Ferraz</surname> <given-names>AX</given-names></name> <name><surname>Abuabara</surname> <given-names>A</given-names></name> <name><surname>Baratto-Filho</surname> <given-names>F</given-names></name> <name><surname>Marques De Mattos De Araujo</surname> <given-names>B</given-names></name><etal/></person-group> <article-title>Dental trauma evo&#x2014;development of an artificial intelligence-powered chatbot to support professional management of dental trauma</article-title>. <source>J Endod</source>. (<year>2025</year>) <volume>51</volume>(<issue>9</issue>):<fpage>1229</fpage>&#x2013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1016/j.joen.2025.05.012</pub-id><pub-id pub-id-type="pmid">40449881</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chau</surname> <given-names>RCW</given-names></name> <name><surname>Thu</surname> <given-names>KM</given-names></name> <name><surname>Yu</surname> <given-names>OY</given-names></name> <name><surname>Hsung</surname> <given-names>RT</given-names></name> <name><surname>Lo</surname> <given-names>ECM</given-names></name> <name><surname>Lam</surname> <given-names>WYH</given-names></name></person-group>. <article-title>Performance of generative artificial intelligence in dental licensing examinations</article-title>. <source>Int Dent J</source>. (<year>2024</year>) <volume>74</volume>(<issue>3</issue>):<fpage>616</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1016/j.identj.2023.12.007</pub-id><pub-id pub-id-type="pmid">38242810</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chau</surname> <given-names>RCW</given-names></name> <name><surname>Thu</surname> <given-names>KM</given-names></name> <name><surname>Yu</surname> <given-names>OY</given-names></name> <name><surname>Hsung</surname> <given-names>RT</given-names></name> <name><surname>Wang</surname> <given-names>DCP</given-names></name> <name><surname>Man</surname> <given-names>MWH</given-names></name><etal/></person-group> <article-title>Evaluation of chatbot responses to text-based multiple-choice questions in prosthodontic and restorative dentistry</article-title>. <source>Dent J</source>. (<year>2025</year>) <volume>13</volume>(<issue>7</issue>):<fpage>279</fpage>. <pub-id pub-id-type="doi">10.3390/dj13070279</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rokhshad</surname> <given-names>R</given-names></name> <name><surname>Khoury</surname> <given-names>ZH</given-names></name> <name><surname>Mohammad-Rahimi</surname> <given-names>H</given-names></name> <name><surname>Motie</surname> <given-names>P</given-names></name> <name><surname>Price</surname> <given-names>JB</given-names></name> <name><surname>Tavares</surname> <given-names>T</given-names></name><etal/></person-group> <article-title>Efficacy and empathy of AI chatbots in answering frequently asked questions on oral oncology</article-title>. <source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>. (<year>2025</year>) <volume>139</volume>(<issue>6</issue>):<fpage>719</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.oooo.2024.12.028</pub-id><pub-id pub-id-type="pmid">39843286</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>HC</given-names></name> <name><surname>Dang</surname> <given-names>HP</given-names></name> <name><surname>Nguyen</surname> <given-names>TL</given-names></name> <name><surname>Hoang</surname> <given-names>V</given-names></name> <name><surname>Nguyen</surname> <given-names>VA</given-names></name></person-group>. <article-title>Accuracy of latest large language models in answering multiple choice questions in dentistry: a comparative study</article-title>. <source>PLoS One</source>. (<year>2025</year>) <volume>20</volume>(<issue>1</issue>):<fpage>e0317423</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0317423</pub-id><pub-id pub-id-type="pmid">39879192</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>VA</given-names></name> <name><surname>Vuong</surname> <given-names>TQT</given-names></name> <name><surname>Nguyen</surname> <given-names>VH</given-names></name></person-group>. <article-title>Comparative performance of deep-reasoning and lightweight large language models on oral implantology multiple-choice questions</article-title>. <source>Int J Prosthodont</source>. (<year>2025</year>) <volume>0</volume>(<issue>0</issue>):<fpage>1</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.11607/ijp.9504</pub-id><pub-id pub-id-type="pmid">41037736</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>&#x00C7;ege</surname> <given-names>EE</given-names></name> <name><surname>C&#x00F6;mert</surname> <given-names>H</given-names></name> <name><surname>Akal</surname> <given-names>N</given-names></name> <name><surname>&#x00D6;lmez</surname> <given-names>A</given-names></name></person-group>. <article-title>Evaluation of the performance of artificial intelligence based chatbots in providing first aid information on dental trauma according to the ToothSOS application</article-title>. <source>Dent Traumatol</source>. (<year>2025</year>) <volume>41</volume>(<issue>6</issue>):<fpage>696</fpage>&#x2013;<lpage>705</lpage>. <pub-id pub-id-type="doi">10.1111/edt.13078</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mustulo&#x011F;lu</surname> <given-names>&#x015E;</given-names></name> <name><surname>Deniz</surname> <given-names>BP</given-names></name></person-group>. <article-title>Evaluation of chatbots in the emergency management of avulsion injuries</article-title>. <source>Dent Traumatol</source>. (<year>2025</year>) 41(4):<fpage>437</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1111/edt.13041</pub-id></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ozden</surname> <given-names>I</given-names></name> <name><surname>Gokyar</surname> <given-names>M</given-names></name> <name><surname>Ozden</surname> <given-names>ME</given-names></name> <name><surname>Sazak Ovecoglu</surname> <given-names>H</given-names></name></person-group>. <article-title>Assessment of artificial intelligence applications in responding to dental trauma</article-title>. <source>Dent Traumatol</source>. (<year>2024</year>) <volume>40</volume>(<issue>6</issue>):<fpage>722</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12965</pub-id><pub-id pub-id-type="pmid">38742754</pub-id></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Portilla</surname> <given-names>ND</given-names></name> <name><surname>Garcia-Font</surname> <given-names>M</given-names></name> <name><surname>Nagendrababu</surname> <given-names>V</given-names></name> <name><surname>Abbott</surname> <given-names>PV</given-names></name> <name><surname>Sanchez</surname> <given-names>JAG</given-names></name> <name><surname>Abella</surname> <given-names>F</given-names></name></person-group>. <article-title>Accuracy and consistency of gemini responses regarding the management of traumatized permanent teeth</article-title>. <source>Dent Traumatol</source>. (<year>2025</year>) <volume>41</volume>(<issue>2</issue>):<fpage>171</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1111/edt.13004</pub-id><pub-id pub-id-type="pmid">39460511</pub-id></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tokg&#x00F6;z Kaplan</surname> <given-names>T</given-names></name> <name><surname>Cankar</surname> <given-names>M</given-names></name></person-group>. <article-title>Evidence-based potential of generative artificial intelligence large language models on dental avulsion: ChatGPT versus gemini</article-title>. <source>Dent Traumatol</source>. (<year>2025</year>) <volume>41</volume>(<issue>2</issue>):<fpage>178</fpage>&#x2013;<lpage>86</lpage>. <pub-id pub-id-type="doi">10.1111/edt.12999</pub-id></mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gallifant</surname> <given-names>J</given-names></name> <name><surname>Afshar</surname> <given-names>M</given-names></name> <name><surname>Ameen</surname> <given-names>S</given-names></name> <name><surname>Aphinyanaphongs</surname> <given-names>Y</given-names></name> <name><surname>Chen</surname> <given-names>S</given-names></name> <name><surname>Cacciamani</surname> <given-names>G</given-names></name><etal/></person-group> <article-title>The TRIPOD-LLM reporting guideline for studies using large language models</article-title>. <source>Nat Med</source>. (<year>2025</year>) <volume>31</volume>(<issue>1</issue>):<fpage>60</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-024-03425-5</pub-id><pub-id pub-id-type="pmid">39779929</pub-id></mixed-citation></ref>
<ref id="B38"><label>38.</label><mixed-citation publication-type="journal"><collab>The CHART Collaborative</collab>, <person-group person-group-type="author"><name><surname>Huo</surname> <given-names>B</given-names></name> <name><surname>Collins</surname> <given-names>GS</given-names></name> <name><surname>Chartash</surname> <given-names>D</given-names></name> <name><surname>Thirunavukarasu</surname> <given-names>AJ</given-names></name> <name><surname>Flanagin</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Reporting guideline for chatbot health advice studies: the CHART statement</article-title>. <source>JAMA Netw Open</source>. (<year>2025</year>) <volume>8</volume>(<issue>8</issue>):<fpage>e2530220</fpage>. <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2025.30220</pub-id><pub-id pub-id-type="pmid">40747871</pub-id></mixed-citation></ref>
<ref id="B39"><label>39.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cohen</surname> <given-names>J</given-names></name></person-group>. <source>Statistical Power Analysis for the Behavioral Sciences</source>. <edition>2nd ed.</edition> <publisher-loc>Hillside, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates</publisher-name> (<year>1988</year>).</mixed-citation></ref>
<ref id="B40"><label>40.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rossettini</surname> <given-names>G</given-names></name> <name><surname>Bargeri</surname> <given-names>S</given-names></name> <name><surname>Cook</surname> <given-names>C</given-names></name> <name><surname>Guida</surname> <given-names>S</given-names></name> <name><surname>Palese</surname> <given-names>A</given-names></name> <name><surname>Rodeghiero</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>Accuracy of ChatGPT-3.5, ChatGPT-4o, copilot, gemini, claude, and perplexity in advising on lumbosacral radicular pain against clinical practice guidelines: cross-sectional study</article-title>. <source>Front Digit Health</source>. (<year>2025</year>) <volume>7</volume>:<fpage>1574287</fpage>. <pub-id pub-id-type="doi">10.3389/fdgth.2025.1574287</pub-id><pub-id pub-id-type="pmid">40657647</pub-id></mixed-citation></ref>
<ref id="B41"><label>41.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>J</given-names></name> <name><surname>Zhuang</surname> <given-names>B</given-names></name> <name><surname>Huang</surname> <given-names>S</given-names></name> <name><surname>Fang</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name><etal/></person-group> <article-title>Accuracy of large language models when answering clinical research questions: systematic review and network meta-analysis</article-title>. <source>J Med Internet Res</source>. (<year>2025</year>) <volume>27</volume>:<fpage>e64486</fpage>. <pub-id pub-id-type="doi">10.2196/64486</pub-id><pub-id pub-id-type="pmid">40305085</pub-id></mixed-citation></ref>
<ref id="B42"><label>42.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Borgonovo</surname> <given-names>F</given-names></name> <name><surname>Matsuo</surname> <given-names>T</given-names></name> <name><surname>Petri</surname> <given-names>F</given-names></name> <name><surname>Amin Alavi</surname> <given-names>SM</given-names></name> <name><surname>Mazudie Ndjonko</surname> <given-names>LC</given-names></name> <name><surname>Gori</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Battle of the bots: solving clinical cases in osteoarticular infections with large language models</article-title>. <source>Mayo Clin Proc Digit Health</source>. (<year>2025</year>) <volume>3</volume>(<issue>3</issue>):<fpage>100230</fpage>. <pub-id pub-id-type="doi">10.1016/j.mcpdig.2025.100230</pub-id><pub-id pub-id-type="pmid">40583928</pub-id></mixed-citation></ref>
<ref id="B43"><label>43.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xie</surname> <given-names>L</given-names></name> <name><surname>Jin</surname> <given-names>Y</given-names></name> <name><surname>Xu</surname> <given-names>L</given-names></name> <name><surname>Chang</surname> <given-names>S</given-names></name> <name><surname>Xu</surname> <given-names>X</given-names></name></person-group>. <article-title>Fusing domain knowledge with a fine-tuned large language model for enhanced molecular property prediction</article-title>. <source>J Chem Theory Comput</source>. (<year>2025</year>) <volume>21</volume>(<issue>14</issue>):<fpage>6743</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.5c00605</pub-id><pub-id pub-id-type="pmid">40631446</pub-id></mixed-citation></ref>
<ref id="B44"><label>44.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>P</given-names></name> <name><surname>Wu</surname> <given-names>Y</given-names></name> <name><surname>Jin</surname> <given-names>K</given-names></name> <name><surname>Chen</surname> <given-names>X</given-names></name> <name><surname>He</surname> <given-names>M</given-names></name> <name><surname>Shi</surname> <given-names>D</given-names></name></person-group>. <article-title>DeepSeek-R1 outperforms gemini 2.0 pro, OpenAI o1, and o3-mini in bilingual complex ophthalmology reasoning</article-title>. <source>Adv Ophthalmol Pract Res</source>. (<year>2025</year>) <volume>5</volume>(<issue>3</issue>):<fpage>189</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1016/j.aopr.2025.05.001</pub-id><pub-id pub-id-type="pmid">40678192</pub-id></mixed-citation></ref>
<ref id="B45"><label>45.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shirani</surname> <given-names>M</given-names></name></person-group>. <article-title>Comparing the performance of ChatGPT 4o, DeepSeek R1, and gemini 2 pro in answering fixed prosthodontics questions over time</article-title>. <source>J Prosthet Dent</source>. (<year>2025</year>):S0022-3913(25)00400-7. <pub-id pub-id-type="doi">10.1016/j.prosdent.2025.04.038</pub-id></mixed-citation></ref>
<ref id="B46"><label>46.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mohammad-Rahimi</surname> <given-names>H</given-names></name> <name><surname>Setzer</surname> <given-names>FC</given-names></name> <name><surname>Aminoshariae</surname> <given-names>A</given-names></name> <name><surname>Dummer</surname> <given-names>PMH</given-names></name> <name><surname>Duncan</surname> <given-names>HF</given-names></name> <name><surname>Nosrat</surname> <given-names>A</given-names></name></person-group>. <article-title>Artificial intelligence chatbots in endodontic education&#x2014;concepts and potential applications</article-title>. <source>Int Endodontic J</source>. (<year>2025</year>). <pub-id pub-id-type="doi">10.1111/iej.14231</pub-id></mixed-citation></ref>
<ref id="B47"><label>47.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lawson McLean</surname> <given-names>A</given-names></name> <name><surname>Hristidis</surname> <given-names>V</given-names></name></person-group>. <article-title>Evidence-Based analysis of AI chatbots in oncology patient education: implications for trust, perceived realness, and misinformation management</article-title>. <source>J Canc Educ</source>. (<year>2025</year>) <volume>40</volume>(<issue>4</issue>):<fpage>482</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1007/s13187-025-02592-4</pub-id></mixed-citation></ref>
<ref id="B48"><label>48.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Azimi</surname> <given-names>I</given-names></name> <name><surname>Qi</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Rahmani</surname> <given-names>AM</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name></person-group>. <article-title>Evaluation of LLMs accuracy and consistency in the registered dietitian exam through prompt engineering and knowledge retrieval</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>(<issue>1</issue>):<fpage>1506</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-85003-w</pub-id><pub-id pub-id-type="pmid">39789057</pub-id></mixed-citation></ref>
<ref id="B49"><label>49.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sonoda</surname> <given-names>Y</given-names></name> <name><surname>Kurokawa</surname> <given-names>R</given-names></name> <name><surname>Hagiwara</surname> <given-names>A</given-names></name> <name><surname>Asari</surname> <given-names>Y</given-names></name> <name><surname>Fukushima</surname> <given-names>T</given-names></name> <name><surname>Kanzawa</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Structured clinical reasoning prompt enhances LLM&#x2019;s diagnostic capabilities in diagnosis please quiz cases</article-title>. <source>Jpn J Radiol</source>. (<year>2025</year>) <volume>43</volume>(<issue>4</issue>):<fpage>586</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1007/s11604-024-01712-2</pub-id><pub-id pub-id-type="pmid">39625594</pub-id></mixed-citation></ref>
<ref id="B50"><label>50.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sivarajkumar</surname> <given-names>S</given-names></name> <name><surname>Kelley</surname> <given-names>M</given-names></name> <name><surname>Samolyk-Mazzanti</surname> <given-names>A</given-names></name> <name><surname>Visweswaran</surname> <given-names>S</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name></person-group>. <article-title>An empirical evaluation of prompting strategies for large language models in zero-shot clinical natural language processing: algorithm development and validation study</article-title>. <source>JMIR Med Inform</source>. (<year>2024</year>) <volume>12</volume>:<fpage>e55318</fpage>. <pub-id pub-id-type="doi">10.2196/55318</pub-id><pub-id pub-id-type="pmid">38587879</pub-id></mixed-citation></ref>
<ref id="B51"><label>51.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mesk&#x00F3;</surname> <given-names>B</given-names></name></person-group>. <article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title>. <source>J Med Internet Res</source>. (<year>2023</year>) <volume>25</volume>:<fpage>e50638</fpage>. <pub-id pub-id-type="doi">10.2196/50638</pub-id></mixed-citation></ref>
<ref id="B52"><label>52.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Singhal</surname> <given-names>K</given-names></name> <name><surname>Azizi</surname> <given-names>S</given-names></name> <name><surname>Tu</surname> <given-names>T</given-names></name> <name><surname>Mahdavi</surname> <given-names>SS</given-names></name> <name><surname>Wei</surname> <given-names>J</given-names></name> <name><surname>Chung</surname> <given-names>HW</given-names></name><etal/></person-group> <article-title>Large language models encode clinical knowledge</article-title>. <source>Nature</source>. (<year>2023</year>) <volume>620</volume>(<issue>7972</issue>):<fpage>172</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id><pub-id pub-id-type="pmid">37438534</pub-id></mixed-citation></ref>
<ref id="B53"><label>53.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mendon&#x00E7;a De Moura</surname> <given-names>JD</given-names></name> <name><surname>Fontana</surname> <given-names>CE</given-names></name> <name><surname>Reis Da Silva Lima</surname> <given-names>VH</given-names></name> <name><surname>De Souza Alves</surname> <given-names>I</given-names></name> <name><surname>Andr&#x00E9; De Melo Santos</surname> <given-names>P</given-names></name> <name><surname>De Almeida Rodrigues</surname> <given-names>P</given-names></name></person-group>. <article-title>Comparative accuracy of artificial intelligence chatbots in pulpal and periradicular diagnosis: a cross-sectional study</article-title>. <source>Comput Biol Med</source>. (<year>2024</year>) <volume>183</volume>:<fpage>109332</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.109332</pub-id></mixed-citation></ref>
<ref id="B54"><label>54.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ong</surname> <given-names>JCL</given-names></name> <name><surname>Chang</surname> <given-names>SY</given-names></name> <name><surname>William</surname> <given-names>W</given-names></name> <name><surname>Butte</surname> <given-names>AJ</given-names></name> <name><surname>Shah</surname> <given-names>NH</given-names></name> <name><surname>Chew</surname> <given-names>LST</given-names></name><etal/></person-group> <article-title>Ethical and regulatory challenges of large language models in medicine</article-title>. <source>Lancet Digit Health</source>. (<year>2024</year>) <volume>6</volume>(<issue>6</issue>):<fpage>e428</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1016/S2589-7500(24)00061-X</pub-id></mixed-citation></ref>
<ref id="B55"><label>55.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>VA</given-names></name> <name><surname>Vuong</surname> <given-names>TQT</given-names></name> <name><surname>Nguyen</surname> <given-names>VH</given-names></name></person-group>. <article-title>Benchmarking large-language-model vision capabilities in oral and maxillofacial anatomy: a cross-sectional study</article-title>. <source>PLoS One</source>. (<year>2025</year>) <volume>20</volume>(<issue>10</issue>):<fpage>e0335775</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0335775</pub-id><pub-id pub-id-type="pmid">41150678</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3233076/overview">Camila Pinheiro Furquim</ext-link>, IOA Boutique Curitiba, Brazil</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/990924/overview">Walter Y. H. Lam</ext-link>, The University of Hong Kong, Hong Kong SAR, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3128255/overview">Viet Anh Nguyen</ext-link>, Phenikaa University, Vietnam</p></fn>
</fn-group>
</back>
</article>