<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title-group>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2026.1776697</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>The information challenge in public health crises: a study on the reliability and readability of information provided by large language model for thunderstorm asthma</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Zhenliang</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Feng</surname>
<given-names>Yanghui</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cao</surname>
<given-names>Feng</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3315306"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Nursing, Zhejiang Chinese Medical University</institution>, <city>Hangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Critical Care Medicine, Zhejiang Hospital</institution>, <city>Hangzhou</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Feng Cao, <email xlink:href="mailto:cf20150715@163.com">cf20150715@163.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>14</volume>
<elocation-id>1776697</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>02</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Zhu, Feng and Cao.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zhu, Feng and Cao</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Large language models (LLMs) have seen extensive application in health information consultation, enabling interactive responses to complex queries; however, their reliability and readability warrant further investigation. This study aims to assess the reliability and readability of cross-disciplinary responses regarding thunderstorm asthma generated by artificial intelligence platforms, including ChatGPT-4, Deepseek-V3.2, Perplexity Pro, and Microsoft Copilot.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study uses Google Trends to identify and filter topic-specific information on thunderstorm asthma. This study analyses cross-disciplinary responses generated by ChatGPT-4, Deepseek-V3.2, Perplexity Pro, and Microsoft Copilot in response to conversational inputs. The 29 selected responses exhibit varying levels of meteorological forecasting accuracy concerning thunderstorms, as well as prevalent themes related to asthma symptomatology and therapeutic interventions. The study employed reliability assessment tools, including the DISCERN instrument, the Ensuring Quality Information for Patients Scale (EQIP), the JAMA benchmarks, and the Global Quality Scoring (GQS), in conjunction with six authoritative readability metrics&#x2014;namely, the Automated Readability Index (ARI), Coleman-Liau Grade Level (CL), Flesch&#x2013;Kincaid Grade Level (FKGL), Flesch Reading Ease Score (FRES), Gunning Fog Index (GFI), and SMOG&#x2014;to enable a comprehensive evaluation.</p>
</sec>
<sec>
<title>Results</title>
<p>Research findings indicate statistically significant differences in the reliability of various artificial intelligence programmes when responding to complex interdisciplinary information queries. Microsoft Copilot demonstrates superior performance in terms of information reliability and structural quality, consistently achieving higher scores than ChatGPT-4o and Perplexity Pro, thereby providing more dependable information. However, all programme-generated informational responses were excessively complex for the general public, failing to meet sixth-grade reading comprehension standards, as the majority of outputs were written at a secondary education level or higher.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study reveals that while LLMs demonstrate some reliability in handling complex health consultations, none meets the recommended readability benchmark of a sixth-grade reading level. Future efforts should focus on improving the reliability and readability of LLM-generated health information to enhance comprehension amongst broader audiences.</p>
</sec>
</abstract>
<kwd-group>
<kwd>information response</kwd>
<kwd>large language model</kwd>
<kwd>readability</kwd>
<kwd>reliability</kwd>
<kwd>thunderstorm asthma</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="4"/>
<equation-count count="6"/>
<ref-count count="33"/>
<page-count count="9"/>
<word-count count="6224"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Disaster and Emergency Medicine</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec1">
<title>Background</title>
<p>Thunderstorm asthma represents a public health phenomenon triggered under specific meteorological conditions. Thunderstorms increase the concentration of airborne allergens in circulating air masses, leading to acute exacerbations or onset of asthma characterised by respiratory distress in susceptible populations. The majority of cases occur in the 30&#x2013;50 age group, with onset predominantly during spring and early summer. Between 36 and 56% of affected individuals had no prior asthma diagnosis.</p>
<p>Since the initial documentation of thunderstorm asthma events in Birmingham, UK, in the early 1980s (<xref ref-type="bibr" rid="ref1">1</xref>), globally, nearly 30 thunderstorm asthma events have been reported, resulting in more than 7,000 hospitalisations and at least 18 fatalities (<xref ref-type="bibr" rid="ref2 ref3 ref4 ref5 ref6">2&#x2013;6</xref>). The thunderstorm asthma event that occurred in Melbourne, Australia, in 2016 had significant and far-reaching impacts (<xref ref-type="bibr" rid="ref5">5</xref>). The sudden surge in cases overwhelmed healthcare services, severely compromising the delivery of medical care. Hospital admissions increased tenfold, ultimately resulting in ten fatalities attributable to thunderstorm asthma. Shorter durations of continuous event sequences are associated with a higher likelihood of abrupt outbreaks. Comparable meteorological conditions have created favourable conditions for pollen rupture and particulate dispersion, which are clearly evident as primary triggers in thunderstorm asthma events associated with environmental changes.</p>
<p>In the context of global climate change, the environmental challenges we face are becoming increasingly severe. In 2022, a government assessment report confirmed that climate change has contributed to increased pollen concentrations, accelerated dispersal rates, and a rise in extreme weather events, posing a direct threat to respiratory health, including asthma incidence (<xref ref-type="bibr" rid="ref7">7</xref>).</p>
<p>In the current global context marked by an uneven distribution of limited medical resources and compressed timeframes (<xref ref-type="bibr" rid="ref8">8</xref>), the emergence of LLM has diversified the sources of health-related information (<xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref10">10</xref>). Artificial intelligence conversational applications, such as ChatGPT-4, Deepseek-V3.2, Perplexity Pro, and Microsoft Copilot, are now widely deployed. The multiplicity of information acquisition channels constitutes a potential source of health information, making the analysis of their reliability and readability critically important. The reliability of information affects audience cognition and decision-making regarding health behaviours. Meanwhile, optimising the readability of information to align with sixth-grade reading comprehension levels enhances audience understanding (<xref ref-type="bibr" rid="ref11">11</xref>, <xref ref-type="bibr" rid="ref12">12</xref>).</p>
<p>However, certain limitations remain in addressing the occurrence, early warning mechanisms, and effective prevention and control measures for thunderstorm asthma. As an emerging public health incident, the prevention, effective containment, and reduction of mortality risks amongst vulnerable populations within a limited timeframe are of significant importance. Previous studies have utilised tweets from social media platforms such as Twitter to develop early detection systems for acute health events, including thunderstorm asthma (<xref ref-type="bibr" rid="ref13">13</xref>).</p>
<p>This study aims to conduct an in-depth analysis of the following two research questions: (1) How reliable is the information provided by LLMs on &#x201C;Thunderstorm Asthma&#x201D;? and (2) Does the readability of information provided by LLMs on &#x201C;Thunderstorm Asthma&#x201D; meet the recommended standards for health education materials?</p>
</sec>
<sec sec-type="materials|methods" id="sec2">
<title>Materials and methods</title>
<sec id="sec3">
<title>Question source and processing</title>
<p>To achieve comprehensive retrieval coverage, this study determines the standardised medical term for &#x201C;Thunderstorm Asthma&#x201D; through the Medical Subject Headings (MeSH) database. A theme is a collection of terminology that shares the same conceptual framework across languages and may encompass the most relevant information (<xref ref-type="bibr" rid="ref14">14</xref>). This ensures the comprehensiveness and accuracy of the research. The following MeSH entry terms have been identified: &#x201C;Thunderstorm Asthma&#x201D;, &#x201C;Asthma Thunderstorm&#x201D;, &#x201C;Asthma&#x201D;, &#x201C;Tornadoes&#x201D;, &#x201C;Asthma storm&#x201D;, &#x201C;Thunderstorm and Health&#x201D;, &#x201C;Thunderstorm Warning&#x201D;.</p>
<p>Google Trends serves as a dynamic indicator of search behaviour and public interest over time (<xref ref-type="bibr" rid="ref15">15</xref>). In this study, relevant standardised terms were input into the Google Trends system to capture, to the greatest extent possible, the global search trends related to &#x201C;Asthma Thunderstorm,&#x201D; thereby enabling the identification of the most relevant search queries associated with the &#x201C;Asthma Thunderstorm&#x201D; topic. A five-year longitudinal analysis (2020&#x2013;2025) of global search patterns and public attention was conducted (parameters: Worldwide; All categories; Popular searches; time frame: September 2020 to September 2025). To make up for the limitations of search engine data, respiratory critical care experts excluded duplicate and irrelevant information. After a series of screenings, 24 representative results related to &#x201C;Thunderstorm Asthma&#x201D; were finally determined to be used for the model performance benchmark information assessment of the LLMs (see <xref ref-type="table" rid="tab1">Table 1</xref>).</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Google Trends data: 25 keywords related to thunderstorm asthma worldwide from 2020 to 2025.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" colspan="2">Thunderstorm asthma: (2020/09&#x2013;2025/09, worldwide)</th>
</tr>
<tr>
<th align="left" valign="top">Top</th>
<th align="center" valign="top">Relevance</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1. Melbourne thunderstorm</td>
<td align="center" valign="top">100</td>
</tr>
<tr>
<td align="left" valign="top">2. Asthma Melbourne</td>
<td align="center" valign="top">99</td>
</tr>
<tr>
<td align="left" valign="top">3. Melbourne thunderstorm asthma</td>
<td align="center" valign="top">99</td>
</tr>
<tr>
<td align="left" valign="top">4. Thunderstorm warning</td>
<td align="center" valign="top">74</td>
</tr>
<tr>
<td align="left" valign="top">5. Asthma thunderstorm warning</td>
<td align="center" valign="top">74</td>
</tr>
<tr>
<td align="left" valign="top">6. Victoria thunderstorm asthma</td>
<td align="center" valign="top">44</td>
</tr>
<tr>
<td align="left" valign="top">7. Thunderstorm asthma today</td>
<td align="center" valign="top">31</td>
</tr>
<tr>
<td align="left" valign="top">8. Thunderstorm asthma warning Victoria</td>
<td align="center" valign="top">31</td>
</tr>
<tr>
<td align="left" valign="top">9. What is &#x201C;thunderstorm asthma&#x201D;</td>
<td align="center" valign="top">23</td>
</tr>
<tr>
<td align="left" valign="top">10. What is &#x201C;asthma&#x201D;</td>
<td align="center" valign="top">23</td>
</tr>
<tr>
<td align="left" valign="top">11. What is &#x201C;thunderstorm&#x201D;</td>
<td align="center" valign="top">23</td>
</tr>
<tr>
<td align="left" valign="top"><bold>12. Pollen (removed)</bold></td>
<td align="center" valign="top"><bold>22</bold></td>
</tr>
<tr>
<td align="left" valign="top">13. Thunderstorm asthma Australia</td>
<td align="center" valign="top">17</td>
</tr>
<tr>
<td align="left" valign="top">14. Melbourne weather</td>
<td align="center" valign="top">15</td>
</tr>
<tr>
<td align="left" valign="top">15. Asthma symptoms</td>
<td align="center" valign="top">15</td>
</tr>
<tr>
<td align="left" valign="top">16. Thunderstorm asthma symptoms</td>
<td align="center" valign="top">14</td>
</tr>
<tr>
<td align="left" valign="top">17. Melbourne weather thunderstorm asthma</td>
<td align="center" valign="top">14</td>
</tr>
<tr>
<td align="left" valign="top">18. Thunderstorm asthma warning today</td>
<td align="center" valign="top">14</td>
</tr>
<tr>
<td align="left" valign="top">19. Thunderstorm asthma warning Melbourne</td>
<td align="center" valign="top">13</td>
</tr>
<tr>
<td align="left" valign="top">20. Thunderstorm asthma forecast</td>
<td align="center" valign="top">12</td>
</tr>
<tr>
<td align="left" valign="top">21. Pollen count</td>
<td align="center" valign="top">11</td>
</tr>
<tr>
<td align="left" valign="top">22. Melbourne pollen</td>
<td align="center" valign="top">9</td>
</tr>
<tr>
<td align="left" valign="top">23. Asthma storm</td>
<td align="center" valign="top">8</td>
</tr>
<tr>
<td align="left" valign="top">24. Epidemic thunderstorm asthma</td>
<td align="center" valign="top">7</td>
</tr>
<tr>
<td align="left" valign="top">25. Thunderstorm asthma attack</td>
<td align="center" valign="top">6</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To improve the systematic assessment, the final search results are summarised. This study selected four mainstream LLMs, namely ChatGPT-4o, Deepseek-V3.2, Perplexity Pro, and Microsoft Copilot (version: 1.240.110.0), to conduct a benchmark performance analysis on the quality of information responses provided by LLMs. Large language models have a certain degree of exclusivity. Their internal weight information, training language databases, etc. are not made public. It is impossible to determine whether the training data sets of LLMs contain relevant public health information on &#x201C;thunderstorm asthma.&#x201D; To avoid data contamination that may result from the opacity of data structures and prevent any potential bias risks, all browser-related data are cleared before each prompt word is input to prevent artificial intelligence from relying on cached data or internal algorithms from the previous interaction.</p>
<sec id="sec4">
<title>Reliability evaluation</title>
<p>To ensure a thorough assessment of the benchmark performance advantages of LLMs, this study, based on an optimised process, is oriented towards non-professional patients, providing benchmark information response support without requiring users to have professional knowledge. The four most prominent LLMs were chosen and analysed in relation to particular aspects of information quality. Four instruments, namely DISCERN (content integrity), EQIP (presentation clarity), the Global Quality Score (narrative coherence), and the JAMA benchmarks (source transparency), were used to evaluate the reliability of the benchmark performance of the LLM-generated responses.</p>
<p>The DISCERN instrument, developed in 1999 through a collaboration between the University of Oxford and the National Health Service (NHS) in the United Kingdom, is designed to assess the quality of information on treatment options provided by health-related websites (<xref ref-type="bibr" rid="ref16">16</xref>). The instrument consists of three sections and contains 16 items. Scoring is interpreted as follows: a total score of 63&#x2013;75 indicates excellent quality, 51&#x2013;62 good quality, 39&#x2013;50 average quality, 27&#x2013;38 poor quality, and 16&#x2013;26 inferior quality.</p>
<p>The EQIP tool evaluates the quality of documents on health information websites by assessing content, data identification, and structural features to measure their reliability, usability, and effectiveness (<xref ref-type="bibr" rid="ref17">17</xref>). The instrument consists of 20 items, each rated on a four-point scale (&#x201C;Yes,&#x201D; &#x201C;Partially,&#x201D; &#x201C;No,&#x201D; &#x201C;Not Applicable&#x201D;). The EQIP score is calculated as a percentage, and the overall mean EQIP score is categorised into predefined quality classifications.</p>
<p>The GQS provides a comprehensive assessment of the quality of health-related content. The GQS employs a five-point scale to evaluate online health information on quality, information flow, and usability: 1&#x202F;=&#x202F;inferior quality; 2&#x202F;=&#x202F;poor quality; 3&#x202F;=&#x202F;average quality; 4&#x202F;=&#x202F;good quality; and 5&#x202F;=&#x202F;excellent quality.</p>
<p>JAMA serves as the core framework for evaluating the appropriateness of professional health information, using descriptive metrics to assess authorship, sourcing, referencing, timeliness, and disclosure of conflicts of interest (<xref ref-type="bibr" rid="ref18">18</xref>). Each factor is scored on a scale from 0 to 1, yielding a total score ranging from 0 to 4.</p>
<p>All evaluations were conducted independently by authors FC and ZLZ following a standardised procedure. Intraclass correlation coefficient (ICC) analysis was used to assess the consistency between raters, ensuring the reliability of the final scores (<xref ref-type="bibr" rid="ref19">19</xref>). In the event of a disagreement, the senior researcher YHF determined the final score.</p>
</sec>
<sec id="sec5">
<title>Readability evaluation</title>
<p>Readability is assessed using six widely recognised indices&#x2014;ARI, CL, FKGL, FRES, GFI, and SMOG&#x2014;through an online calculator.<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> Based on considerations of health literacy and patient safety, and to facilitate patient comprehension, the American Medical Association (AMA) and the National Institutes of Health (NIH) recommend that healthcare materials be written at a sixth-grade reading level (<xref ref-type="bibr" rid="ref20">20</xref>). A FRES score of &#x2265;80.0 indicates easily understandable text, while scores below 6.0 on other readability metrics indicate high comprehensibility.</p>
<p>Automated Readability Index (ARI)18:</p>
<disp-formula id="E1">
<mml:math id="M1">
<mml:mn>4.71</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>characters</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>0.5</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>words</mml:mtext>
<mml:mtext>sentences</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>21.43</mml:mn>
</mml:math>
</disp-formula>
<p>Flesch Reading Ease Score (FRES)18:</p>
<disp-formula id="E2">
<mml:math id="M2">
<mml:mn>206.835</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1.015</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>words</mml:mtext>
<mml:mtext>sentences</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>84.6</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>syllables</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula>
<p>Gunning Fog Index (GFI)19:</p>
<disp-formula id="E3">
<mml:math id="M3">
<mml:mn>0.4</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">[</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>words</mml:mtext>
<mml:mtext>sentences</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>100</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>complex words</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">]</mml:mo>
</mml:math>
</disp-formula>
<p>Flesch&#x2013;Kincaid Grade Level (FKGL)19:</p>
<disp-formula id="E4">
<mml:math id="M4">
<mml:mn>0.39</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>words</mml:mtext>
<mml:mtext>sentences</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>11.8</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>syllables</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>15.59</mml:mn>
</mml:math>
</disp-formula>
<p>Coleman-Liau Index (CL)20:</p>
<disp-formula id="E5">
<mml:math id="M5">
<mml:mn>5.89</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>characters</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.3</mml:mn>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mfrac>
<mml:mtext>sentences</mml:mtext>
<mml:mtext>words</mml:mtext>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>15.8</mml:mn>
</mml:math>
</disp-formula>
<p>Simple Measure of Gobbledygook (SMOG)21:</p>
<disp-formula id="E6">
<mml:math id="M6">
<mml:mn>1.0430</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:msqrt>
<mml:mtext>polysyllables</mml:mtext>
<mml:mo>&#x00D7;</mml:mo>
<mml:mfrac>
<mml:mn>30</mml:mn>
<mml:mtext>sentences</mml:mtext>
</mml:mfrac>
</mml:msqrt>
<mml:mo>+</mml:mo>
<mml:mn>3.1291</mml:mn>
</mml:math>
</disp-formula>
</sec>
</sec>
<sec id="sec6">
<title>Statistical analysis</title>
<sec id="sec7">
<title>Reliability and readability analysis</title>
<p>This study employs a multi-dimensional assessment framework, including DISCERN, EQIP, JAMA, and GQS, to benchmark the performance of four LLM-based response systems&#x2014;ChatGPT-4o, Deepseek-V3.2, Perplexity Pro, and Microsoft Copilot (Version: 1.240.110.0). Given that the rating data did not conform to a normal distribution, the statistics were described by the median and interquartile range (IQR), and the non-parametric Kruskal-Wallis test was used to compare the statistical differences in reliability scores amongst different LLMs.</p>
<p>In readability analysis and assessment, readability scores for each LLM information responses are calculated based on six readability metrics: Automated Readability Index (ARI), Coleman-Liau Index (CL), Flesch&#x2013;Kincaid Grade Level (FKGL), Flesch Reading Ease (FRES), Gunning Fog Index (GFI), and Simple Measure of Gobbledygook (SMOG). Since the distribution of readability score data fails to meet the normality assumption required for parametric tests, the Kruskal&#x2013;Wallis non-parametric test was also employed to examine statistically significant differences in readability scores across the four programmes.</p>
</sec>
</sec>
<sec id="sec8">
<title>Comparison with 6th-grade benchmarks</title>
<p>The Wilcoxon signed-rank test was used to evaluate the information responses of LLM robots against the reading comprehension level of sixth graders. The readability scores generated by artificial intelligence were compared with the established reference values for sixth graders to analyse the statistical differences.</p>
<p>All statistical tests were two-sided, and the predetermined level of significance was <italic>p</italic>&#x202F;&#x003C;&#x202F;0.05. Data processing, statistical analysis, and visualisation were performed using R (version 4.5.1).</p>
</sec>
</sec>
<sec sec-type="results" id="sec9">
<title>Results</title>
<sec id="sec10">
<title>Reliability analysis</title>
<p>This study utilised the DISCERN, EQIP, JAMA, and GQS assessment tools to evaluate the reliability of LLM generated responses. The reliability index of each LLM programme is measured by descriptive statistics (median [Q1, Q3]) (see <xref ref-type="table" rid="tab2">Table 2</xref>). The inter-rater intraclass correlation coefficient amongst all evaluators was 0.813 (0.767&#x2013;0.864). In the model stability assessment, to ensure consistency in the generated informational responses and mitigate response randomness, all LLM bots were operated under deterministic parameters (temperature&#x202F;=&#x202F;0.0) and executed three times independently. The results were entirely consistent, yielding a stability coefficient of 100%. The Kruskal-Wallis test results indicate statistically significant differences in DISCERN, EQIP, and JAMA scores across the different LLM systems. In contrast, no statistically significant difference was observed for GQS scores (<italic>p</italic>&#x202F;=&#x202F;0.843).</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Reliability score of artificial intelligence programme information response.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>Row type</th>
<th align="center" valign="top">DISCERN (Q1, Q3)</th>
<th align="center" valign="top">EQIP (Q1, Q3)</th>
<th align="center" valign="top">GQS (Q1, Q3)</th>
<th align="center" valign="top">JAMA (Q1, Q3)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="bottom">ChatGPT-4o</td>
<td align="center" valign="bottom">40 (36, 47)</td>
<td align="center" valign="bottom">65 (60, 65)</td>
<td align="center" valign="bottom">4 (3, 4)</td>
<td align="center" valign="bottom">1 (0, 2)</td>
</tr>
<tr>
<td align="left" valign="bottom">Microsoft Copilot</td>
<td align="center" valign="bottom">48.5 (46.5, 50)</td>
<td align="center" valign="bottom">70 (65, 71.25)</td>
<td align="center" valign="bottom">4 (3, 4)</td>
<td align="center" valign="bottom">2 (0, 2)</td>
</tr>
<tr>
<td align="left" valign="bottom">Perplexity_Pro</td>
<td align="center" valign="bottom">45 (42.75, 47)</td>
<td align="center" valign="bottom">62.5 (60, 65)</td>
<td align="center" valign="bottom">4 (3, 4)</td>
<td align="center" valign="bottom">1 (0, 2)</td>
</tr>
<tr>
<td align="left" valign="bottom">Deepseek-V3.2</td>
<td align="center" valign="bottom">47 (46, 50)</td>
<td align="center" valign="bottom">60 (60, 60)</td>
<td align="center" valign="bottom">4 (3, 4)</td>
<td align="center" valign="bottom">0 (0, 0)</td>
</tr>
<tr>
<td align="left" valign="middle">P</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">0.843</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">H</td>
<td align="center" valign="middle">25.987</td>
<td align="center" valign="middle">36.115</td>
<td align="center" valign="middle">0.826</td>
<td align="center" valign="middle">25.213</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The DISCERN analysis reveals significant differences in the benchmark performance of information responses amongst various language models. The Perplexity Pro [45 (42.75, 47)] demonstrated a moderate level of performance, whereas ChatGPT-4-4o [40 (36, 47)] exhibited comparatively inferior information quality. This suggests significant variability in response reliability amongst LLM based conversational agents during information retrieval and analytical tasks. In contrast, the information responses generated by ChatGPT-4-4o exhibit a baseline performance of low quality, attributable to issues with source reliability, support capabilities, and decision-making processes. Although Microsoft Copilot [48.5 (46.5, 50)] and Deepseek-V3.2 [47 (46, 50)] received relatively higher scores and delivered moderately structured responses, their baseline performance is still rated as average overall (see <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>LLM programme response reliability ratings.</p>
</caption>
<graphic xlink:href="fpubh-14-1776697-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart comparing mean reliability scores of four chatbots&#x2014;ChatGPT_4o, Copilot, deepseek, and Perplexity_Pro&#x2014;across DISCERN, EQIP, GQS, and JAMA metrics; Copilot achieves the highest scores in DISCERN, EQIP, and JAMA.</alt-text>
</graphic>
</fig>
<p>The EQIP results demonstrate that all evaluated LLM based agents received favourable quality assessments. Microsoft Copilot achieved the highest score [70 (65, 71.25)], while Deepseek-V3.2 received the lowest [60 (60, 60)]. All LLM based systems demonstrated satisfactory performance in terms of overall structure and clarity, with their comprehensive performance ratings evaluated as &#x201C;good.&#x201D; However, deficiencies were noted in the provision of detailed source information, textual readability, and balanced multi-perspective analysis (see <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<p>The GQS scoring results indicated no statistically significant differences. The performance of artificial intelligence in providing basic health information was moderately satisfactory, with an overall favourable impression, and the assessed LLM systems were rated as good (median&#x202F;=&#x202F;4.0) (see <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<p>The JAMA rating scale revealed a statistically significant difference in outcomes, with Deepseek-V3.2 (median score: 0 [0, 0]) and Microsoft Copilot (median score: 2 [0, 2]) demonstrating distinct performance levels. The performance of LLM in adhering to evidence-based medicine standards reveals significant deficiencies in providing reliable author-related information and disclosures, particularly regarding authorship attribution and conflict-of-interest statements (see <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<p>To delineate the baseline performance disparities in information response capabilities amongst LLM, a Dunn&#x2019;s test was employed for pairwise comparative analysis, thereby elucidating the inter-model performance variations (see <xref ref-type="table" rid="tab3">Table 3</xref>).</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Dunn&#x2019;s test results (<italic>p</italic>-values) for pairwise comparisons of reliability scores.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" char="&#x00D7;">Comparison</th>
<th align="center" valign="top">DISCERN</th>
<th align="center" valign="top">EQIP</th>
<th align="center" valign="top">GQS</th>
<th align="center" valign="top">JAMA</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">ChatGPT-4_4o &#x2013; Microsoft Copilot</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
<td align="center" valign="bottom">1.000</td>
<td align="center" valign="bottom">0.219</td>
</tr>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">ChatGPT-4_4o &#x2013; Deepseek-V3.2</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
<td align="center" valign="bottom">0.084</td>
<td align="center" valign="bottom">0.974</td>
<td align="center" valign="bottom">0.001</td>
</tr>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">Microsoft Copilot &#x2013; Deepseek-V3.2</td>
<td align="center" valign="bottom">0.522</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
<td align="center" valign="bottom">0.949</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">ChatGPT-4_4o &#x2013; Perplexity_Pro</td>
<td align="center" valign="bottom">0.201</td>
<td align="center" valign="bottom">0.547</td>
<td align="center" valign="bottom">1.000</td>
<td align="center" valign="bottom">1.000</td>
</tr>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">Microsoft Copilot &#x2013; Perplexity_Pro</td>
<td align="center" valign="bottom">0.004</td>
<td align="center" valign="bottom">&#x003C;0.001</td>
<td align="center" valign="bottom">1.000</td>
<td align="center" valign="bottom">0.274</td>
</tr>
<tr>
<td align="left" valign="bottom" char="&#x00D7;">Deepseek-V3.2 &#x2013; Perplexity_Pro</td>
<td align="center" valign="bottom">0.023</td>
<td align="center" valign="bottom">0.228</td>
<td align="center" valign="bottom">1.000</td>
<td align="center" valign="bottom">0.002</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>On the DISCERN instrument for assessing information reliability and quality, ChatGPT-4-4o scored significantly lower than Microsoft Copilot (<italic>p</italic>&#x202F;&#x003C;&#x202F;0.001) and Deepseek-V3.2 (<italic>p</italic>&#x202F;&#x003C;&#x202F;0.001). Likewise, Perplexity Pro scored significantly lower than Microsoft Copilot (<italic>p</italic>&#x202F;=&#x202F;0.004) and Deepseek-V3.2 (<italic>p</italic>&#x202F;=&#x202F;0.023). However, no statistically significant differences were observed between ChatGPT-4-4o and Perplexity Pro (<italic>p</italic>&#x202F;=&#x202F;0.201), nor between Microsoft Copilot and Deepseek-V3.2 (<italic>p</italic>&#x202F;=&#x202F;0.522).</p>
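<p>On the Evaluation of the Quality of Information Provision (EQIP) scale, Microsoft Copilot scored significantly higher than ChatGPT-4-4o, Deepseek-V3.2, and Perplexity Pro (all <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001), whereas the differences amongst the remaining model pairs did not reach statistical significance (all <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05).</p>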
<p>In the evaluation using the Global Quality Score (GQS) scale, pairwise comparisons amongst all models did not reveal statistically significant differences (all <italic>p</italic>-values &#x2265;&#x202F;0.949), indicating that the outputs of all models were perceived to be of comparable quality in terms of overall perceptual assessment.</p>
<p>In assessments conducted using the JAMA scale to evaluate information transparency, the performance of Deepseek-V3.2 was significantly inferior to all other models. By contrast, no statistically significant differences were observed in pairwise comparisons amongst ChatGPT-4-4o, Microsoft Copilot, and Perplexity Pro (all <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05).</p>
<p>This study reveals that model performance is critically dependent on the selected evaluation framework. It further confirms the pervasive challenge faced by current LLM based systems in simultaneously ensuring the quality of structured information provision and adhering to conventional medical information transparency standards.</p>
</sec>
<sec id="sec11">
<title>Readability analysis</title>
<p>This study employs six widely recognised readability assessment tools: the ARI, FRES, Gunning-Fog, FKGL, Coleman-Liau, and SMOG indices. The readability scores are presented as median (Q1, Q3) (see <xref ref-type="table" rid="tab4">Table 4</xref>).</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Readability score of artificial intelligence programme response.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Programme (Q1, Q3)</th>
<th align="center" valign="top">ARI</th>
<th align="center" valign="top">CL</th>
<th align="center" valign="top">FKGL</th>
<th align="center" valign="top">FRES</th>
<th align="center" valign="top">GFI</th>
<th align="center" valign="top">SMOG</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">6th grade level</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">80&#x2013;90</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">6</td>
</tr>
<tr>
<td align="left" valign="middle">ChatGPT-4-4o</td>
<td align="center" valign="middle">12.78 (11.35, 15.62)</td>
<td align="center" valign="middle">13.85 (12.45, 15.29)</td>
<td align="center" valign="middle">10.37 (9.08, 12.97)</td>
<td align="center" valign="middle">40.50 (36.50, 46.75)</td>
<td align="center" valign="middle">11.65 (10.70, 12.70)</td>
<td align="center" valign="middle">9.89 (8.16, 11.40)</td>
</tr>
<tr>
<td align="left" valign="middle">Microsoft Copilot</td>
<td align="center" valign="middle">12.45 (11.32, 13.30)</td>
<td align="center" valign="middle">14.17 (12.88, 14.85)</td>
<td align="center" valign="middle">10.15 (9.17, 10.76)</td>
<td align="center" valign="middle">44.50 (39.00, 51.50)</td>
<td align="center" valign="middle">11.20 (10.28, 12.30)</td>
<td align="center" valign="middle">9.23 (8.72, 9.85)</td>
</tr>
<tr>
<td align="left" valign="middle">Perplexity Pro</td>
<td align="center" valign="middle">17.75 (15.19, 18.93)</td>
<td align="center" valign="middle">16.48 (15.49, 17.26)</td>
<td align="center" valign="middle">15.13 (12.72, 16.03)</td>
<td align="center" valign="middle">27.00 (24.00, 36.25)</td>
<td align="center" valign="middle">13.65 (12.18, 14.50)</td>
<td align="center" valign="middle">12.57 (11.26, 13.45)</td>
</tr>
<tr>
<td align="left" valign="middle">Deepseek-V3.2</td>
<td align="center" valign="middle">13.52 (12.82, 15.41)</td>
<td align="center" valign="middle">14.03 (13.16, 14.86)</td>
<td align="center" valign="middle">11.40 (10.37, 12.05)</td>
<td align="center" valign="middle">43.00 (36.75, 45.25)</td>
<td align="center" valign="middle">11.90 (11.28, 12.73)</td>
<td align="center" valign="middle">10.01 (9.49, 11.03)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Data are presented as median (interquartile range, IQR). The recommended scores for the 6th-grade reading level are provided as a reference. ARI, Automated Readability Index; CL, Coleman-Liau Index; FKGL, Flesch&#x2013;Kincaid Grade Level; FRES, Flesch Reading Ease Score; GFI, Gunning Fog Index; SMOG, Simple Measure of Gobbledygook.</p>
</table-wrap-foot>
</table-wrap>
<p>The results indicate that all analysed metrics for evaluating the readability of responses generated by major LLM&#x2014;namely ARI, GFI, FKGL, CL, and SMOG&#x2014;significantly surpassed the recommended benchmark for sixth-grade reading comprehension. The FRES score falls significantly below the benchmark range of 80&#x2013;90 corresponding to the 6th-grade reading level. Even the top-performing Microsoft Copilot achieves a median score of merely 44.50, remaining within the difficult readability range. The results from ARI, GFI, FKGL, CL, and SMOG assessments collectively indicate that the text&#x2019;s reading difficulty aligns with that of upper middle-grade levels. The standard information responses of the LLM robot evaluated in this study exhibit substantial readability challenges, with a comprehension level exceeding that of the general public (sixth-grade equivalent), thereby imposing certain limitations on vulnerable populations with low digital health literacy (see <xref ref-type="fig" rid="fig2">Figure 2</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Mean readability index scores of the LLM programme responses. Red line indicates the 6th-grade level which is the highest recommended reading level for patient education materials. ARI, Automated Readability Index; GFI, Gunning Fog Index; FKGL, Flesch&#x2013;Kincaid Grade Level; CL, Coleman-Liau Index; SMOG, Simple Measure of Gobbledygook; FRES, Flesch Reading Ease Score.</p>
</caption>
<graphic xlink:href="fpubh-14-1776697-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart grid compares mean readability scores among four chatbot models (ChatGPT_4o, Copilot, deepseek, Perplexity_Pro) using six metrics: ARI, CL, FKGL, GFI, SMOG, and FRES. Perplexity_Pro consistently has the highest scores for all metrics except FRES, where it scores lowest. Copilot has the lowest scores for ARI, FKGL, GFI, and SMOG, but the highest for FRES. Red dashed lines represent reference thresholds for each metric. Each metric is represented in a separate panel, colored bars denote different models, and the title identifies the analysis.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec12">
<title>Discussion</title>
<p>With the advancement of artificial intelligence technologies, these systems have garnered increasing attention and adoption. Some users are now employing LLM as substitutes for traditional search engines to seek health-related advice or support decision-making in the healthcare domain. It is noteworthy that, compared to other types of informational responses, the reliability of medical and health-related information is of paramount importance, as erroneous content may lead to adverse health outcomes (<xref ref-type="bibr" rid="ref21">21</xref>, <xref ref-type="bibr" rid="ref22">22</xref>). The findings of this study indicate that the information provided in the responses lacks critical details on certain key topics, exhibits moderate reliability, poor readability, and inconsistent quality. The quality of responses provided by LLM should be continually evaluated, where quality is defined as &#x201C;the totality of characteristics of an entity that bear on its ability to meet stated and implied requirements&#x201D; (<xref ref-type="bibr" rid="ref23">23</xref>, <xref ref-type="bibr" rid="ref24">24</xref>). Findings from this study and other research indicate that the provided informational responses lack critical content in key subject areas and exhibit inconsistent performance across reliability and readability evaluation metrics.</p>
<sec id="sec13">
<title>Reliability of LLM generated health information</title>
<p>This study employs the DISCERN instrument, EQIP, GQS, and JAMA benchmarks to evaluate the reliability and quality of health information responses regarding &#x201C;thunderstorm asthma&#x201D; generated by LLM. By leveraging the respective strengths and relevance of these four distinct assessment tools, a comprehensive, multi-faceted evaluation of response reliability and information quality is conducted (<xref ref-type="bibr" rid="ref25">25</xref>). DISCERN and EQIP are widely used to assess the reliability and quality of written health materials. The former emphasises explicitly optimal evaluation criteria for studies on treatment efficacy, focusing on health information relevant to treatment decisions&#x2014;including therapeutic benefits, risks, and quality of life outcomes (<xref ref-type="bibr" rid="ref26">26</xref>). The latter focuses on the characteristics of information content in clinical application contexts. The GQS evaluation tool provides a subjective assessment of overall quality based on users&#x2019; individual needs and perceptions, as well as the universality of the information. JAMA evaluates four essential characteristics based on the standards for internet healthcare information published in the Journal of the American Medical Association, including authorship and attribution (e.g., author names and affiliations, cited references, copyright information), currency (e.g., date of posting, date of update), and disclosure (e.g., conflicts of interest, sponsorship, advertising, and commercial funding) (<xref ref-type="bibr" rid="ref27">27</xref>).</p>
<p>The evaluation conducted through DISCERN, EQIP, GQS, and JAMA benchmark analyses indicates that Microsoft Copilot exhibits superior performance in information response compared to other LLM. The transparency and precision of the information sources utilised by Microsoft Copilot contribute significantly to its favourable reliability assessment. Meanwhile, ChatGPT-4&#x2019;s reliability is compromised by the lack of source attribution and insufficient disclosure of information origins, whereas Deepseek-V3.2 demonstrates significantly lower scores on reliability and source transparency metrics&#x2014;particularly under the DISCERN and JAMA evaluation frameworks&#x2014;compared to other LLM systems. When seeking health information through LLM, internet users should prioritise the reliability of response sources, as the scientific rigour and evidence-based support of these sources have become increasingly critical indicators for assessing the credibility of health-related information. Incomplete dissemination of information and the provision of unreliable content may lead to harmful consequences, such as delays in seeking medical assistance or the adoption of erroneous preventive measures during similar public health emergencies. The transparency of information sources determines perceived reliability in health communication, whereas the dissemination of incomplete or misleading information undermines this credibility (<xref ref-type="bibr" rid="ref19">19</xref>). The use of social media as a source of health information carries the potential risk of disseminating incomplete or misleading content (<xref ref-type="bibr" rid="ref28">28</xref>).</p>
</sec>
<sec id="sec14">
<title>Readability of LLM generated health information</title>
<p>This study employs the ARI, CL, FKGL, FRES, GFI, and SMOG indices to evaluate the complexity of health information responses concerning &#x201C;Thunderstorm Asthma&#x201D; generated by LLM, thereby assessing the readability of these informational replies. Relevant research indicates that educational disparities are more likely to result in internet resources serving as a primary contributor to non-adherence to medical advice. Vulnerable populations, including older adults and those with lower educational attainment, frequently encounter online health information that is highly readable yet unreliable. The development of health education materials tailored to the public&#x2019;s reading level ensures that internet users can fully comprehend preventive measures for thunderstorm asthma and the safety of pharmacological treatments, thereby facilitating informed decision-making. High readability and reliable information responses are essential to prevent the dissemination of misinformation and enable the public to make informed preventive decisions regarding public health safety issues, such as thunderstorm asthma. Although studies indicate that both SMOG and GFI are more suitable as optimal indices for evaluating the readability of online health-related materials (<xref ref-type="bibr" rid="ref29">29</xref>), for complex public health emergencies such as thunderstorm asthma, this study aims to assess the readability and complexity of LLM generated health information using multidimensional indices to yield comprehensive evaluation outcomes (<xref ref-type="bibr" rid="ref30 ref31 ref32">30&#x2013;32</xref>). The findings of this study indicate that the readability of all artificial intelligence programme-generated responses exceeds the recommended sixth-grade reading level, posing significant challenges in conveying health information to individuals with limited eHealth literacy. 
This study employs a multi-index evaluation framework to identify deficiencies in the content, structure, and other aspects of health information consultations provided through LLM question-answering systems. For instance, the Perplexity model&#x2019;s responses regarding &#x201C;thunderstorm weather forecasting&#x201D; included statements such as &#x201C;require carrying emergency medications&#x201D; and descriptions about &#x201C;symptoms and treatments of thunderstorm asthma&#x201D;. The text includes the specialised terms &#x201C;inhaled corticosteroid medications&#x201D; and &#x201C;airway spasm&#x201D;; these ambiguous expressions and domain-specific terminology pose significant challenges for non-expert users in comprehension and information navigation. Science communication should employ accessible language that aligns with public needs, such as describing the mechanism as &#x201C;alleviating airway hyperresponsiveness through inhaled aerosolised medications, exemplified by corticosteroids such as Budesonide,&#x201D; to convey information clearly and concisely.</p>
<p>Currently, LLM can provide us with a vast amount of convenient information and have shown good performance in numerous studies. However, in complex interdisciplinary fields, especially in the health domain, the difficulty of responding to information may be too high for users with low e-health literacy, making it hard for them to understand and equally access key information. This is a problem that needs to be addressed at present. At the same time, when it comes to obtaining external information, if the retrieved information is from non-medical authoritative sources, the quality of such information cannot be guaranteed.</p>
</sec>
<sec id="sec15">
<title>Limitations</title>
<p>This study is subject to certain limitations. First, the exclusive reliance on Google Trends as a data source limits access to search query topics from alternative search engines, including regions affected by internet censorship or limited connectivity (<xref ref-type="bibr" rid="ref33">33</xref>). Our global-scale analysis seeks to account for regional variations in thematic inquiries, linguistic diversity, and cultural differences, all of which may influence research outcomes. Currently, Google Trends remains the primary tool for analysing global trends in online health information searches. Furthermore, artificial intelligence-based LLM are constrained by limited sample sizes and are applicable only for interpreting probabilistic emergent events, such as thunderstorm asthma.</p>
<p>This study is based solely on a limited set of query data, which may not fully capture the broad spectrum of user needs concerning thunderstorm asthma health information. The scope of the training data, potential computational biases, and other inherent limitations may affect the accuracy, completeness, and clarity of the generated responses.</p>
<p>The readability and reliability metrics used in this study may not fully capture the nuanced distinctions inherent in LLM generated content. While evaluation results for various indices indirectly reflect information quality, they do not account for subjective factors, such as users&#x2019; contextual needs.</p>
<p>In the future, LLM systems will undergo continuous iteration and refinement. The evaluation of LLM generated responses in healthcare information must be regularly updated accordingly. Furthermore, it is essential to explore real-time user interaction data to accurately assess the reliability, readability, and practical utility of health information provided by LLM applications.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec16">
<title>Conclusion</title>
<p>This study highlights the importance of dynamic evaluation in the response process of LLM for healthcare information inquiries. Although platforms such as Perplexity and Microsoft Copilot demonstrate acceptable reliability, their information readability remains suboptimal. Going forward, the development of LLM applications must prioritise addressing diverse users&#x2019; reliability requirements and linguistic readability needs, which are critical for facilitating effective communication of healthcare information.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec17">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="ethics-statement" id="sec18">
<title>Ethics statement</title>
<p>This study did not involve human subject research and therefore did not require submission to an institutional ethics review board. It was conducted in accordance with the ethical principles outlined in the Declaration of Helsinki.</p>
</sec>
<sec sec-type="author-contributions" id="sec19">
<title>Author contributions</title>
<p>ZZ: Data curation, Formal analysis, Investigation, Methodology, Writing &#x2013; original draft. YF: Methodology, Resources, Software, Validation, Visualization, Writing &#x2013; review &#x0026; editing. FC: Project administration, Supervision, Validation, Visualization, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec20">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec21">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec22">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Packe</surname><given-names>GE</given-names></name> <name><surname>Ayres</surname><given-names>JG</given-names></name></person-group>. <article-title>Asthma outbreak during a thunderstorm</article-title>. <source>Lancet</source>. (<year>1985</year>) <volume>2</volume>:<fpage>199</fpage>&#x2013;<lpage>204</lpage>. doi: <pub-id pub-id-type="doi">10.1016/s0140-6736(85)91510-7</pub-id>, <pub-id pub-id-type="pmid">2862383</pub-id></mixed-citation></ref>
<ref id="ref2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>YY</given-names></name> <name><surname>Xue</surname><given-names>T</given-names></name> <name><surname>Li</surname><given-names>HR</given-names></name> <name><surname>Guan</surname><given-names>K</given-names></name></person-group>. <article-title>Retrospective analysis of epidemic thunderstorm asthma in children in Yulin, Northwest China</article-title>. <source>Pediatr Res</source>. (<year>2021</year>) <volume>89</volume>:<fpage>958</fpage>&#x2013;<lpage>61</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41390-020-0980-9</pub-id>, <pub-id pub-id-type="pmid">32454517</pub-id></mixed-citation></ref>
<ref id="ref3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname><given-names>C</given-names></name> <name><surname>Li</surname><given-names>X</given-names></name> <name><surname>Cai</surname><given-names>H</given-names></name> <name><surname>Tong</surname><given-names>X</given-names></name> <name><surname>Ai</surname><given-names>L</given-names></name> <name><surname>Li</surname><given-names>Y</given-names></name> <etal/></person-group>. <article-title>Epidemic thunderstorm asthma in Hohhot, northern China: a retrospective analysis of clinical characteristics in 155 patients</article-title>. <source>J Allergy Clin Immunol Pract</source>. (<year>2024</year>) <volume>12</volume>:<fpage>2514</fpage>&#x2013;<lpage>2516.e1</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jaip.2024.06.012</pub-id>, <pub-id pub-id-type="pmid">38897343</pub-id></mixed-citation></ref>
<ref id="ref4"><label>4.</label><mixed-citation publication-type="other"><collab id="coll1">Inspector-General for Emergency Management Victoria, State Government of Victoria</collab>. <source>Review of response to the thunderstorm asthma event of 21&#x2013;22 November 2016 &#x2013; Final report</source>. <publisher-loc>Melbourne</publisher-loc>: <publisher-name>IGEM</publisher-name>; (<year>2017</year>). Available online at: <ext-link xlink:href="https://www.igem.vic.gov.au/publications/igem-reports/review-of-response-to-the-thunderstorm-asthma-event-of-21-22-november-0" ext-link-type="uri">https://www.igem.vic.gov.au/publications/igem-reports/review-of-response-to-the-thunderstorm-asthma-event-of-21-22-november-0</ext-link> (accessed Oct 16, 2025).</mixed-citation></ref>
<ref id="ref5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andrew</surname><given-names>E</given-names></name> <name><surname>Nehme</surname><given-names>Z</given-names></name> <name><surname>Bernard</surname><given-names>S</given-names></name> <name><surname>Abramson</surname><given-names>MJ</given-names></name> <name><surname>Newbigin</surname><given-names>E</given-names></name> <name><surname>Piper</surname><given-names>B</given-names></name> <etal/></person-group>. <article-title>Stormy weather: a retrospective analysis of demand for emergency medical services during epidemic thunderstorm asthma</article-title>. <source>BMJ</source>. (<year>2017</year>) <volume>359</volume>:<fpage>j5636</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.j5636</pub-id>, <pub-id pub-id-type="pmid">29237604</pub-id></mixed-citation></ref>
<ref id="ref6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname><given-names>M</given-names></name> <name><surname>Jin</surname><given-names>Q</given-names></name> <name><surname>Chu</surname><given-names>J</given-names></name> <name><surname>Na</surname><given-names>J</given-names></name></person-group>. <article-title>Yi qi qun ti xing leibao xiao chuan de diaocha fenxi [investigation and analysis of a thunderstorm asthma outbreak]</article-title>. <source>Ningxia Med J</source>. (<year>2023</year>) <volume>45</volume>:<fpage>270</fpage>&#x2013;<lpage>3</lpage>. doi: <pub-id pub-id-type="doi">10.13621/j.1001-5949.2023.03.0270</pub-id></mixed-citation></ref>
<ref id="ref7"><label>7.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>P&#x00F6;rtner</surname><given-names>HO</given-names></name> <name><surname>Roberts</surname><given-names>DC</given-names></name> <name><surname>Tignor</surname><given-names>MMB</given-names></name> <name><surname>Poloczanska</surname><given-names>ES</given-names></name> <name><surname>Mintenbeck</surname><given-names>K</given-names></name> <name><surname>Alegr&#x00ED;a</surname><given-names>A</given-names></name> <etal/></person-group>. <source>Climate change 2022: Impacts, adaptation and vulnerability</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name> (<year>2022</year>).</mixed-citation></ref>
<ref id="ref8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>W</given-names></name> <name><surname>Kan</surname><given-names>H</given-names></name> <name><surname>Jiang</surname><given-names>Y</given-names></name> <name><surname>Geng</surname><given-names>Y</given-names></name> <name><surname>Nie</surname><given-names>Y</given-names></name> <name><surname>Yang</surname><given-names>M</given-names></name></person-group>. <article-title>MED-ChatGPT CoPilot: a ChatGPT medical assistant for case mining and adjunctive therapy</article-title>. <source>Front Med (Lausanne)</source>. (<year>2024</year>) <volume>11</volume>:<fpage>1460553</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fmed.2024.1460553</pub-id></mixed-citation></ref>
<ref id="ref9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Goodman</surname><given-names>RS</given-names></name> <name><surname>Patrinely</surname><given-names>JR</given-names></name> <name><surname>Stone</surname><given-names>CA</given-names></name> <name><surname>Zimmerman</surname><given-names>E</given-names></name> <name><surname>Donald</surname><given-names>RR</given-names></name> <name><surname>Chang</surname><given-names>SS</given-names></name> <etal/></person-group>. <article-title>Accuracy and reliability of chatbot responses to physician questions</article-title>. <source>JAMA Netw Open</source>. (<year>2023</year>) <volume>6</volume>:<fpage>e2336483</fpage>. doi: <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36483</pub-id>, <pub-id pub-id-type="pmid">37782499</pub-id></mixed-citation></ref>
<ref id="ref10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zusman</surname><given-names>NL</given-names></name> <name><surname>Bauer</surname><given-names>M</given-names></name> <name><surname>Mann</surname><given-names>J</given-names></name> <name><surname>Goldstein</surname><given-names>RY</given-names></name></person-group>. <article-title>AI = appropriate insight? ChatGPT appropriately answers parents&#x2019; questions for common pediatric orthopaedic conditions</article-title>. <source>J Pediatr Orthop Soc North Am</source>. (<year>2023</year>) <volume>5</volume>:<fpage>762</fpage>. doi: <pub-id pub-id-type="doi">10.55275/jposna-2023-762</pub-id>, <pub-id pub-id-type="pmid">40432947</pub-id></mixed-citation></ref>
<ref id="ref11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lowndes</surname><given-names>AM</given-names></name> <name><surname>Connelly</surname><given-names>DM</given-names></name></person-group>. <article-title>User experiences of older adults navigating an online database of community-based physical activity programs</article-title>. <source>Digit Health</source>. (<year>2023</year>) <volume>9</volume>:<fpage>20552076231167004</fpage>. doi: <pub-id pub-id-type="doi">10.1177/20552076231167004</pub-id>, <pub-id pub-id-type="pmid">37051565</pub-id></mixed-citation></ref>
<ref id="ref12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Toiv</surname><given-names>A</given-names></name> <name><surname>Saleh</surname><given-names>Z</given-names></name> <name><surname>Ishak</surname><given-names>A</given-names></name> <name><surname>Alsheik</surname><given-names>E</given-names></name> <name><surname>Venkat</surname><given-names>D</given-names></name> <name><surname>Nandi</surname><given-names>N</given-names></name> <etal/></person-group>. <article-title>Digesting digital health: a study of appropriateness and readability of ChatGPT-generated gastroenterological information</article-title>. <source>Clin Transl Gastroenterol</source>. (<year>2024</year>) <volume>15</volume>:<fpage>e00765</fpage>. doi: <pub-id pub-id-type="doi">10.14309/ctg.0000000000000765</pub-id>, <pub-id pub-id-type="pmid">39212302</pub-id></mixed-citation></ref>
<ref id="ref13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Joshi</surname><given-names>A</given-names></name> <name><surname>Sparks</surname><given-names>R</given-names></name> <name><surname>McHugh</surname><given-names>J</given-names></name> <name><surname>Karimi</surname><given-names>S</given-names></name> <name><surname>Paris</surname><given-names>C</given-names></name> <name><surname>MacIntyre</surname><given-names>CR</given-names></name></person-group>. <article-title>Harnessing tweets for early detection of an acute disease event</article-title>. <source>Epidemiology</source>. (<year>2020</year>) <volume>31</volume>:<fpage>90</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1097/EDE.0000000000001133</pub-id>, <pub-id pub-id-type="pmid">31651659</pub-id></mixed-citation></ref>
<ref id="ref14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mavragani</surname><given-names>A</given-names></name> <name><surname>Ochoa</surname><given-names>G</given-names></name></person-group>. <article-title>Google trends in infodemiology and infoveillance: methodology framework</article-title>. <source>JMIR Public Health Surveill</source>. (<year>2019</year>) <volume>5</volume>:<fpage>e13439</fpage>. doi: <pub-id pub-id-type="doi">10.2196/13439</pub-id>, <pub-id pub-id-type="pmid">31144671</pub-id></mixed-citation></ref>
<ref id="ref15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>C</given-names></name> <name><surname>Wang</surname><given-names>Y</given-names></name> <name><surname>Yang</surname><given-names>H</given-names></name> <name><surname>Hou</surname><given-names>J</given-names></name> <name><surname>Sun</surname><given-names>L</given-names></name> <name><surname>Zhang</surname><given-names>X</given-names></name> <etal/></person-group>. <article-title>Association between cancer incidence and mortality in web-based data in China: infodemiology study</article-title>. <source>J Med Internet Res</source>. (<year>2019</year>) <volume>21</volume>:<fpage>e10677</fpage>. doi: <pub-id pub-id-type="doi">10.2196/10677</pub-id>, <pub-id pub-id-type="pmid">30694203</pub-id></mixed-citation></ref>
<ref id="ref16"><label>16.</label><mixed-citation publication-type="book"><collab id="coll2">American Medical Association</collab>. <source>Health literacy and patient safety: help patients understand</source>. <publisher-loc>Chicago (IL)</publisher-loc>: <publisher-name>AMA</publisher-name> (<year>2007</year>).</mixed-citation></ref>
<ref id="ref17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Charnock</surname><given-names>D</given-names></name> <name><surname>Shepperd</surname><given-names>S</given-names></name> <name><surname>Needham</surname><given-names>G</given-names></name> <name><surname>Gann</surname><given-names>R</given-names></name></person-group>. <article-title>DISCERN: an instrument for judging the quality of written consumer health information on treatment choices</article-title>. <source>J Epidemiol Community Health</source>. (<year>1999</year>) <volume>53</volume>:<fpage>105</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1136/jech.53.2.105</pub-id>, <pub-id pub-id-type="pmid">10396471</pub-id></mixed-citation></ref>
<ref id="ref18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moult</surname><given-names>B</given-names></name> <name><surname>Franck</surname><given-names>LS</given-names></name> <name><surname>Brady</surname><given-names>H</given-names></name></person-group>. <article-title>Ensuring quality information for patients: development and preliminary validation of a new instrument to improve the quality of written health care information</article-title>. <source>Health Expect</source>. (<year>2004</year>) <volume>7</volume>:<fpage>165</fpage>&#x2013;<lpage>75</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1369-7625.2004.00273.x</pub-id>, <pub-id pub-id-type="pmid">15117391</pub-id></mixed-citation></ref>
<ref id="ref19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname><given-names>Z</given-names></name> <name><surname>Lum</surname><given-names>E</given-names></name> <name><surname>Car</surname><given-names>J</given-names></name></person-group>. <article-title>Medication management apps for diabetes: systematic assessment of the transparency and reliability of health information dissemination</article-title>. <source>JMIR Mhealth Uhealth</source>. (<year>2020</year>) <volume>8</volume>:<fpage>e15364</fpage>. doi: <pub-id pub-id-type="doi">10.2196/15364</pub-id>, <pub-id pub-id-type="pmid">32130163</pub-id></mixed-citation></ref>
<ref id="ref20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ngo</surname><given-names>S</given-names></name> <name><surname>Asirvatham</surname><given-names>R</given-names></name> <name><surname>Baird</surname><given-names>GL</given-names></name> <name><surname>Sarraju</surname><given-names>A</given-names></name> <name><surname>Maron</surname><given-names>DJ</given-names></name> <name><surname>Rodriguez</surname><given-names>F</given-names></name></person-group>. <article-title>Readability and reliability of online patient education materials about statins</article-title>. <source>Am J Prev Cardiol</source>. (<year>2023</year>) <volume>16</volume>:<fpage>100594</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ajpc.2023.100594</pub-id>, <pub-id pub-id-type="pmid">37822580</pub-id></mixed-citation></ref>
<ref id="ref21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vargas Meza</surname><given-names>X</given-names></name> <name><surname>Oikawa</surname><given-names>M</given-names></name></person-group>. <article-title>Japanese perception of brain death and implications for new medical technologies: quantitative and qualitative social media analysis</article-title>. <source>JMIR Form Res</source>. (<year>2024</year>) <volume>8</volume>:<fpage>e54025</fpage>. doi: <pub-id pub-id-type="doi">10.2196/54025</pub-id>, <pub-id pub-id-type="pmid">39291895</pub-id></mixed-citation></ref>
<ref id="ref22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pauer</surname><given-names>F</given-names></name> <name><surname>Litzkendorf</surname><given-names>S</given-names></name> <name><surname>G&#x00F6;bel</surname><given-names>J</given-names></name> <name><surname>Storf</surname><given-names>H</given-names></name> <name><surname>Zeidler</surname><given-names>J</given-names></name> <name><surname>Graf von der Schulenburg</surname><given-names>JM</given-names></name></person-group>. <article-title>Rare diseases on the internet: an assessment of the quality of online information</article-title>. <source>J Med Internet Res</source>. (<year>2017</year>) <volume>19</volume>:<fpage>e23</fpage>. doi: <pub-id pub-id-type="doi">10.2196/jmir.7056</pub-id>, <pub-id pub-id-type="pmid">28100442</pub-id></mixed-citation></ref>
<ref id="ref23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Byabazaire</surname><given-names>J</given-names></name> <name><surname>O&#x2019;Hare</surname><given-names>GMP</given-names></name> <name><surname>Collier</surname><given-names>R</given-names></name> <name><surname>Delaney</surname><given-names>D</given-names></name></person-group>. <article-title>IoT data quality assessment framework using adaptive weighted estimation fusion</article-title>. <source>Sensors</source>. (<year>2023</year>) <volume>23</volume>:<fpage>5993</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s23135993</pub-id>, <pub-id pub-id-type="pmid">37447841</pub-id></mixed-citation></ref>
<ref id="ref24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Selman</surname><given-names>TJ</given-names></name> <name><surname>Prakash</surname><given-names>T</given-names></name> <name><surname>Khan</surname><given-names>KS</given-names></name></person-group>. <article-title>Quality of health information for cervical cancer treatment on the internet</article-title>. <source>BMC Womens Health</source>. (<year>2006</year>) <volume>6</volume>:<fpage>9</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1472-6874-6-9</pub-id>, <pub-id pub-id-type="pmid">16787534</pub-id></mixed-citation></ref>
<ref id="ref25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gen&#x00E7;</surname><given-names>M</given-names></name></person-group>. <article-title>YouTube as a source of patient information on positron emission tomography</article-title>. <source>J Health Sci Med</source>. (<year>2023</year>) <volume>6</volume>:<fpage>597</fpage>&#x2013;<lpage>603</lpage>. doi: <pub-id pub-id-type="doi">10.32322/jhsm.1245143</pub-id></mixed-citation></ref>
<ref id="ref26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baqraf</surname><given-names>YKA</given-names></name> <name><surname>Keikhosrokiani</surname><given-names>P</given-names></name> <name><surname>Al-Rawashdeh</surname><given-names>M</given-names></name></person-group>. <article-title>Evaluating online health information quality using machine learning and deep learning: a systematic literature review</article-title>. <source>Digit Health</source>. (<year>2023</year>) <volume>9</volume>:<fpage>20552076231212296</fpage>. doi: <pub-id pub-id-type="doi">10.1177/20552076231212296</pub-id>, <pub-id pub-id-type="pmid">38025112</pub-id></mixed-citation></ref>
<ref id="ref27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Eglenen</surname><given-names>MN</given-names></name> <name><surname>Arslan</surname><given-names>C</given-names></name> <name><surname>Cakan</surname><given-names>DG</given-names></name></person-group>. <article-title>Quality and content assessment of internet information on nasoalveolar molding</article-title>. <source>BMC Public Health</source>. (<year>2025</year>) <volume>25</volume>:<fpage>389</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12889-025-21616-8</pub-id>, <pub-id pub-id-type="pmid">39885471</pub-id></mixed-citation></ref>
<ref id="ref28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ye</surname><given-names>C</given-names></name> <name><surname>Fang</surname><given-names>Y</given-names></name> <name><surname>Lian</surname><given-names>Y</given-names></name> <name><surname>He</surname><given-names>Y</given-names></name></person-group>. <article-title>Gluten-free diet on video platforms: retrospective infodemiology study</article-title>. <source>Digit Health</source>. (<year>2024</year>) <volume>10</volume>:<fpage>20552076231224594</fpage>. doi: <pub-id pub-id-type="doi">10.1177/20552076231224594</pub-id>, <pub-id pub-id-type="pmid">38235417</pub-id></mixed-citation></ref>
<ref id="ref29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>J</given-names></name> <name><surname>Wang</surname><given-names>S</given-names></name> <name><surname>Jiang</surname><given-names>H</given-names></name></person-group>. <article-title>The impact of popular science articles by physicians on their performance on online medical platforms</article-title>. <source>Healthcare</source>. (<year>2022</year>) <volume>10</volume>:<fpage>2432</fpage>. doi: <pub-id pub-id-type="doi">10.3390/healthcare10122432</pub-id>, <pub-id pub-id-type="pmid">36553956</pub-id></mixed-citation></ref>
<ref id="ref30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yamada</surname><given-names>Y</given-names></name> <name><surname>Okuhara</surname><given-names>T</given-names></name> <name><surname>Yokota</surname><given-names>R</given-names></name> <name><surname>Furukawa</surname><given-names>E</given-names></name> <name><surname>Okada</surname><given-names>H</given-names></name> <name><surname>Kiuchi</surname><given-names>T</given-names></name></person-group>. <article-title>Evaluating the understandability and actionability of Japanese human papillomavirus vaccination educational materials on cervical cancer</article-title>. <source>Health Promot Int</source>. (<year>2025</year>) <volume>40</volume>:<fpage>daaf034</fpage>. doi: <pub-id pub-id-type="doi">10.1093/heapro/daaf034</pub-id>, <pub-id pub-id-type="pmid">40265632</pub-id></mixed-citation></ref>
<ref id="ref31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Soto-Ch&#x00E1;vez</surname><given-names>MJ</given-names></name> <name><surname>Bustos</surname><given-names>MM</given-names></name> <name><surname>Fern&#x00E1;ndez-&#x00C1;vila</surname><given-names>DG</given-names></name> <name><surname>Mu&#x00F1;oz</surname><given-names>OM</given-names></name></person-group>. <article-title>Evaluation of information provided to patients by ChatGPT about chronic diseases in Spanish language</article-title>. <source>Digit Health</source>. (<year>2024</year>) <volume>10</volume>:<fpage>20552076231224603</fpage>. doi: <pub-id pub-id-type="doi">10.1177/20552076231224603</pub-id></mixed-citation></ref>
<ref id="ref32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blount</surname><given-names>T</given-names></name> <name><surname>Gerhold</surname><given-names>C</given-names></name> <name><surname>Bailey</surname><given-names>V</given-names></name> <name><surname>Sweeney</surname><given-names>MJ</given-names></name></person-group>. <article-title>An objective analysis of the quality and readability of online information on breast implant illness</article-title>. <source>Cureus</source>. (<year>2025</year>) <volume>17</volume>:<fpage>e82042</fpage>. doi: <pub-id pub-id-type="doi">10.7759/cureus.82042</pub-id>, <pub-id pub-id-type="pmid">40351909</pub-id></mixed-citation></ref>
<ref id="ref33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Scharkow</surname><given-names>M</given-names></name> <name><surname>Vogelgesang</surname><given-names>J</given-names></name></person-group>. <article-title>Measuring the public agenda using search engine queries</article-title>. <source>Int J Public Opin Res</source>. (<year>2011</year>) <volume>23</volume>:<fpage>104</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1093/ijpor/edq048</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0002">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2359888/overview">Andreas Follmann</ext-link>, University Hospital RWTH Aachen, Germany</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0003">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2539291/overview">Youssef Er-Rays</ext-link>, Ibn Tofail University, Morocco</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3345173/overview">Ad&#x00E9;la Kotatkova</ext-link>, University of Jaume I, Spain</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001"><label>1</label><p><ext-link xlink:href="https://readabilityformulas.com" ext-link-type="uri">https://readabilityformulas.com</ext-link></p></fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbr1">
<label>Abbreviations:</label>
<p>EQIP, Ensuring Quality Information for Patients Scale; GQS, Global Quality Scoring; ARI, Automated Readability Index; CL, Coleman&#x2013;Liau Grade Level; FKGL, Flesch&#x2013;Kincaid Grade Level; FRES, Flesch Reading Ease Score; GFI, Gunning Fog Index; SMOG, Simple Measure of Gobbledygook; MeSH, Medical Subject Headings; NIH, National Institutes of Health; AMA, American Medical Association; NHS, National Health Service; LLM, Large Language Model.</p>
</fn>
</fn-group>
</back>
</article>