<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1754498</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Classification of pediatric dental diseases from panoramic radiographs using natural language transformer and deep learning models</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Pham</surname> <given-names>Tuan D.</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/59759"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Al-Hebshi</surname> <given-names>Seba</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Barts and The London School of Medicine and Dentistry, Queen Mary University of London</institution>, <city>London</city>, <country country="GB">United Kingdom</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Tuan D. Pham, <email xlink:href="mailto:tuan.pham@qmul.ac.uk">tuan.pham@qmul.ac.uk</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-03">
<day>03</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1754498</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>07</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Pham and Al-Hebshi.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Pham and Al-Hebshi</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-03">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Accurate classification of pediatric dental diseases from panoramic radiographs is essential for early diagnosis and effective treatment planning. While deep learning models traditionally operate directly on image data, text-based representations generated from radiographs may provide an alternative strategy for disease classification.</p></sec>
<sec>
<title>Methods</title>
<p>This study proposed a text-driven framework in which a natural language transformer was used to generate structured textual descriptions from panoramic radiographs. These descriptions were subsequently classified for binary disease detection using three deep learning architectures: a one-dimensional convolutional neural network (1D-CNN), a long short-term memory (LSTM) network, and a pretrained Bidirectional Encoder Representations from Transformer (BERT) model. Model performance was evaluated and compared against three pretrained convolutional neural networks trained directly on radiographic images.</p></sec>
<sec>
<title>Results</title>
<p>The 1D-CNN achieved the highest performance with 84% accuracy, demonstrating balanced classification across disease categories. The BERT model reached 77% accuracy, showing strong performance in detecting periapical infections but comparatively lower sensitivity for caries identification. The LSTM model performed substantially worse, achieving 57% accuracy. Both the 1D-CNN and BERT text-based approaches outperformed the three image-based pretrained CNN models.</p></sec>
<sec>
<title>Discussion</title>
<p>These findings suggest that text-based classification of panoramic radiographs is a potential alternative to conventional image-based deep learning methods. Language-driven models show promise for radiographic interpretation; however, challenges remain in achieving consistent generalizability across disease types. Future research should focus on improving radiograph-to-text generation quality, developing hybrid architectures that integrate textual and visual features, and validating performance on larger and more diverse datasets to strengthen clinical applicability.</p></sec></abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>children</kwd>
<kwd>deep learning</kwd>
<kwd>dental diseases</kwd>
<kwd>natural language processing</kwd>
<kwd>panoramic radiographs</kwd>
</kwd-group>
<funding-group>
  <funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="34"/>
<page-count count="16"/>
<word-count count="9734"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Medicine and Public Health</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Accurate and early diagnosis of pediatric dental diseases is essential for effective treatment planning and the prevention of long-term complications (<xref ref-type="bibr" rid="B21">Muntean et al., 2015</xref>; <xref ref-type="bibr" rid="B34">Zou et al., 2018</xref>; <xref ref-type="bibr" rid="B24">Oral Health in America: Advances and Challenges, 2021</xref>; <xref ref-type="bibr" rid="B6">Featherstone et al., 2021</xref>; <xref ref-type="bibr" rid="B2">Abdelaziz, 2023</xref>). Panoramic radiographs are widely used in dentistry to assess conditions such as caries and periapical infections. Traditionally, diagnosis relies on manual interpretation by dental professionals, a process that can be time-consuming and subject to interobserver variability. To enhance diagnostic efficiency and accuracy, artificial intelligence (AI) has emerged as a promising tool in dental imaging, particularly through deep learning models trained for automated disease classification (<xref ref-type="bibr" rid="B25">Ossowska et al., 2022</xref>; <xref ref-type="bibr" rid="B5">Ding et al., 2023</xref>; <xref ref-type="bibr" rid="B4">Dhingra, 2023</xref>; <xref ref-type="bibr" rid="B7">Ghaffari et al., 2024</xref>).</p>
<p>Recent advances in deep learning have enabled AI models to assist in detecting and diagnosing a range of pediatric dental conditions, an area that has historically received less attention than adult dental diagnostics. <xref ref-type="bibr" rid="B14">Kaya et al. (2022)</xref> introduced a deep learning model specifically designed to detect permanent tooth germs in pediatric panoramic radiographs, addressing a gap in prior research, which has primarily focused on adult radiographs. Similarly, <xref ref-type="bibr" rid="B22">Ong et al. (2024)</xref> developed a fully automated deep learning framework for assessing dental development stages in pediatric radiographs, further demonstrating AI&#x00027;s potential to improve diagnostic accuracy and clinical decision-making.</p>
<p>The systematic review by <xref ref-type="bibr" rid="B15">Khanagar et al. (2022)</xref> highlights the transformative impact of AI in pediatric dentistry, particularly in early disease detection, which is critical for timely intervention and treatment. Their review examined various AI models applied in diagnosing conditions such as dental caries and periodontal diseases, reinforcing the potential of machine learning in pediatric oral healthcare. Beyond disease detection, AI also plays a role in treatment planning and patient management. <xref ref-type="bibr" rid="B17">La Rosa et al. (2024)</xref> explored how AI algorithms can identify early signs of dental pathologies and improve orthodontic diagnoses through automated image analysis. This aligns with <xref ref-type="bibr" rid="B20">Mahajan et al. (2023)</xref>, who conducted a systematic review on the effectiveness of AI in pediatric dentistry, concluding that AI-based diagnostic tools have demonstrated promising results in clinical applications.</p>
<p>Despite these advancements, the implementation of AI in pediatric dentistry faces several challenges, particularly concerning data quality, dataset size, and model generalizability. AI models require large and diverse datasets to achieve robust performance, yet pediatric dental datasets are often limited, leading to potential biases and decreased model reliability. <xref ref-type="bibr" rid="B11">Hsieh (2024)</xref> emphasized that while AI holds great promise for comprehensive dental disease classification, challenges such as class imbalance and the rarity of certain conditions can hinder model training and diagnostic accuracy. These limitations underscore the need for ongoing research to refine AI methodologies, improve data preprocessing techniques, and develop strategies for overcoming data scarcity.</p>
<p>The integration of AI into pediatric dental disease classification from panoramic radiographs is a rapidly evolving field with significant potential to enhance diagnostic accuracy, reduce human error, and improve patient outcomes. However, to fully realize its benefits, further research is needed to address challenges related to data availability, annotation consistency, and model validation across diverse populations. While AI-driven image-based classification using convolutional neural networks (CNNs) has demonstrated success, such approaches often require extensive preprocessing as well as large datasets to achieve high performance. Hybrid models that integrate text descriptions from images and deep learning for classification could provide a promising direction for improving accuracy, interpretability, and scalability in pediatric dental diagnostics.</p>
<p>Large language models (LLMs), particularly ChatGPT, have emerged as transformative tools in medical and dental applications, leveraging advanced natural language processing (NLP) capabilities to enhance various aspects of healthcare delivery. These models are designed to understand and generate human-like text, making them valuable for a range of tasks, including patient communication, clinical decision support, and medical education.</p>
<p>In addition to these applications, LLMs enable the transformation of visual medical information into structured textual representations. To the best of the authors&#x00027; knowledge, text-based classification of dental images has not been previously reported, and there is currently no established literature directly addressing this paradigm. Consequently, a broader discussion of LLMs in dentistry and healthcare is provided to establish the necessary background and to contextualize the proposed approach within the emerging landscape of language-driven medical AI applications.</p>
<p>One of the primary applications of ChatGPT in healthcare is its ability to assist in clinical decision-making. A study by <xref ref-type="bibr" rid="B8">Giannakopoulos et al. (2023)</xref> evaluated the performance of ChatGPT and other LLMs in supporting evidence-based dentistry, demonstrating that these models can effectively provide relevant information and aid practitioners in making informed decisions, ultimately improving patient care. Similarly, <xref ref-type="bibr" rid="B12">Huang et al. (2023)</xref> highlighted ChatGPT&#x00027;s potential in dental education, suggesting that it could serve as a valuable resource for students by offering instant access to information and facilitating interactive learning experiences.</p>
<p>In patient communication, LLMs like ChatGPT have shown promise in addressing inquiries and enhancing patient satisfaction. A survey conducted by <xref ref-type="bibr" rid="B32">Yong et al. (2024)</xref> found that LLMs were effective in resolving patient complaints, emphasizing their ability to generate thoughtful and contextually appropriate responses. This capability is particularly valuable in fostering better patient-provider relationships and improving overall healthcare experiences.</p>
<p>The integration of LLMs in medical education has also been a focal point of recent research. <xref ref-type="bibr" rid="B1">Abd-Alrazaq et al. (2023)</xref> explored how these models enhance medical students&#x00027; learning experiences by providing comprehensive explanations of complex medical concepts. Similarly, <xref ref-type="bibr" rid="B31">Wu et al. (2024)</xref> evaluated ChatGPT&#x00027;s performance on nursing licensure examinations, indicating that LLMs can serve as effective study aids, potentially improving educational outcomes.</p>
<p>Despite these advantages, the deployment of LLMs in healthcare is not without challenges. <xref ref-type="bibr" rid="B19">Lin et al. (2024)</xref> noted that while ChatGPT offers valuable support in critical care medicine, it also has limitations, such as the potential to generate inaccurate or misleading information. This concern is echoed by <xref ref-type="bibr" rid="B29">Tiwari et al. (2024)</xref>, who conducted a systematic review on the implications of ChatGPT in public health dentistry, highlighting the need for rigorous assessment of risks, including biases and misinformation, before widespread adoption.</p>
<p>The motivation for this study stems from the need to enhance the automated classification of pediatric dental diseases from panoramic radiographs by leveraging both NLP and deep learning. Traditional image-based classification methods often require extensive preprocessing, high-resolution datasets, and expertise to interpret complex radiographic features. To overcome these challenges, this study explores an alternative approach by using ChatGPT to generate textual descriptions of the conditions depicted in panoramic radiographs. These textual representations serve as input for deep learning models, enabling disease classification through language-based analysis rather than direct image interpretation. This approach aims to improve diagnostic accuracy, enhance model interpretability, and reduce reliance on large-scale labeled radiographic datasets. By integrating AI-driven text generation with deep learning classification, this study seeks to demonstrate the feasibility of a text-based framework for dental disease diagnosis, offering a scalable and interpretable alternative to traditional radiographic analysis.</p></sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<sec>
<label>2.1</label>
<title>Panoramic radiographs</title>
<p>The children&#x00027;s dental panoramic radiographs dataset (<xref ref-type="bibr" rid="B33">Zhang et al., 2023</xref>), hosted on Figshare (<xref ref-type="bibr" rid="B33">Zhang et al., 2023</xref>), is utilized in this study. The dataset, titled <italic>Child Dental Disease Detection Dataset</italic>, was originally designed with a structured division into &#x0201C;Train&#x0201D; and &#x0201C;Test&#x0201D; subsets, as described in <xref ref-type="bibr" rid="B33">Zhang et al. (2023)</xref>. Each subset contains both the original radiographic images and corresponding expert annotations. The dataset consists of 100 images categorized into five dental diseases: caries (class 1), periapical infections (class 2), pulpitis (class 3), deep sulcus (class 4), and dental developmental abnormalities (class 5).</p>
<p>The annotation process involved six dental experts to ensure accuracy and reliability. Four experts independently annotated 25 randomly assigned anonymous images in the first round. Two additional experts then reviewed the labeled images to assess annotation accuracy. In cases of ambiguity, all six experts engaged in a consensus discussion, and the final labels were determined based on their unified decision. This multi-step validation process aimed to enhance the consistency and diagnostic precision of the dataset annotations. For this study, only class 1 (caries) and class 2 (periapical infections) were utilized to maintain a balanced dataset for machine learning classification. Thus, the final data subset used in this study consisted of 29 panoramic radiographs for dental caries and 29 for periapical infections, resulting in a total of 58 images. This selection was made to ensure class balance and to support a more reliable evaluation of the machine learning models.</p>
</sec>
<sec>
<label>2.2</label>
<title>LLM: ChatGPT</title>
<p>ChatGPT (<xref ref-type="bibr" rid="B23">OpenAI, 2023</xref>) is an advanced LLM based on OpenAI&#x00027;s GPT architecture, designed to process and generate human-like text by leveraging deep learning techniques. Trained on vast datasets, including general knowledge, medical literature, and structured language patterns, ChatGPT can understand, summarize, and generate contextual responses across diverse domains. Its transformer-based architecture allows it to analyze complex relationships between words, enabling it to generate coherent and contextually relevant descriptions. This makes ChatGPT a valuable tool in medical AI applications, particularly for interpreting and summarizing clinical information.</p>
<p>Although ChatGPT does not directly analyze images, it can process structured input derived from automated or human-interpreted imaging analyses. By leveraging pre-existing medical knowledge, it can describe abnormalities such as bone loss, periapical radiolucencies, or carious lesions based on textual cues extracted from images. High-resolution panoramic radiographs contain detailed anatomical structures, but reduced-resolution images may lose some fine details. ChatGPT can compensate for this by generating structured descriptions that highlight key diagnostic features, ensuring that relevant clinical insights are preserved for downstream classification tasks.</p>
<p>A text-based approach provides an interpretable summary of radiographic findings, making AI-driven diagnostic models more accessible to clinicians. ChatGPT-generated descriptions allow for a structured, standardized representation of dental disease characteristics, improving transparency in AI-assisted diagnosis. Additionally, processing full-resolution radiographs with deep learning models requires substantial computational resources. By reducing image resolution and transforming key features into text, ChatGPT enables a more computationally efficient pipeline while retaining essential diagnostic information. By converting panoramic radiograph features into text, ChatGPT serves as a bridge between image interpretation and deep learning classification, offering an efficient, interpretable, and scalable approach to dental disease diagnosis.</p>
<p>To ensure consistency and reproducibility in the generation of textual descriptions, the same prompt structure was applied to all panoramic radiographs. Each image was processed independently in a single-turn interaction, without iterative refinement, conversational feedback, or manual post-editing of the generated output. The objective was to obtain structured, standardized, and non-interpretative textual representations of observable visual content rather than clinically validated diagnoses.</p>
<p>The full prompt template used for all images was as follows:</p>
<disp-quote><p><italic>You are given a dental panoramic radiograph. Describe only observable anatomical and radiographic features. Do not provide diagnosis, severity, or clinical interpretation. Do not speculate or infer disease. Use neutral medical language. Write exactly 12 words. Return only the sentence, with no extra text</italic>.</p></disp-quote>
<p>To minimize subjectivity and hallucination, the prompt explicitly prohibited diagnostic inference, severity assessment, and speculative language. The generated textual descriptions were subsequently reviewed by a qualified dentist to assess their anatomical plausibility, consistency with the source images, and adherence to the non-interpretative prompt constraints. This assessment was qualitative in nature and aimed to identify gross inconsistencies or clinically implausible descriptions rather than to provide formal diagnostic validation.</p>
</sec>
<sec>
<label>2.3</label>
<title>LSTM</title>
<p>LSTM networks (<xref ref-type="bibr" rid="B10">Hochreiter and Schmidhuber, 1997</xref>) are a type of recurrent neural network (RNN) designed to handle sequential data by addressing the issue of vanishing gradients, which commonly affects traditional RNNs. LSTMs are particularly effective in learning long-range dependencies, making them well-suited for tasks involving time series, NLP, and speech recognition.</p>
<p>Unlike standard RNNs, which struggle to retain information over long sequences, LSTMs use a specialized gating mechanism to regulate the flow of information. The network consists of three key gates: the forget gate, which decides what information to discard from the previous state; the input gate, which determines what new information to store; and the output gate, which controls what information is passed to the next time step. These gates allow LSTMs to selectively remember or forget information, enabling them to capture dependencies across long sequences while mitigating issues related to gradient decay.</p>
<p>The architecture of the LSTM model used in this study consists of a sequence input layer with an input size of 1, followed by a word embedding layer with an embedding dimension of 50. An LSTM layer with 100 hidden units was included, with the output mode set to &#x0201C;last&#x0201D; to process the entire sequence. A fully connected layer was added, with the number of units matching the number of classes in the data. A dropout layer with a rate of 0.2 was applied to prevent overfitting, followed by a softmax layer to classify the input into one of the two classes. <xref ref-type="fig" rid="F1">Figure 1a</xref> shows the architecture of the LSTM.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Network layers: LSTM <bold>(a)</bold>, model trained with features extracted from BERT <bold>(b)</bold>, and 1D-CNN <bold>(c)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1754498-g0001.tif">
<alt-text content-type="machine-generated">Three neural network architecture diagrams are shown. Panel (a) presents a sequential model with layers labeled sequenceinput, word-embedding, lstm, fc, dropout, and softmax. Panel (b) depicts a linear stack of input, fc, dropout, softmax, and classoutput layers. Panel (c) illustrates a complex branched network with multiple convolution, batch normalization, ReLU, dropout, and maxpooling layers merging into fully connected, dropout, and softmax layers. Each architecture is labeled (a), (b), and (c) below the diagrams.</alt-text>
</graphic>
</fig>
<p>For training, the Adam optimizer and cross-entropy loss were chosen, and the maximum number of epochs was set to 600. The batch size was specified as 128, and the model was evaluated using accuracy. Validation data was provided, and the best network based on validation performance was saved.</p>
</sec>
<sec>
<label>2.4</label>
<title>BERT</title>
<p>BERT (<xref ref-type="bibr" rid="B3">Devlin et al., 2019</xref>) is a deep learning model, designed to process and understand natural language with high contextual awareness. Unlike traditional word embedding techniques that represent words independently, BERT captures deep bidirectional contextual relationships by considering both preceding and following words in a sentence. This ability makes it particularly powerful for tasks such as text classification, question answering, and language inference.</p>
<p>BERT is based on the transformer architecture, which relies on self-attention mechanisms to process entire sequences in parallel rather than sequentially, as in RNNs and LSTM networks. The model is pretrained on large text corpora using two key objectives: masked language modeling, where random words are masked, and the model learns to predict them, and next sentence prediction, where the model learns relationships between sentence pairs. This pretraining enables BERT to generalize well to various NLP tasks after fine-tuning on domain-specific datasets.</p>
<p>In this study, the BERT-Base model, consisting of 108.8 million learnable parameters, was used for classification. A tokenizer was employed to encode the text into sequences of integers, enabling the model to process the input effectively. Following tokenization, the data was partitioned into training and validation sets as previously described. To prepare the data for training, the BERT tokens were organized into mini-batches. This step ensures that the data is processed in manageable chunks, which is crucial for training large models like BERT. The BERT model was then used to transform the tokenized data into feature vectors by extracting embeddings, which served as input features for both the training and validation datasets.</p>
<p>Subsequently, a deep learning network was constructed for classification. The network architecture included a feature input layer, a fully connected layer to map the feature vectors to class labels, a dropout layer to reduce overfitting through regularization, and a softmax layer to output the class probabilities. This network was specifically designed to classify the BERT-derived feature vectors. <xref ref-type="fig" rid="F1">Figure 1b</xref> shows the architecture of the network designed for classifying BERT-derived feature vectors, referred to as BERT for brevity.</p>
<p>The training process was configured with the following options: a mini-batch size of 128, the Adam optimizer, a maximum of 600 epochs, an initial learning rate of 0.0001, and without data shuffling to ensure consistent data order during training.</p>
</sec>
<sec>
<label>2.5</label>
<title>1D-CNN</title>
<p>For classification using a 1D-CNN (<xref ref-type="bibr" rid="B18">LeCun et al., 1998</xref>), the network architecture was designed for text classification and begins with specifying the input size as 1, which corresponds to the channel dimension of the input integer sequence. The input data were then embedded using a word embedding layer with a dimension of 50.</p>
<p>The next step involved creating blocks of layers for different <italic>n</italic>-gram lengths, specifically 2, 3, 4, and 5. Each block consists of a 1D convolutional layer, batch normalization, a ReLU activation layer, a dropout layer with a rate of 0.2, and a global max pooling layer. For each block, 100 convolutional filters were used, and the size of the convolutional filter corresponds to the <italic>n</italic>-gram length. These blocks were connected to the word embedding layer, and their outputs were concatenated using a concatenation layer.</p>
<p>The final part of the architecture includes a fully connected layer, which outputs the class predictions, followed by a softmax layer for classification. The network was structured to handle various <italic>n</italic>-gram lengths by connecting each block to the word embedding layer and finally linking the pooling layers to the concatenation layer. <xref ref-type="fig" rid="F1">Figure 1c</xref> shows the architecture of the 1D-CNN.</p>
<p>The network was trained using the Adam optimizer with a mini-batch size of 128. The model was optimized using cross-entropy loss. The training included validation using a separate validation dataset. The network with the lowest validation loss was saved.</p>
</sec>
<sec>
<label>2.6</label>
<title>Binary classification performance metrics</title>
<p>In this binary classification task, the objective is to distinguish between caries and periapical infections. The classification parameters are defined as follows: true positive (TP) refers to periapical infections correctly identified as periapical infections, while false positive (FP) denotes caries incorrectly classified as periapical infections. True negative (TN) represents caries accurately identified as caries, and false negative (FN) refers to periapical infections incorrectly classified as caries.</p>
<p>The performance of the classification model is evaluated using several key metrics. Accuracy (ACC) measures the proportion of correct predictions, including both true positives and true negatives, relative to the total number of predictions for the two classes. Sensitivity (SEN) reflects the model&#x00027;s ability to correctly identify periapical infections, while specificity (SPE) assesses its ability to accurately classify caries. Precision (PRE) quantifies the proportion of cases predicted as periapical infections that actually belong to the class of periapical infection.</p>
<p>The F1 score is a metric that provides the harmonic mean of precision and sensitivity, balancing the trade-off between the two. It emphasizes the correct identification of periapical infections (true positives) while accounting for the impact of false positives and false negatives.</p>
<p>The area under the receiver operating characteristic curve (AUC) offers a comprehensive measure of the model&#x00027;s ability to separate the two classes. It represents the relationship between sensitivity and specificity, with a higher AUC indicating better performance and greater robustness in distinguishing between caries and periapical infections.</p>
<p>Mathematical expressions for these performance metrics are provided in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Mathematical expressions for metrics evaluating the binary classification model.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Metric</bold></th>
<th valign="top" align="left"><bold>Expression</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Accuracy (ACC)</td>
<td valign="top" align="left"><inline-formula><mml:math id="M1"><mml:mfrac><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Sensitivity (SEN)</td>
<td valign="top" align="left"><inline-formula><mml:math id="M2"><mml:mfrac><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Specificity (SPE)</td>
<td valign="top" align="left"><inline-formula><mml:math id="M3"><mml:mfrac><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Precision (PRE)</td>
<td valign="top" align="left"><inline-formula><mml:math id="M4"><mml:mfrac><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="left"><inline-formula><mml:math id="M5"><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<label>2.7</label>
<title>Hyperparameter selection and training configuration</title>
<p>Hyperparameters were selected using a fixed and reproducible configuration, consistent with the exploratory nature of this study and the limited dataset size. All neural network models were trained using the Adam optimizer with an initial learning rate of 0.0001 and a mini-batch size of 128, providing stable convergence across experiments. Models were trained for 100 epochs, with data shuffling applied at each epoch to minimize ordering effects. Validation accuracy was evaluated once per epoch to monitor training behavior.</p>
<p>The same training configuration was applied across all models to ensure methodological consistency and fair comparison. No adaptive or fold-specific hyperparameter optimization was performed, as the emphasis of this work is on feasibility assessment and relative model behavior rather than performance maximization.</p></sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<p>The resolution of the original panoramic radiographs was reduced by a factor of 25. ChatGPT based on OpenAI&#x00027;s GPT-4 architecture was then used to generate concise, approximately 12-word descriptions of potential bone abnormalities depicted in the reduced-resolution images. <xref ref-type="fig" rid="F2">Figure 2</xref> displays reduced-resolution panoramic radiographs showing caries and periapical infections in children, while <xref ref-type="table" rid="T2">Table 2</xref> presents the ChatGPT-generated descriptions of the bone abnormalities corresponding to these radiographs.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Reduced-resolution panoramic radiographs of pediatric dental diseases: caries or class 1 <bold>(a&#x02013;c)</bold>, and periapical infections or class 2 <bold>(d&#x02013;f)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1754498-g0002.tif">
<alt-text content-type="machine-generated">Six dental panoramic x-ray images labeled a through f, each showing different stages or conditions of jaw and tooth development, with varying alignments and visibility of teeth and jaw structure in each panel.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>ChatGPT-based descriptions of pediatric dental diseases on panoramic radiographs as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold><xref ref-type="fig" rid="F2">Figure 2</xref></bold></th>
<th valign="top" align="center"><bold>Class</bold></th>
<th valign="top" align="left"><bold>Descriptions</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">(a)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="left">Possible bone loss, irregular jaw contours, fractures, cysts, or impacted teeth visible</td>
</tr>
<tr>
<td valign="top" align="left">(b)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="left">Possible jaw asymmetry, unerupted teeth, bone resorption, cysts, fractures, or density irregularities</td>
</tr>
<tr>
<td valign="top" align="left">(c)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="left">Impacted teeth with shortened roots, uneven cortical plates, and possible bone resorption</td>
</tr>
<tr>
<td valign="top" align="left">(d)</td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Impacted teeth, malocclusion, bone loss, cysts, fractures, asymmetry, infection, tumors, osteosclerosis, erosion, ankylosis, osteolysis</td>
</tr>
<tr>
<td valign="top" align="left">(e)</td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Dental crowding, impacted teeth, mandibular asymmetry, unerupted molars, root resorption, and hypercementosis</td>
</tr>
<tr>
<td valign="top" align="left">(f)</td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Impacted third molars, dental crowding, mandibular asymmetry, shortened roots, cystic changes, hypercementosis</td>
</tr></tbody>
</table>
</table-wrap>
<p>For the processed text data, each document is converted into a sequence of numeric indices. To achieve this, a word encoding function was used to create a word encoding, which maps words to numeric indices. The documents were then converted into sequences, ensuring that all sequences are of equal length. The sequences were padded and truncated to a target length, and the longest sequence length was selected for uniformity.</p>
<p>The text data were split into training and validation sets using a non-stratified holdout partition, where 90% of the data was allocated for training and the remaining 10% for validation. This partitioning process was repeated ten times, and the average classification results were recorded. <xref ref-type="fig" rid="F3">Figure 3</xref> shows the word clouds of training and test data.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Word clouds of training <bold>(a)</bold> and test <bold>(b)</bold> data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1754498-g0003.tif">
<alt-text content-type="machine-generated">Word cloud diagrams labeled a and b depicting frequent terms in dental and mandibular pathology. Prominent words include asymmetry, impacted, bone, mandibular, molars, teeth, unerupted, crowding, and cystic, with varying font sizes to indicate frequency.</alt-text>
</graphic>
</fig>
<p>A preprocessing function is defined to prepare the text data for model input. This function performs several tasks: it tokenizes the text, converts it to lowercase, and removes punctuation. The training and validation text data are then processed using this function.</p>
<p>To compare the text-based classifiers with image-based classifiers, three pretrained CNNs, namely SqueezeNet (<xref ref-type="bibr" rid="B13">Iandola et al., 2016</xref>), GoogLeNet (<xref ref-type="bibr" rid="B28">Szegedy et al., 2015</xref>), and AlexNet (<xref ref-type="bibr" rid="B16">Krizhevsky et al., 2017</xref>), were also applied for the classification. The pretrained CNN models were trained and validated with reduced-resolution panoramic radiographs. Training options for the pretrained CNNs were specified as follows. The training process included data augmentation to enhance model generalization and prevent overfitting. Training images underwent random vertical flipping, translation of up to 30 pixels, and scaling variations between 90% and 110% to introduce variability and ensure the model learned good features rather than memorizing specific details. Validation images were resized without augmentation to maintain consistency during evaluation.</p>
<p><xref ref-type="fig" rid="F4">Figure 4</xref> illustrates the training and validation processes for the 1D-CNN, LSTM, and BERT models on a held-out partition. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the training and validation processes for the three pretrained CNN models (SqueezeNet, GoogLeNet, and AlexNet) on a held-out partition. <xref ref-type="table" rid="T3">Table 3</xref> presents the performance metrics obtained from the 1D-CNN, LSTM, BERT, and the three pretrained CNN models. To further assess the performance of the various AI models using images only (pretrained CNNs) and text only (1D-CNN, LSTM, and BERT), 5-fold cross-validation was performed. <xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref> report the performance metrics and the corresponding 95% confidence intervals estimated from the distribution of cross-validation scores, respectively.</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Training and validation processes: 1D-CNN <bold>(a)</bold>, LSTM <bold>(b)</bold>, and BERT feature based net, where solid and dotted lines indicate training and validation, respectively <bold>(c)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1754498-g0004.tif">
<alt-text content-type="machine-generated">Figure contains three panels labeled a, b, and c, each showing side-by-side line charts for training and validation accuracy above and training and validation loss below, plotted against iteration number. Panels a and b display early plateauing of validation accuracy and loss, while panel c shows steady incremental improvement in accuracy and a gradual decline in loss, with dashed lines marking critical points.</alt-text>
</graphic>
</fig>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Training and validation processes of pretrained CNN models: SqueezeNet <bold>(a)</bold>, GoogLeNet <bold>(b)</bold>, and AlexNet <bold>(c)</bold>, where solid and dotted lines indicate training and validation, respectively.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1754498-g0005.tif">
<alt-text content-type="machine-generated">Figure contains three panels labeled (a), (b), and (c), each with two line charts showing model accuracy and loss over 100 iterations. Accuracy charts highlight rapid increases stabilizing near 100%, while loss charts show quick reduction to near zero. Differences between panels appear in the detailed curve shapes and loss convergence patterns, illustrating variations in model training performance and optimization behavior between datasets or methods.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Performance measures of AI models evaluated using a 90% training and 10% testing split on reduced-resolution images and their corresponding derived textual representations.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>ACC (%)</bold></th>
<th valign="top" align="center"><bold>SEN (%)</bold></th>
<th valign="top" align="center"><bold>SPE (%)</bold></th>
<th valign="top" align="center"><bold>PRE (%)</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: images only</bold></td>
</tr>
<tr>
<td valign="top" align="left">SqueezeNet</td>
<td valign="top" align="center">50.00</td>
<td valign="top" align="center">46.67</td>
<td valign="top" align="center">53.33</td>
<td valign="top" align="center">54.17</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.62</td>
</tr>
<tr>
<td valign="top" align="left">GoogLeNet</td>
<td valign="top" align="center">56.67</td>
<td valign="top" align="center">66.67</td>
<td valign="top" align="center">46.67</td>
<td valign="top" align="center">58.67</td>
<td valign="top" align="center">0.61</td>
<td valign="top" align="center">0.62</td>
</tr>
<tr>
<td valign="top" align="left">AlexNet</td>
<td valign="top" align="center">66.67</td>
<td valign="top" align="center">73.33</td>
<td valign="top" align="center">60.00</td>
<td valign="top" align="center">63.33</td>
<td valign="top" align="center">0.67</td>
<td valign="top" align="center">0.73</td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: text only</bold></td>
</tr>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">56.67</td>
<td valign="top" align="center">75.00</td>
<td valign="top" align="center">41.67</td>
<td valign="top" align="center">55.56</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">0.72</td>
</tr>
<tr>
<td valign="top" align="left">BERT</td>
<td valign="top" align="center">76.67</td>
<td valign="top" align="center">83.33</td>
<td valign="top" align="center">66.67</td>
<td valign="top" align="center">83.33</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.73</td>
</tr>
<tr>
<td valign="top" align="left">1D-CNN</td>
<td valign="top" align="center">84.00</td>
<td valign="top" align="center">86.67</td>
<td valign="top" align="center">86.67</td>
<td valign="top" align="center">86.67</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.93</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Cross-validated (5-fold) performance measures of AI models on reduced-resolution images and their corresponding derived textual representations.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="center"><bold>Model</bold></th>
<th valign="top" align="center"><bold>ACC (%)</bold></th>
<th valign="top" align="center"><bold>SEN (%)</bold></th>
<th valign="top" align="center"><bold>SPE (%)</bold></th>
<th valign="top" align="center"><bold>PRE (%)</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: images only</bold></td>
</tr>
<tr>
<td valign="top" align="left">SqueezeNet</td>
<td valign="top" align="center">53.33 &#x000B1; 4.56</td>
<td valign="top" align="center">46.67 &#x000B1; 21.73</td>
<td valign="top" align="center">60.00 &#x000B1; 19.00</td>
<td valign="top" align="center">53.43 &#x000B1; 4.80</td>
<td valign="top" align="center">0.48 &#x000B1; 0.15</td>
<td valign="top" align="center">0.54 &#x000B1; 0.12</td>
</tr>
<tr>
<td valign="top" align="left">GoogLeNet</td>
<td valign="top" align="center">63.33 &#x000B1; 7.45</td>
<td valign="top" align="center">63.33 &#x000B1; 21.73</td>
<td valign="top" align="center">63.33 &#x000B1; 21.73</td>
<td valign="top" align="center">68.33 &#x000B1; 18.77</td>
<td valign="top" align="center">0.62 &#x000B1; 0.11</td>
<td valign="top" align="center">0.71 &#x000B1; 0.07</td>
</tr>
<tr>
<td valign="top" align="left">AlexNet</td>
<td valign="top" align="center">58.33 &#x000B1; 5.89</td>
<td valign="top" align="center">76.67 &#x000B1; 14.91</td>
<td valign="top" align="center">40.00 &#x000B1; 19.00</td>
<td valign="top" align="center">56.78 &#x000B1; 6.13</td>
<td valign="top" align="center">0.65 &#x000B1; 0.05</td>
<td valign="top" align="center">0.63 &#x000B1; 0.13</td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: text only</bold></td>
</tr>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">54.55 &#x000B1; 8.11</td>
<td valign="top" align="center">62.00 &#x000B1; 18.00</td>
<td valign="top" align="center">46.00 &#x000B1; 13.00</td>
<td valign="top" align="center">53.97 &#x000B1; 7.97</td>
<td valign="top" align="center">0.57 &#x000B1; 0.12</td>
<td valign="top" align="center">0.62 &#x000B1; 0.05</td>
</tr>
<tr>
<td valign="top" align="left">BERT</td>
<td valign="top" align="center">65.91 &#x000B1; 1.52</td>
<td valign="top" align="center">77.50 &#x000B1; 17.92</td>
<td valign="top" align="center">54.17 &#x000B1; 15.96</td>
<td valign="top" align="center">62.29 &#x000B1; 3.15</td>
<td valign="top" align="center">0.68 &#x000B1; 0.06</td>
<td valign="top" align="center">0.74 &#x000B1; 0.09</td>
</tr>
<tr>
<td valign="top" align="left">1D-CNN</td>
<td valign="top" align="center">75.61 &#x000B1; 6.44</td>
<td valign="top" align="center">83.33 &#x000B1; 11.79</td>
<td valign="top" align="center">66.67 &#x000B1; 8.17</td>
<td valign="top" align="center">73.57 &#x000B1; 6.21</td>
<td valign="top" align="center">0.78 &#x000B1; 0.07</td>
<td valign="top" align="center">0.81 &#x000B1; 0.12</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>95% confidence intervals for cross-validated (5-fold) performance measures of AI models using reduced-resolution images and their corresponding derived textual representations.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="center"><bold>Model</bold></th>
<th valign="top" align="center"><bold>ACC (%)</bold></th>
<th valign="top" align="center"><bold>SEN (%)</bold></th>
<th valign="top" align="center"><bold>SPE (%)</bold></th>
<th valign="top" align="center"><bold>PRE (%)</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: images only</bold></td>
</tr>
<tr>
<td valign="top" align="left">SqueezeNet</td>
<td valign="top" align="center">[47.67, 59.00]</td>
<td valign="top" align="center">[19.68, 73.65]</td>
<td valign="top" align="center">[36.41, 83.60]</td>
<td valign="top" align="center">[47.47, 59.39]</td>
<td valign="top" align="center">[0.29, 0.66]</td>
<td valign="top" align="center">[0.39, 0.69]</td>
</tr>
<tr>
<td valign="top" align="left">GoogLeNet</td>
<td valign="top" align="center">[54.08, 72.59]</td>
<td valign="top" align="center">[36.35, 90.32]</td>
<td valign="top" align="center">[36.35, 90.32]</td>
<td valign="top" align="center">[45.02, 91.64]</td>
<td valign="top" align="center">[0.48, 0.76]</td>
<td valign="top" align="center">[0.62, 0.79]</td>
</tr>
<tr>
<td valign="top" align="left">AlexNet</td>
<td valign="top" align="center">[51.02, 65.65]</td>
<td valign="top" align="center">[58.16, 95.18]</td>
<td valign="top" align="center">[16.40, 63.60]</td>
<td valign="top" align="center">[49.17, 64.39]</td>
<td valign="top" align="center">[0.58, 0.71]</td>
<td valign="top" align="center">[0.47, 0.79]</td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Input: text only</bold></td>
</tr>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">[44.48, 64.61]</td>
<td valign="top" align="center">[39.41, 84.59]</td>
<td valign="top" align="center">[29.86, 62.14]</td>
<td valign="top" align="center">[44.07, 63.87]</td>
<td valign="top" align="center">[0.42, 0.72]</td>
<td valign="top" align="center">[0.56, 0.68]</td>
</tr>
<tr>
<td valign="top" align="left">BERT</td>
<td valign="top" align="center">[63.50, 68.32]</td>
<td valign="top" align="center">[48.98, 100]</td>
<td valign="top" align="center">[28.78, 79.56]</td>
<td valign="top" align="center">[57.29, 67.30]</td>
<td valign="top" align="center">[0.58, 0.79]</td>
<td valign="top" align="center">[0.60, 0.89]</td>
</tr>
<tr>
<td valign="top" align="left">1D-CNN</td>
<td valign="top" align="center">[67.61, 83.60]</td>
<td valign="top" align="center">[68.70, 97.97]</td>
<td valign="top" align="center">[56.53, 76.80]</td>
<td valign="top" align="center">[65.86, 81.28]</td>
<td valign="top" align="center">[0.69, 0.87]</td>
<td valign="top" align="center">[0.67, 0.96]</td>
</tr></tbody>
</table>
</table-wrap>
<p>Given the exploratory nature of this study and the limited dataset size, all reported performance measures should be interpreted as preliminary. Rather than claiming definitive diagnostic performance, the results emphasize relative model behavior, comparative trends, and feasibility across different modeling strategies. In particular, performance metrics reported for the 1D-CNN and other models are intended to illustrate their potential effectiveness under constrained data conditions, rather than to represent clinically generalizable accuracy. These results primarily support methodological insights into model suitability and representation learning, and they motivate further validation on larger and independent datasets.</p></sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<sec>
<label>4.1</label>
<title>The LSTM model</title>
<p>For the 90%&#x02013;10% training-testing split, this classifier showed a sensitivity of 75.00%, indicating it correctly identified periapical infections in three out of every four instances. However, its specificity is much lower at 41.67%, suggesting a significant struggle in accurately classifying caries. This imbalance between sensitivity and specificity highlights that the LSTM model was biased toward recognizing periapical infections but failed to detect caries effectively. The overall performance is further reflected in the model&#x00027;s moderate precision of 55.56%, F1 score of 0.62, and a relatively low AUC of 0.72, indicating that while it performed reasonably well in identifying periapical infections, its ability to distinguish between the two classes was limited.</p>
<p>The low performance of the LSTM model, particularly in terms of specificity when classifying caries, can be attributed to several factors. First, the LSTM&#x00027;s ability to capture sequential dependencies in text might be limited by the small dataset size. The model may not have enough data to learn the subtle differences between caries and periapical infections from the text alone, leading to a bias toward identifying one class (such as periapical infections) more accurately than the other. Moreover, LSTM models are typically better suited for sequential data such as time series, but they may not be the most effective for text classification tasks without appropriate preprocessing or feature engineering.</p>
<p>For the 5-fold cross-validation, based on <xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref>, the LSTM model trained on text-only inputs exhibits modest and variable performance under 5-fold cross-validation. Overall, the results indicate that the LSTM is able to capture some discriminative patterns from the derived textual representations, but its classification capability remains limited.</p>
<p>The model shows a tendency toward higher sensitivity for detecting periapical infections than specificity for identifying caries, indicating an imbalance in class-wise performance. This behavior suggests that while the sequential modeling capacity of the LSTM can identify infection-related features, it is less effective at consistently distinguishing non-infection cases. The corresponding confidence intervals are relatively wide, reflecting variability across folds and highlighting the instability of the model when trained on a small dataset.</p>
<p>In comparison with other text-based approaches, the LSTM is consistently outperformed by both BERT and the 1D-CNN across all evaluated metrics. This suggests that the representational power of the LSTM is insufficient to fully exploit the information encoded in the derived textual features, particularly under data-limited conditions.</p>
</sec>
<sec>
<label>4.2</label>
<title>The BERT model</title>
<p>BERT achieved a sensitivity of 83.33%, indicating strong performance in correctly identifying periapical infection cases. However, its specificity was lower at 66.67%, suggesting a relatively higher misclassification rate for caries. While the model effectively detected periapical infections, it struggled to distinguish periapical infections from caries, which is reflected in its overall accuracy of 76.67%. The precision of 83.33% indicates that most of the positive classifications were correct, contributing to a high F1 score of 0.82, which balances precision and sensitivity. However, the AUC of 0.73 suggests moderate overall discriminative ability. This performance could stem from challenges in capturing subtle textual differences in descriptions of caries.</p>
<p>BERT&#x00027;s moderate performance may be due to reliance on textual descriptions, which can be ambiguous or inconsistent. The small dataset limits learning diverse patterns, increasing the risk of overfitting to specific linguistic variations. Additionally, medical image interpretation requires nuanced domain knowledge that text alone may not fully capture. Unlike image-based models that analyze spatial and structural features, BERT depends solely on textual abstraction, which may omit subtle visual details crucial for distinguishing between similar conditions like caries and periapical infections.</p>
<p>As shown in <xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref>, the BERT model operating on text-only inputs demonstrates consistently stronger performance than the LSTM across all evaluation metrics under 5-fold cross-validation. These results indicate that transformer-based representations are more effective at capturing discriminative information from the derived textual features.</p>
<p>BERT exhibits a more favorable balance between sensitivity for detecting periapical infections and specificity for identifying caries, suggesting improved robustness in class-wise discrimination compared with recurrent models. In addition, the confidence intervals associated with BERT&#x00027;s performance measures are generally narrower than those of the LSTM, reflecting greater stability across cross-validation folds despite the limited dataset size.</p>
<p>When compared with other text-based approaches, BERT substantially improves upon the LSTM and approaches the performance of the 1D-CNN, highlighting the benefit of contextualized representations learned via self-attention. These findings suggest that the pretrained language modeling paradigm enables more effective exploitation of the structured textual representations derived from panoramic radiographs.</p>
</sec>
<sec>
<label>4.3</label>
<title>The 1D-CNN model</title>
<p>This net, on the other hand, demonstrated a much more balanced performance with both sensitivity and specificity at 86.67%. This indicates that the model performed well in identifying both caries and periapical infections with equal accuracy. The balance in performance is also evident in the model&#x00027;s high accuracy of 84.00%, precision of 86.67%, and F1 score of 0.84. Additionally, with an AUC of 0.93, the 1D-CNN model exhibited strong overall classification capability, effectively distinguishing between the two classes with a high degree of robustness.</p>
<p>While the 1D-CNN model demonstrated strong performance with balanced sensitivity and specificity, caution is also required when interpreting its results on the small dataset. Despite its high accuracy, the model may still face challenges in generalizing to larger or more diverse datasets. With a limited sample size, there is a risk of overfitting, where the model might learn patterns specific to the small dataset that do not hold well in broader contexts. The 1D-CNN&#x00027;s performance should be validated on larger datasets to ensure its robustness and reliability in real-world applications.</p>
<p><xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref> show that the 1D-CNN model trained on text-only inputs achieves the strongest and most consistent performance among all evaluated approaches under 5-fold cross-validation. This indicates that convolutional modeling of the derived one-dimensional representations is particularly well suited to the characteristics of the transformed panoramic radiographic data.</p>
<p>The 1D-CNN demonstrates a favorable balance between sensitivity for detecting periapical infections and specificity for identifying caries, reflecting robust class-wise discrimination. Compared with recurrent and transformer-based models, its performance exhibits reduced variability across folds, as evidenced by comparatively narrower confidence intervals, suggesting greater stability under small-sample conditions.</p>
<p>In direct comparison with other text-based models, the 1D-CNN consistently outperforms both the LSTM and BERT across all evaluation metrics. This performance advantage suggests that local pattern extraction via convolutional filters is more effective than sequential or contextual language modeling for capturing salient features in the derived textual representations.</p>
</sec>
<sec>
<label>4.4</label>
<title>Comparisons</title>
<p>As shown in <xref ref-type="table" rid="T3">Table 3</xref>, the classification performance of Group 1, which includes three pretrained CNN models trained on panoramic radiographs (SqueezeNet, GoogLeNet, and AlexNet), is generally lower than that of Group 2, which consists of three text-based classifiers (LSTM, BERT, and 1D-CNN).</p>
<p>Among the pretrained CNN models, AlexNet achieved the highest accuracy at 66.67%, with a sensitivity of 73.33% and a specificity of 60.00%, suggesting it performed better in detecting both caries and periapical infections compared to SqueezeNet and GoogLeNet. While GoogLeNet showed relatively balanced sensitivity and specificity, it struggled with an overall accuracy of 56.67%. SqueezeNet exhibited the weakest performance, with an accuracy of only 50.00% and poor sensitivity (46.67%). The AUC values for all the pretrained CNN models remained relatively low, ranging from 0.62 to 0.73, indicating moderate classification ability.</p>
<p>In contrast, text-based classifiers demonstrated superior performance. The 1D-CNN model outperformed all others, achieving the highest accuracy of 84.00% with balanced sensitivity and specificity at 86.67%, along with the highest AUC of 0.93. BERT also showed strong results, with an accuracy of 76.67%, though its specificity (66.67%) was lower than its sensitivity (83.33%), suggesting a bias toward detecting caries more effectively. The LSTM model struggled, with an accuracy of 56.67%, high sensitivity (75.00%), but very low specificity (41.67%), indicating difficulty in correctly identifying periapical infections.</p>
<p>Overall, text-based classification outperformed image-based classification in distinguishing caries and periapical infections from panoramic radiographs, particularly with 1D-CNN and BERT achieving significantly better results than the CNN-based models. The superior performance of text-based classifiers suggests that transforming radiographic data into textual descriptions enables more effective feature extraction, reducing dependence on large annotated image datasets and complex preprocessing.</p>
<p>A comparison between the text-based models (LSTM, BERT, and 1D-CNN) and the pretrained image-based CNNs (SqueezeNet, GoogLeNet, and AlexNet), as reported in <xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref>, reveals clear performance differences between the two modeling paradigms.</p>
<p>Overall, the text-based approaches demonstrate superior discriminative capability compared with the image-only CNNs. In particular, the 1D-CNN consistently outperforms all pretrained image-based models, exhibiting stronger class-wise discrimination and greater stability across cross-validation folds. This suggests that the derived one-dimensional textual representations preserve salient diagnostic information that can be more effectively exploited by convolutional architectures than by directly processing reduced-resolution images.</p>
<p>Among the image-based models, GoogLeNet generally performs more robustly than SqueezeNet and AlexNet, reflecting the benefit of deeper and more structured feature extraction. However, even the strongest image-based CNN remains inferior to the best-performing text-based models, indicating limitations in learning discriminative features from low-resolution panoramic images under data-scarce conditions.</p>
<p>The LSTM shows comparable or slightly lower performance than the pretrained CNNs, highlighting the limitations of recurrent sequence modeling for this task. In contrast, BERT consistently exceeds the performance of all image-based CNNs, underscoring the advantage of contextualized representations learned via transformer architectures. Nevertheless, both LSTM and BERT exhibit greater variability than the 1D-CNN, suggesting sensitivity to small-sample effects.</p>
<p>These results indicate that transforming panoramic radiographs into structured textual or signal-like representations, followed by appropriate text-based modeling, can outperform direct image-based classification using pretrained CNNs in limited-data settings. The findings further suggest that model-representation alignment plays a critical role in achieving robust performance when dataset size and image resolution are constrained.</p>
<p>An important methodological consideration concerns the use of reduced-resolution panoramic radiographs for the image-based models. All pretrained CNNs were trained on images downscaled by a factor of 25 to ensure computational feasibility. While this approach enabled efficient model training, it inevitably resulted in substantial loss of fine-grained anatomical detail and likely constrained the performance of image-based classifiers.</p>
<p>As a result, the comparative analysis presented in this study should not be interpreted as a direct or fair comparison between optimally trained image-based models and text-based approaches. Instead, the findings reflect a comparison between a degraded image-based pipeline and a text-based representation derived from the same underlying images. This distinction is critical when interpreting the observed performance differences and their implications for clinical applicability. In practice, image-based models trained on original-resolution radiographs would be expected to achieve superior diagnostic performance.</p>
</sec>
<sec>
<label>4.5</label>
<title>Training and validation</title>
<p>Analysis of the training and validation processes of the 1D-CNN, LSTM, and BERT models using a hold-out data partition, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, can provide insights into the classification performance of each model.</p>
<p>For the 1D-CNN model (<xref ref-type="fig" rid="F4">Figure 4a</xref>), the training process appeared stable, with both training and validation performance improving steadily. The small gap between the solid (training) and dotted (validation) lines suggested minimal overfitting, indicating that the model generalized well to unseen data.</p>
<p>In the case of the LSTM model (<xref ref-type="fig" rid="F4">Figure 4b</xref>), the training curve improved, but the validation curve may show more fluctuations or a larger gap, suggesting potential overfitting or difficulty in generalizing. LSTMs often struggle with smaller datasets, as they require longer sequences to fully leverage their sequential learning capabilities.</p>
<p>For the BERT-based model (<xref ref-type="fig" rid="F4">Figure 4c</xref>), validation accuracy reached 100% while validation loss decreased, suggesting strong learning. However, this raises concerns about overfitting, as the small dataset may have led the model to memorize patterns rather than generalize. Further evaluation on independent data or a larger dataset is needed to confirm its reliability and real-world applicability.</p>
<p>The training and validation processes of SqueezeNet, GoogLeNet, and AlexNet exhibit overfitting, as illustrated in <xref ref-type="fig" rid="F5">Figure 5</xref>. All three models reached 100% training accuracy while maintaining much lower validation accuracy. SqueezeNet struggled the most, with unstable validation accuracy and erratic validation loss, indicating poor generalization. GoogLeNet showed better training stability, but its validation accuracy fluctuated significantly, suggesting that the model memorized training patterns but failed to generalize effectively. AlexNet performed relatively better, with a more consistent validation accuracy trend, though still affected by overfitting. The high validation loss reinforced the limited ability of these pretrained CNNs to classify dental diseases from panoramic radiographs reliably.</p>
</sec>
<sec>
<label>4.6</label>
<title>Image-based text descriptions</title>
<p>Using ChatGPT to translate panoramic radiographs into text descriptions offers several advantages over direct classification of the images using image-based classifiers as follows.</p>
<p><italic>Ability to leverage semantic understanding:</italic> ChatGPT can analyze and describe high-level features of radiographs in natural language, summarizing complex patterns or abnormalities in a human-readable format. These textual descriptions can encapsulate contextual information and insights that may help in downstream classification tasks.</p>
<p><italic>Simplified workflow</italic>: Generating text descriptions directly from radiographs can simplify the workflow by reducing the preprocessing steps needed for image-based classifiers. This allows for faster and more efficient analysis, particularly in large-scale datasets.</p>
<p><italic>Scalability and resource efficiency:</italic> Reducing image resolution (by 25 times) simplifies the computational load and storage requirements, which is particularly useful for large-scale datasets. Image-based classifiers often struggle with reduced-resolution images, as essential visual details may be lost. In contrast, ChatGPT can still infer potential abnormalities from simplified features, potentially acting as a bridge between low-quality inputs and meaningful outputs.</p>
<p><italic>Improved generalizability:</italic> Instead of relying solely on pixel-level patterns, textual descriptions generated by ChatGPT can generalize better across variations in image quality, acquisition settings, or equipment. This approach can reduce the dependency on extensive image-based model training, which often requires high-resolution data and large labeled datasets.</p>
<p><italic>Enhanced interpretability:</italic> Text-based outputs are inherently more interpretable to healthcare professionals than direct image classifications, making it easier to validate the results and integrate them into diagnostic workflows. Descriptions can highlight specific features, such as &#x0201C;possible bone loss near tooth x&#x0201D;, rather than providing a simple categorical label.</p>
</sec>
<sec>
<label>4.7</label>
<title>Limitations</title>
<p>This study has several limitations. First, the dataset is relatively small and derived from a single center, which restricts statistical power and increases the risk of overfitting, even though 5-fold cross-validation was employed. While cross-validation provides more robust performance estimates than a single train-test split, it cannot fully eliminate overfitting in data-limited settings.</p>
<p>The dataset may not fully capture the diversity of pediatric dental conditions across different age groups, socioeconomic backgrounds, and geographic regions. In addition, variations in image acquisition protocols, annotation quality, and clinical diagnostic criteria may introduce bias and limit the generalizability of the reported results. The absence of external validation cohorts further constrains the assessment of model robustness across different institutions and imaging environments.</p>
<p>Furthermore, although the proposed AI approaches demonstrate promising performance in this exploratory setting, they have not yet been validated in large-scale, prospective clinical studies. Moreover, the interpretability of the deep learning outputs requires further enhancement to support transparency, clinician trust, and clinical adoption.</p>
<p>In addition, the lack of longitudinal data limits the ability to assess disease progression or treatment outcomes over time. Future work involving larger, multi-center, and longitudinal datasets will be essential to address these limitations and to support the development of clinically robust AI systems.</p>
<p>Another limitation of this study is the absence of quantitative inter-rater agreement analysis and large-scale clinical validation. Although a qualified dentist qualitatively reviewed the generated textual descriptions to assess anatomical plausibility and adherence to the non-interpretative prompt constraints, this review was not intended to establish diagnostic accuracy. As a result, the findings should be interpreted as exploratory, reflecting methodological feasibility and relative model behavior rather than clinically validated performance.</p>
<p>Although the proposed framework introduces human-readable textual representations as an intermediate modality, no formal evidence is currently provided to demonstrate that these generated descriptions add diagnostic value or directly improve clinical decision-making. The textual outputs were designed to be descriptive and non-interpretative, and their role in this study is methodological rather than clinical. As such, the presence of textual descriptions should not be construed as conferring inherent clinical interpretability or diagnostic usefulness. This limitation underscores the exploratory nature of the present work. Future studies will be required to systematically evaluate whether such textual representations meaningfully support clinical reasoning, enhance diagnostic confidence, or contribute to decision-making workflows when assessed by dental professionals in real-world settings.</p>
<p>A semantic concern is the occasional appearance of terminology that falls outside the defined clinical scope of this study. In a small number of cases, the generated textual descriptions included terms such as &#x0201C;tumors&#x0201D; and &#x0201C;ankylosis&#x0201D;, which are not representative of the pediatric dental disease categories considered (caries and periapical infections). Although these terms were not intended as diagnoses and arose within a non-interpretative descriptive context, their presence highlights the risk of semantic drift or hallucinated content when using LLMs, even under constrained prompting. To assess the impact of this issue, the affected terms were removed and the analyses were repeated. No meaningful changes in model performance were observed, indicating that the reported results were not driven by the inclusion of such terminology.</p>
</sec>
<sec>
<label>4.8</label>
<title>Future work</title>
<p>Future research will focus on addressing the identified limitations and enhancing clinical applicability. Expanding the dataset size and including additional disease categories through multi-center collaborations will be essential for improving robustness, fairness, and generalizability across diverse populations. In particular, large-scale and multi-center datasets will enable robust external validation and reduce dataset-specific bias.</p>
<p>Strengthening clinical validation will be a priority in future work through systematic expert evaluation involving multiple dental professionals. This will include structured qualitative assessments supported by questionnaires to evaluate the perceived clinical relevance, clarity, and usefulness of the generated textual representations, alongside quantitative inter-rater reliability analysis to assess consistency across expert assessments. Together, these measures will enable a more rigorous evaluation of the accuracy, consistency, and potential clinical value of the proposed approach.</p>
<p>Together, these evaluations will enable a more robust determination of whether the proposed textual representations meaningfully align with clinical findings and whether they provide added value in real-world dental practice, beyond their current role as an intermediate representation for exploratory modeling.</p>
<p>In addition, incorporating multimodal data&#x02014;such as radiographic images, clinical records, behavioral indicators, and temporal information&#x02014;may further enhance predictive performance and enable early detection of disease onset or progression. The development of explainable AI frameworks and calibration methods will also be prioritized to improve interpretability and clinician confidence. Pilot clinical studies in pediatric dental settings will be necessary to evaluate workflow integration, usability, and impact on diagnostic efficiency.</p>
<p>Future research will also include benchmarking the proposed text-based and signal-derived approaches against image-based models trained on original-resolution radiographs. This will enable a more clinically representative and methodologically balanced comparison, clarifying the relative advantages and limitations of each modeling paradigm. Such evaluations will be essential for determining the conditions under which alternative representations may offer practical benefits and for guiding the translation of these methods into real-world clinical workflows.</p>
<p>Future studies will also include formal ablation analyses to more systematically evaluate the contribution of individual components within the proposed framework. Such analyses will examine the impact of representation choices, model architectures, and key design decisions by selectively removing or modifying components under controlled conditions. Conducting these ablation studies on larger datasets will be essential to obtain statistically reliable insights, reduce overfitting effects, and better understand the factors driving model performance and robustness.</p>
<p>Regarding model selection considerations, recent vision transformer (ViT) architectures (<xref ref-type="bibr" rid="B9">Han et al., 2023</xref>; <xref ref-type="bibr" rid="B27">Saha and Xu, 2025</xref>; <xref ref-type="bibr" rid="B30">Wang et al., 2025</xref>) were considered conceptually but were not included in the present experiments. Vision transformers generally require substantially larger datasets or extensive pretraining and fine-tuning to achieve stable and reliable performance. Given the limited dataset size and the exploratory focus of this study, incorporating ViT-based models would likely introduce greater performance variability and increase the risk of overfitting. Accordingly, established pretrained CNNs were selected as image-based baselines to provide a controlled comparison with the proposed text-based approaches. Future work will include systematic evaluation of ViT models when larger datasets become available, enabling a more comprehensive assessment of their potential advantages in this application.</p>
</sec>
<sec>
<label>4.9</label>
<title>Clinical implications</title>
<p>The proposed AI-based approach offers significant potential for improving pediatric dental care. By assisting clinicians in early detection of caries, developmental anomalies, and periodontal conditions, it may facilitate timely interventions and reduce the burden of untreated oral diseases in children. Automated image analysis and risk stratification could support preventive care programs, particularly in resource-limited or community-based settings. Furthermore, integrating such AI tools into routine dental examinations may enhance diagnostic consistency, optimize treatment planning, and improve patient engagement through personalized education. The adoption of AI in pediatric dentistry could transform preventive strategies and promote long-term oral health outcomes.</p>
<p>From a clinical perspective, it should be pointed out that the adopted experimental framing highlights a potential role for text-based or signal-derived representations in scenarios where image quality, spatial resolution, storage capacity, or data transmission bandwidth are limited. Such conditions may arise in low-resource clinical settings, tele-dentistry applications, or when working with legacy imaging systems. In these contexts, structured textual abstractions derived from radiographs may preserve diagnostically relevant information even when image fidelity is compromised. Nevertheless, this use case should be viewed as complementary rather than substitutive, as high-resolution image-based analysis remains the preferred approach for routine clinical decision-making.</p></sec></sec>
<sec id="s5">
<label>5</label>
<title>Conclusions</title>
<p>This study demonstrates the potential of using a large language model to translate panoramic radiographs into textual descriptions for dental disease classification using deep learning. By leveraging text-based classification instead of direct image-based analysis, the approach eliminates the need for complex image preprocessing, such as segmentation, and allows models like 1D-CNN to process text-based descriptions efficiently. Compared to traditional image-based classifiers, this method offers greater interpretability and accessibility, particularly in settings where high-resolution imaging or expert annotations are limited.</p>
<p>Beyond dental disease classification, this approach has broader applications in medical imaging, where textual descriptions can be used for AI-driven diagnosis across radiology, dermatology, and pathology. Future research should focus on optimizing text generation from images, ensuring that descriptions capture clinically relevant features with high fidelity. Additionally, investigating hybrid models that combine text-based and image-based features may enhance classification accuracy and generalizability. Expanding datasets, improving domain-specific language models, and incorporating external clinical knowledge into text-based AI models will be crucial for advancing AI-assisted diagnostics in healthcare.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: Children&#x00027;s Dental Panoramic Radiographs Dataset for Caries Segmentation and Dental Disease Detection. Figshare: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.6084/m9.figshare.c.6317013.v1">https://doi.org/10.6084/m9.figshare.c.6317013.v1</ext-link>.</p>
</sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>This is a secondary data analysis. The ethics approval was waived by Queen Mary University of London Ethics Committee. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x00027; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>TP: Conceptualization, Formal analysis, Investigation, Methodology, Resources, Software, Supervision, Validation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. SA-H: Formal analysis, Validation, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<ack><title>Acknowledgments</title><p>This work has been released as a preprint (<xref ref-type="bibr" rid="B26">Pham, 2025</xref>).</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author TP declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abd-Alrazaq</surname> <given-names>A.</given-names></name> <name><surname>AlSaad</surname> <given-names>R.</given-names></name> <name><surname>Alhuwail</surname> <given-names>D.</given-names></name> <name><surname>Ahmed</surname> <given-names>A.</given-names></name> <name><surname>Healy</surname> <given-names>P. M.</given-names></name> <name><surname>Latifi</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Large language models in medical education: opportunities, challenges, and future directions</article-title>. <source>JMIR Med Educ</source>. <volume>9</volume>:<fpage>e48291</fpage>. doi: <pub-id pub-id-type="doi">10.2196/48291</pub-id><pub-id pub-id-type="pmid">37261894</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abdelaziz</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Detection, diagnosis, and monitoring of early caries: the future of individualized dental care</article-title>. <source>Diagnostics</source> <volume>13</volume>:<fpage>3649</fpage>. doi: <pub-id pub-id-type="doi">10.3390/diagnostics13243649</pub-id><pub-id pub-id-type="pmid">38132233</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Devlin</surname> <given-names>J.</given-names></name> <name><surname>Chang</surname> <given-names>M. W.</given-names></name> <name><surname>Lee</surname> <given-names>K.</given-names></name> <name><surname>Toutanova</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;BERT pre-training of deep bidirectional transformers for language understanding,&#x0201D;</article-title> in <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source> (<publisher-loc>Minneapolis, MN</publisher-loc>: <publisher-name>Association for Computational Linguistics (ACL</publisher-name>)), <fpage>4171</fpage>&#x02013;<lpage>4186</lpage>.</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dhingra</surname> <given-names>K.</given-names></name></person-group> (<year>2023</year>). <article-title>Artificial intelligence in dentistry: current state and future directions</article-title>. <source>Bull. Royal College Surg. Engl</source>. <volume>105</volume>, <fpage>380</fpage>&#x02013;<lpage>383</lpage>. doi: <pub-id pub-id-type="doi">10.1308/rcsbull.2023.132</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>W.</given-names></name> <name><surname>Matinlinna</surname> <given-names>J. P.</given-names></name> <name><surname>Burrow</surname> <given-names>M. F.</given-names></name> <name><surname>Tsoi</surname> <given-names>J. K. H</given-names></name></person-group>. (<year>2023</year>). <article-title>Artificial intelligence in dentistry&#x02013;A review</article-title>. <source>Front. Dent. Med</source> <volume>4</volume>:<fpage>1085251</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdmed.2023.1085251</pub-id><pub-id pub-id-type="pmid">39935549</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Featherstone</surname> <given-names>J. D. B.</given-names></name> <name><surname>Crystal</surname> <given-names>Y. O.</given-names></name> <name><surname>Alston</surname> <given-names>P.</given-names></name> <name><surname>Chaffee</surname> <given-names>B. W.</given-names></name> <name><surname>Dom&#x000E9;jean</surname> <given-names>S.</given-names></name> <name><surname>Rechmann</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Evidence-based caries management for all ages&#x02013;practical guidelines</article-title>. <source>Front Oral Health</source>. <volume>2</volume>:<fpage>657518</fpage>. doi: <pub-id pub-id-type="doi">10.3389/froh.2021.657518</pub-id><pub-id pub-id-type="pmid">35048005</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ghaffari</surname> <given-names>M.</given-names></name> <name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Shrestha</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>A review of advancements of artificial intelligence in dentistry</article-title>. <source>Dentist. Rev</source>. <volume>4</volume>:<fpage>100081</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.dentre.2024.100081</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Giannakopoulos</surname> <given-names>K.</given-names></name> <name><surname>Kavadella</surname> <given-names>A.</given-names></name> <name><surname>Aaqel Salim</surname> <given-names>A.</given-names></name> <name><surname>Stamatopoulos</surname> <given-names>V.</given-names></name> <name><surname>Kaklamanos</surname> <given-names>E. G.</given-names></name></person-group> (<year>2023</year>). <article-title>Evaluation of the performance of generative AI large language models ChatGPT, Google Bard, and Microsoft Bing Chat in supporting evidence-based dentistry: comparative mixed methods study</article-title>. <source>J. Med. Internet Res</source>. <volume>25</volume>:<fpage>e51580</fpage>. doi: <pub-id pub-id-type="doi">10.2196/51580</pub-id><pub-id pub-id-type="pmid">38009003</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Guo</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>A survey on vision transformer</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>45</volume>, <fpage>87</fpage>&#x02013;<lpage>110</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2022.3152247</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hochreiter</surname> <given-names>S.</given-names></name> <name><surname>Schmidhuber</surname> <given-names>J.</given-names></name></person-group> (<year>1997</year>). <article-title>Long short-term memory</article-title>. <source>Neural Comput</source>. <volume>9</volume>, <fpage>1735</fpage>&#x02013;<lpage>1780</lpage>. doi: <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hsieh</surname> <given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>Multimodal feature fusion in deep learning for comprehensive dental condition classification</article-title>. <source>J. Xray. Sci. Technol</source>. <volume>32</volume>, <fpage>303</fpage>&#x02013;<lpage>321</lpage>. doi: <pub-id pub-id-type="doi">10.3233/XST-230271</pub-id><pub-id pub-id-type="pmid">38217632</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>H.</given-names></name> <name><surname>Zheng</surname> <given-names>O.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Yin</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Ding</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>ChatGPT for shaping the future of dentistry: the potential of multi-modal large language model</article-title>. <source>Int. J. Oral Sci</source>. <volume>15</volume>:<fpage>29</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41368-023-00239-y</pub-id><pub-id pub-id-type="pmid">37507396</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Iandola</surname> <given-names>F. N.</given-names></name> <name><surname>Han</surname> <given-names>S.</given-names></name> <name><surname>Moskewicz</surname> <given-names>M. W.</given-names></name> <name><surname>Ashraf</surname> <given-names>K.</given-names></name> <name><surname>Dally</surname> <given-names>W. J.</given-names></name> <name><surname>Keutzer</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>SqueezeNet: AlexNet-level accuracy with 50 &#x000D7; fewer parameters and &#x0003C; 0.5MB model size</article-title>. <source>arXiv</source> [preprint] arXiv:1602.07360. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1602.07360</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaya</surname> <given-names>E.</given-names></name> <name><surname>Gune&#x000E7;</surname> <given-names>H.</given-names></name> <name><surname>Ayd&#x00131;n</surname> <given-names>K.</given-names></name> <name><surname>Urkmez</surname> <given-names>E.</given-names></name> <name><surname>Duranay</surname> <given-names>R.</given-names></name> <name><surname>Ates</surname> <given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>A deep learning approach to permanent tooth germ detection on pediatric panoramic radiographs</article-title>. <source>Imaging Sci. Dent</source>. <volume>52</volume>:<fpage>275</fpage>. doi: <pub-id pub-id-type="doi">10.5624/isd.20220050</pub-id><pub-id pub-id-type="pmid">36238699</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Khanagar</surname> <given-names>S.</given-names></name> <name><surname>Alfouzan</surname> <given-names>K.</given-names></name> <name><surname>Alkadi</surname> <given-names>L.</given-names></name> <name><surname>Albalawi</surname> <given-names>F.</given-names></name> <name><surname>Iyer</surname> <given-names>K.</given-names></name> <name><surname>Awawdeh</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Performance of artificial intelligence (AI) models designed for application in pediatric dentistry&#x02013;a systematic review</article-title>. <source>Appl. Sci</source>. <volume>12</volume>:<fpage>9819</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app12199819</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Krizhevsky</surname> <given-names>A.</given-names></name> <name><surname>Sutskever</surname> <given-names>I.</given-names></name> <name><surname>Hinton</surname> <given-names>G. E.</given-names></name></person-group> (<year>2017</year>). <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Commun. ACM</source>. <volume>60</volume>, <fpage>84</fpage>&#x02013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3065386</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>La Rosa</surname> <given-names>S.</given-names></name> <name><surname>Quinzi</surname> <given-names>V.</given-names></name> <name><surname>Palazzo</surname> <given-names>G.</given-names></name> <name><surname>Ronsivalle</surname> <given-names>V.</given-names></name> <name><surname>Lo Giudice</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>The implications of artificial intelligence in pedodontics: a scoping review of evidence-based literature</article-title>. <source>Healthcare</source> <volume>12</volume>:<fpage>1311</fpage>. doi: <pub-id pub-id-type="doi">10.3390/healthcare12131311</pub-id><pub-id pub-id-type="pmid">38998846</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname> <given-names>Y.</given-names></name> <name><surname>Bottou</surname> <given-names>L.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>Haffner</surname> <given-names>P.</given-names></name></person-group> (<year>1998</year>). <article-title>Gradient-based learning applied to document recognition</article-title>. <source>Proceedings of the IEEE</source> <volume>86</volume>, <fpage>2278</fpage>&#x02013;<lpage>2324</lpage>. doi: <pub-id pub-id-type="doi">10.1109/5.726791</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Deng</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Applications of ChatGPT in critical care medicine: opportunities, challenges, and future prospects</article-title>. <source>JMIR Preprints</source>. 10/05/2024 60412. doi: <pub-id pub-id-type="doi">10.2196/preprints.60412</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mahajan</surname> <given-names>K.</given-names></name> <name><surname>Kunte</surname> <given-names>S.</given-names></name> <name><surname>Patil</surname> <given-names>K.</given-names></name> <name><surname>Shah</surname> <given-names>P.</given-names></name> <name><surname>Shah</surname> <given-names>R.</given-names></name> <name><surname>Jajoo</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Artificial intelligence in pediatric dentistry&#x02013;a systematic review</article-title>. <source>J. Dental Res. Rev</source>. <volume>10</volume>, <fpage>7</fpage>&#x02013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.4103/jdrr.jdrr_199_22</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Muntean</surname> <given-names>A.</given-names></name> <name><surname>Mesaros</surname> <given-names>A. S.</given-names></name> <name><surname>Festila</surname> <given-names>D.</given-names></name> <name><surname>Mesaros</surname> <given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>Modern management of dental decay in children and adolescents &#x02013; a review</article-title>. <source>Clujul Med</source>. <volume>88</volume>, <fpage>137</fpage>&#x02013;<lpage>139</lpage>. doi: <pub-id pub-id-type="doi">10.15386/cjmed-401</pub-id><pub-id pub-id-type="pmid">26528061</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ong</surname> <given-names>S. H.</given-names></name> <name><surname>Kim</surname> <given-names>H.</given-names></name> <name><surname>Song</surname> <given-names>J. S.</given-names></name> <name><surname>Shin</surname> <given-names>T. J.</given-names></name> <name><surname>Hyun</surname> <given-names>H. K.</given-names></name> <name><surname>Jang</surname> <given-names>K. T.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Fully automated deep learning approach to dental development assessment in panoramic radiographs</article-title>. <source>BMC Oral Health</source> <volume>24</volume>:<fpage>426</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12903-024-04160-6</pub-id><pub-id pub-id-type="pmid">38582843</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="web"><collab>OpenAI</collab> (<year>2023</year>). <source>ChatGPT; Large Language Model, 14 March 2023 Version</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://openai.com/chatgpt">https://openai.com/chatgpt</ext-link> (Accessed December 3, 2024).</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="web"><collab>Oral Health in America: Advances and Challenges (Internet)</collab>. (<year>2021</year>). <article-title>Section 2A, Oral Health Across the Lifespan: Children</article-title>. <publisher-loc>Bethesda (MD)</publisher-loc>: <publisher-name>National Institute of Dental and Craniofacial Research (US)</publisher-name>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK578299/">https://www.ncbi.nlm.nih.gov/books/NBK578299/</ext-link> (Accessed January 5, 2025).</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ossowska</surname> <given-names>A.</given-names></name> <name><surname>Kusiak</surname> <given-names>A.</given-names></name> <name><surname>&#x0015A;wietlik</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Artificial intelligence in dentistry&#x02013;narrative review</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>19</volume>:<fpage>3449</fpage>. doi: <pub-id pub-id-type="doi">10.3390/ijerph19063449</pub-id><pub-id pub-id-type="pmid">35329136</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pham</surname> <given-names>T. D.</given-names></name></person-group> (<year>2025</year>). <article-title>Classification of pediatric dental diseases from panoramic radiographs using natural language transformer and deep learning models</article-title>. <source>medRxiv</source>. doi: <pub-id pub-id-type="doi">10.1101/2025.01.30.25321418</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Saha</surname> <given-names>S.</given-names></name> <name><surname>Xu</surname> <given-names>L.</given-names></name></person-group> (<year>2025</year>). <article-title>Vision transformers on the edge: a comprehensive survey of model compression and acceleration strategies</article-title>. <source>Neurocomputing</source> <volume>643</volume>:<fpage>130417</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neucom.2025.130417</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Szegedy</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Jia</surname> <given-names>Y.</given-names></name> <name><surname>Sermanet</surname> <given-names>P.</given-names></name> <name><surname>Reed</surname> <given-names>S.</given-names></name> <name><surname>Anguelov</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>&#x0201C;Going deeper with convolutions,&#x0201D;</article-title> in <source>Proceedings of the 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source> (Boston, MA: IEEE), <fpage>1</fpage>&#x02013;<lpage>9</lpage>.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tiwari</surname> <given-names>A.</given-names></name> <name><surname>Kumar</surname> <given-names>A.</given-names></name> <name><surname>Jain</surname> <given-names>S.</given-names></name> <name><surname>Dhull</surname> <given-names>K. S.</given-names></name> <name><surname>Sajjanar</surname> <given-names>A.</given-names></name> <name><surname>Puthenkandathil</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Implications of ChatGPT in public health dentistry: a systematic review</article-title>. <source>Cureus</source> <volume>15</volume>:<fpage>e40367</fpage>. doi: <pub-id pub-id-type="doi">10.7759/cureus.40367</pub-id><pub-id pub-id-type="pmid">37456464</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Deng</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Chattopadhyay</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name></person-group> (<year>2025</year>). <article-title>Vision transformers for image classification: a comparative survey</article-title>. <source>Technologies</source> <volume>13</volume>:<fpage>32</fpage>. doi: <pub-id pub-id-type="doi">10.3390/technologies13010032</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Gan</surname> <given-names>W.</given-names></name> <name><surname>Xue</surname> <given-names>Z.</given-names></name> <name><surname>Ni</surname> <given-names>Z.</given-names></name> <name><surname>Zheng</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Performance of ChatGPT on nursing licensure examinations in the United States and China: cross-sectional study</article-title>. <source>JMIR Med. Educ</source>. <volume>10</volume>:<fpage>e52746</fpage>. doi: <pub-id pub-id-type="doi">10.2196/52746</pub-id><pub-id pub-id-type="pmid">39363539</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yong</surname> <given-names>L. P. X.</given-names></name> <name><surname>Tung</surname> <given-names>J. Y. M.</given-names></name> <name><surname>Lee</surname> <given-names>Z. Y.</given-names></name> <name><surname>Kuan</surname> <given-names>W. S.</given-names></name> <name><surname>Chua</surname> <given-names>M. T.</given-names></name></person-group> (<year>2024</year>). <article-title>Performance of large language models in patient complaint resolution: web-based cross-sectional survey</article-title>. <source>J. Med. Internet Res</source>. <volume>26</volume>:<fpage>e56413</fpage>. doi: <pub-id pub-id-type="doi">10.2196/56413</pub-id><pub-id pub-id-type="pmid">39121468</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Ye</surname> <given-names>F.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Xu</surname> <given-names>F.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Wu</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Children&#x00027;s dental panoramic radiographs dataset for caries segmentation and dental disease detection</article-title>. <source>Sci Data</source>. <volume>10</volume>:<fpage>380</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41597-023-02237-5</pub-id><pub-id pub-id-type="pmid">37316638</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname> <given-names>J.</given-names></name> <name><surname>Meng</surname> <given-names>M.</given-names></name> <name><surname>Law</surname> <given-names>C. S.</given-names></name> <name><surname>Rao</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name></person-group> (<year>2018</year>). <article-title>Common dental diseases in children and malocclusion</article-title>. <source>Int. J. Oral Sci</source>. <volume>10</volume>:<fpage>7</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41368-018-0012-3</pub-id><pub-id pub-id-type="pmid">29540669</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2348742/overview">Tse-Yen Yang</ext-link>, Taipei Medical University, Taiwan</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1525158/overview">Emre Sefer</ext-link>, &#x000D6;zye&#x0011F;in University, T&#x000FC;rkiye</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/828109/overview">Konda Mani Saravanan</ext-link>, Bharath Institute of Higher Education and Research, India</p>
</fn>
</fn-group>
</back>
</article>