<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="systematic-review" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Endocrinol.</journal-id>
<journal-title>Frontiers in Endocrinology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Endocrinol.</abbrev-journal-title>
<issn pub-type="epub">1664-2392</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fendo.2025.1506729</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Endocrinology</subject>
<subj-group>
<subject>Systematic Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Beyond genomics: artificial intelligence-powered diagnostics for indeterminate thyroid nodules&#x2014;a systematic review and meta-analysis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jassal</surname>
<given-names>Karishma</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2860969/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Edwards</surname>
<given-names>Melissa</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Koohestani</surname>
<given-names>Afsaneh</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brown</surname>
<given-names>Wendy</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Serpell</surname>
<given-names>Jonathan W.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>James C.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Monash University Endocrine Surgery Unit, Alfred Hospital</institution>, <addr-line>Melbourne, VIC</addr-line>, <country>Australia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Surgery, Central Clinical School, Monash University</institution>, <addr-line>Melbourne, VIC</addr-line>, <country>Australia</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Joseph V Martin, Rutgers University Camden, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Jincao Yao, University of Chinese Academy of Sciences, China</p>
<p>Huang Bin, Zhejiang Hospital, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Karishma Jassal, <email xlink:href="mailto:Karishma.Jassal@monash.edu">Karishma.Jassal@monash.edu</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>05</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1506729</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>04</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Jassal, Edwards, Koohestani, Brown, Serpell and Lee</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Jassal, Edwards, Koohestani, Brown, Serpell and Lee</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>In recent years, artificial intelligence (AI) tools have become widely studied for thyroid ultrasonography (USG) classification. The real-world applicability of these developed tools as pre-operative diagnostic aids is limited due to model overfitting, clinician trust, and a lack of gold standard surgical histology as ground truth class label. The ongoing dilemma within clinical thyroidology is surgical decision making for indeterminate thyroid nodules (ITN). Genomic sequencing classifiers (GSC) have been utilised for this purpose; however, costs and availability preclude universal adoption creating an inequity gap. We conducted this review to analyse the current evidence of AI in ITN diagnosis without the use of GSC.</p>
</sec>
<sec>
<title>Methods</title>
<p>English language articles evaluating the diagnostic accuracy of AI for ITNs were identified. A systematic search of PubMed, Google Scholar, and Scopus from inception to 18 February 2025 was performed using comprehensive search strategies incorporating MeSH headings and keywords relating to AI, indeterminate thyroid nodules, and pre-operative diagnosis. This systematic review and meta-analysis was conducted in accordance with methods recommended by the Cochrane Collaboration (PROSPERO ID CRD42023438011).</p>
</sec>
<sec>
<title>Results</title>
<p>The search strategy yielded 134 records after the removal of duplicates. A total of 20 models were presented in the seven studies included, five of which were radiological driven, one utilised natural language processing, and one focused on cytology. The pooled meta-analysis incorporated 16 area under the curve (AUC) results derived from 15 models across three studies yielding a combined estimate of 0.82 (95% CI: 0.81&#x2013;0.84) indicating moderate-to-good classification performance across machine learning (ML) and deep learning (DL) architectures. However, substantial heterogeneity was observed, particularly among DL models (I&#xb2; = 99.7%, pooled AUC = 0.85, 95% CI: 0.85&#x2013;0.86). Minimal heterogeneity was observed among ML models (I&#xb2; = 0.7%), with a pooled AUC of 0.75 (95% CI: 0.70&#x2013;0.81). Meta-regression analysis performed suggests potential publication bias or systematic differences in model architectures, dataset composition, and validation methodologies.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This review demonstrated the burgeoning potential of AI to be of clinical value in surgical decision making for ITNs; however, study-developed models were unsuitable for clinical implementation based on performance alone at their current states or lacked robust independent external validation. There is substantial capacity for further development in this field.</p>
</sec>
<sec>
<title>Systematic Review Registration</title>
<p>
<uri xlink:href="https://www.crd.york.ac.uk/PROSPERO/">https://www.crd.york.ac.uk/PROSPERO/</uri>, identifier CRD42023438011.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>thyroid cancer</kwd>
<kwd>thyroid nodule - diagnosis</kwd>
<kwd>meta - analysis</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="54"/>
<page-count count="12"/>
<word-count count="5548"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Thyroid Endocrinology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>The prevalence of incidentally detected thyroid nodules in adults is estimated to be between 30% and 70%, the majority of which are inconsequential, and only approximately 5% are ultimately proven to be malignant (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B4">4</xref>). Evaluation of nodules conventionally begins with ultrasonography (USG) where standardised acquisition of radiological features in accordance with one of several Thyroid Image Reporting and Data Systems (TIRADS) leads to further diagnostic steps (<xref ref-type="bibr" rid="B5">5</xref>&#x2013;<xref ref-type="bibr" rid="B7">7</xref>). Fine-needle aspiration cytology (FNAC) subsequently facilitates the categorisation of thyroid nodules as malignant, benign, or indeterminate according to the six-tiered Bethesda classification (<xref ref-type="bibr" rid="B8">8</xref>). Whilst studies have shown that 95% of samples are adequate for interpretation, 20%&#x2013;25% of aspirates are reported as indeterminate (Bethesda categories III&#x2013;V), with substantial variability in the probability of malignancy within this category (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>Standard strategies for clarifying the diagnosis are either diagnostic thyroid lobectomy or repeating FNAC typically for Bethesda III lesions at 3 months from the initial procedure to allow for the resolution of inflammatory changes, which is a safe procedure and a practical approach (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B12">12</xref>). Clinical and sonographic considerations are recommended when electing for repeat sampling and, in the majority of cases, do not lead to diagnostic resolution potentially risking delaying treatment of malignancy (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B12">12</xref>&#x2013;<xref ref-type="bibr" rid="B14">14</xref>). The important caveat in real clinical practice is that the patient still needs to be informed of the highest implied malignancy risk of any FNAC sample, which can lead to confusion and anxiety. Diagnostic lobectomy requires multiple considered steps to preserve parathyroid and recurrent laryngeal nerve function, in addition to the risks of haematoma, infection, and post-operative hypothyroidism (<xref ref-type="bibr" rid="B15">15</xref>&#x2013;<xref ref-type="bibr" rid="B18">18</xref>). Patients with malignancy may subsequently require a second-stage operation for completion of surgical treatment, which can be more technically challenging due to post-operative tissue changes.</p>
<p>More recently, genomic sequencing classifiers (GSC) have been utilised to interrogate indeterminate cytology thyroid nodules (ITNs). GSC displays high specificity and allows avoiding diagnostic surgery in up to 61% of patients on the basis of a benign test (<xref ref-type="bibr" rid="B19">19</xref>&#x2013;<xref ref-type="bibr" rid="B22">22</xref>). This enables a more accurate pre-operative assessment of ITNs. However, the tests are costly, require additional samples to be taken, and are not available in many countries. These barriers preclude the universal adoption of GSC, and as such, hemithyroidectomy remains a key diagnostic tool.</p>
<p>Advances in computational technology have led to the development of artificial intelligence (AI) tools beyond GSC that may be useful in thyroid nodule diagnostics. AI tools in thyroid nodule diagnosis are mostly reported using a single diagnostic modality, such as ultrasonographic or cytological characteristics (<xref ref-type="bibr" rid="B23">23</xref>&#x2013;<xref ref-type="bibr" rid="B27">27</xref>). These single-entity tools tend to have functionality within a particular branch of medicine, but the question remains if they are applicable within surgical decision making where the process is multifaceted.</p>
<p>We therefore sought to conduct a systematic review and meta-analysis to appraise the available evidence related to the pre-operative diagnostic accuracy of AI tools for indeterminate cytology thyroid nodules, excluding GSC.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<p>This systematic review and meta-analysis was conducted in accordance with methods recommended by the Cochrane Collaboration and registered with the International Prospective Register of Systematic Reviews (PROSPERO), reference no. CRD42023438011 (<xref ref-type="bibr" rid="B28">28</xref>). Reporting follows the standards of the Preferred Reporting Items for Systematic Reviews and Meta-analysis Statement (PRISMA) (<xref ref-type="bibr" rid="B29">29</xref>, <xref ref-type="bibr" rid="B30">30</xref>).</p>
<sec id="s2_1">
<label>2.1</label>
<title>Search strategy</title>
<p>English language articles evaluating the diagnostic accuracy of AI for ITNs were identified. A systematic search of PubMed, Google Scholar, and Scopus from inception to 18 February 2025 was performed using comprehensive search strategies incorporating MeSH headings and keywords relating to AI, indeterminate thyroid nodules, and diagnosis [Boolean string; <italic>preop* AND (diagno* OR evaluat*) AND (&#x201c;artificial intelligence&#x201d; OR &#x201c;machine learning&#x201d;) AND &#x201c;indeterminate thyroid nodules&#x201d;&#x2014;molecular</italic>]. An additional search was conducted specifically to target cytology-based studies [Boolean string; <italic>(&#x201c;thyroid nodule/pathology&#x201d; OR &#x201c;biopsy, fine-needle/methods&#x201d;) AND (&#x201c;artificial intelligence&#x201d; OR &#x201c;machine learning&#x201d;)].</italic> Screening on the title was performed until saturation, which was reached at 50 studies. The papers in the reference lists of included articles and relevant reviews were reviewed to identify additional eligible publications. The inclusion criteria for this review were developed in accordance with the following PICO framework: Can pre-operative patients with ITNs (P) be evaluated using AI models to predict malignancy (I) in terms of diagnostic accuracy measures (O), compared to standard reference diagnoses, such as final histopathology or other established diagnostic methods (C), excluding studies involving GSC? Both randomised and non-randomised studies were included. Qualitative studies, abstracts, reviews, editorials, and case studies were excluded.</p>
<p>As Bethesda III&#x2013;V nodules are usually managed similarly surgically, the search targeted articles, which included adult patients with ITNs (Bethesda categories III&#x2013;V on FNAC) who underwent surgery. &#x201c;Artificial intelligence&#x201d; was defined as a machine learning (ML) or deep learning (DL) tool that identifies patterns resulting in a prediction. Application of any type of AI models, including classifiers, neural networks, or natural language processing (NLP), was accepted (<xref ref-type="bibr" rid="B31">31</xref>). Both model development and validation studies were included. Only studies that provided a clear distinction between benign and malignant prediction outcomes were considered. For studies that reported results based on histological subtypes or other stratifications, only outcomes relevant to benign and malignant classification were extracted for statistical analysis. Where a study included patients with all Bethesda categories, only outcomes relating to those with indeterminate cytology were considered. The primary outcome measure was model performance, including diagnostic accuracy, area under the curve (AUC), sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data abstraction</title>
<p>Titles and abstracts were independently and manually screened by two reviewers (KJ and ME) using explicit pre-determined criteria. Inconsistencies were resolved through consultation with a third reviewer (JL). Data were extracted from each eligible study by one reviewer (KJ) using a standardised electronic form.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Risk of bias assessment</title>
<p>The Prediction model Risk Of Bias Assessment Tool (PROBAST), used to evaluate the risk of bias (ROB) and applicability of diagnostic and prognostic prediction model studies, was used to assess the included studies (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>). ROB and concerns regarding applicability were evaluated with respect to the randomisation process, appropriateness of inclusion/exclusion criteria of participants, assessment of predictors of models created, completeness of outcome data, and model analysis. Overall, ROB was judged as low if all domains assessed returned a low-risk result.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Data synthesis and analysis</title>
<p>Narrative synthesis was used to summarise the main outcomes of interest. Meta-analysis was performed where three or more models assessing a specific outcome measure with an estimate of precision were included. With these criteria, meta-analysis of the area under the curve (AUC) was possible. Statistical analysis was performed using the metan estimation package from Stata/IC for Windows, version 14.2. Given the variability in study designs, random effect models were applied. A value of p &lt; 0.05 was considered statistically significant. Heterogeneity was assessed using Cochran&#x2019;s Q test (Chi-square test) and quantified using I&#xb2;. Meta-regression was conducted using weighted least squares regression, with standard error (SE) of AUC as the predictor and inverse variance (1/SE&#xb2;) as weights. Publication bias was evaluated using Egger&#x2019;s test. A funnel plot was generated using the metafunnel estimation package.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Study selection</title>
<p>The search strategy yielded 134 records after removal of duplicates. Fourteen papers were identified for full text assessment with seven studies meeting the criteria for inclusion in the systematic review (PRISMA flowchart of study selection shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). A summary of results from the included studies and models is given in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>PRISMA flowchart of identification of studies.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-16-1506729-g001.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Summary of study characteristics and results.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Author</th>
<th valign="middle" align="center">Subgroup</th>
<th valign="middle" align="center">Bethesda category</th>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">Evaluation</th>
<th valign="middle" align="center">Dataset</th>
<th valign="middle" align="center">N</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Sensitivity</th>
<th valign="middle" align="center">Specificity</th>
<th valign="middle" align="center">PPV</th>
<th valign="middle" align="center">NPV</th>
<th valign="middle" align="center">AUC 95% CI</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Gild</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III</td>
<td valign="middle" align="center">RF</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">SI&#x2014;Internal</td>
<td valign="middle" align="center">88</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="bottom" align="center">0.750 (0.620&#x2013;0.840)</td>
</tr>
<tr>
<td valign="middle" align="center">Gild</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">III</td>
<td valign="middle" align="center">ResNet-50</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">SI&#x2014;Internal</td>
<td valign="middle" align="center">88</td>
<td valign="middle" align="center">74.0</td>
<td valign="middle" align="center">82.0</td>
<td valign="middle" align="center">59.0</td>
<td valign="middle" align="center">56.0</td>
<td valign="middle" align="center">84.0</td>
<td valign="bottom" align="center">0.740 (0.590&#x2013;0.830)</td>
</tr>
<tr>
<td valign="middle" align="center">Gild</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">III</td>
<td valign="middle" align="center">ThyNet</td>
<td valign="middle" align="center">Direct classification</td>
<td valign="middle" align="center">SI&#x2014;External</td>
<td valign="middle" align="center">88</td>
<td valign="middle" align="center">64.0</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">Swan</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">AIBx - ResNet, ResNext, DenseNet ensemble</td>
<td valign="middle" align="center">Direct classification</td>
<td valign="middle" align="center">SI&#x2014;External</td>
<td valign="middle" align="center">155</td>
<td valign="middle" align="center">53.0</td>
<td valign="middle" align="center">96.3</td>
<td valign="middle" align="center">50.0</td>
<td valign="middle" align="center">27.9</td>
<td valign="middle" align="center">81.5</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">Keutgen</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">BANN</td>
<td valign="middle" align="center">5-fold cross validation</td>
<td valign="middle" align="center">SI&#x2014;Internal</td>
<td valign="middle" align="center">19</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="bottom" align="center">0.880 (0.700&#x2013;1.060)</td>
</tr>
<tr>
<td valign="middle" align="center">Keutgen</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">BANN</td>
<td valign="middle" align="center">Direct classification</td>
<td valign="middle" align="center">SI&#x2014;External</td>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="bottom" align="center">0.680 (0.460&#x2013;0.900)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">RF</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">79.1</td>
<td valign="middle" align="center">75.5</td>
<td valign="middle" align="center">82.4</td>
<td valign="middle" align="center">80.3</td>
<td valign="middle" align="center">79.0</td>
<td valign="bottom" align="center">0.859 (0.700&#x2013;0.970)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">KNN</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">64.4</td>
<td valign="middle" align="center">52.7</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">66.9</td>
<td valign="middle" align="center">63.5</td>
<td valign="bottom" align="center">0.664 (0.460&#x2013;0.800)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">Ridge</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">65.7</td>
<td valign="middle" align="center">58.6</td>
<td valign="middle" align="center">72.3</td>
<td valign="middle" align="center">66.6</td>
<td valign="middle" align="center">58.5</td>
<td valign="bottom" align="center">0.694 (0.480&#x2013;0.850)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">GNB</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">61.4</td>
<td valign="middle" align="center">30.9</td>
<td valign="middle" align="center">89.8</td>
<td valign="middle" align="center">74.2</td>
<td valign="middle" align="center">58.5</td>
<td valign="bottom" align="center">0.694 (0.520&#x2013;0.870)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">SVM</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">63.1</td>
<td valign="middle" align="center">60.8</td>
<td valign="middle" align="center">65.3</td>
<td valign="middle" align="center">62.2</td>
<td valign="middle" align="center">64.5</td>
<td valign="bottom" align="center">0.683 (0.490&#x2013;0.840)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">ET</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">74.8</td>
<td valign="middle" align="center">70.5</td>
<td valign="middle" align="center">78.8</td>
<td valign="middle" align="center">76.0</td>
<td valign="middle" align="center">74.8</td>
<td valign="bottom" align="center">0.832 (0.660&#x2013;0.940)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">AB</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">72.1</td>
<td valign="middle" align="center">65.1</td>
<td valign="middle" align="center">78.7</td>
<td valign="middle" align="center">74.5</td>
<td valign="middle" align="center">71.2</td>
<td valign="bottom" align="center">0.778 (0.620&#x2013;0.910)</td>
</tr>
<tr>
<td valign="middle" align="center">Luong</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">GB</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">355</td>
<td valign="middle" align="center">77.7</td>
<td valign="middle" align="center">74.5</td>
<td valign="middle" align="center">80.7</td>
<td valign="middle" align="center">78.7</td>
<td valign="middle" align="center">77.8</td>
<td valign="bottom" align="center">0.830 (0.680&#x2013;0.950)</td>
</tr>
<tr>
<td valign="middle" align="center">Saini</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III</td>
<td valign="middle" align="center">ANN</td>
<td valign="middle" align="center">Direct classification</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">100</td>
<td valign="middle" align="center">100</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="bottom" align="center">1.000 (0.540&#x2013;1.000)</td>
</tr>
<tr>
<td valign="middle" align="center">Chen</td>
<td valign="middle" align="center">ML</td>
<td valign="middle" align="center">III, IV, V</td>
<td valign="middle" align="center">SVM</td>
<td valign="middle" align="center">5-fold cross validation</td>
<td valign="middle" align="center">SI&#x2014;Internal</td>
<td valign="middle" align="center">194</td>
<td valign="middle" align="center">71.8</td>
<td valign="middle" align="center">93.8</td>
<td valign="middle" align="center">56.5</td>
<td valign="middle" align="center">60</td>
<td valign="middle" align="center">92.9</td>
<td valign="bottom" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">Yao</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">IV</td>
<td valign="middle" align="center">ResNet50</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">1670</td>
<td valign="middle" align="center">79.1</td>
<td valign="middle" align="center">86.5</td>
<td valign="middle" align="center">65.8</td>
<td valign="middle" align="center">81.5</td>
<td valign="middle" align="center">74.5</td>
<td valign="bottom" align="center">0.803 (0.794&#x2013;0.812)</td>
</tr>
<tr>
<td valign="middle" align="center">Yao</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">IV</td>
<td valign="middle" align="center">RadImageNet</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">1670</td>
<td valign="middle" align="center">81.6</td>
<td valign="middle" align="center">85.4</td>
<td valign="middle" align="center">69.3</td>
<td valign="middle" align="center">84.0</td>
<td valign="middle" align="center">77.2</td>
<td valign="bottom" align="center">0.836 (0.830&#x2013;0.842)</td>
</tr>
<tr>
<td valign="middle" align="center">Yao</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">IV</td>
<td valign="middle" align="center">ThyNet</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">1670</td>
<td valign="middle" align="center">80.4</td>
<td valign="middle" align="center">88.7</td>
<td valign="middle" align="center">69.9</td>
<td valign="middle" align="center">83.4</td>
<td valign="middle" align="center">73.8</td>
<td valign="bottom" align="center">0.840 (0.834&#x2013;0.846)</td>
</tr>
<tr>
<td valign="middle" align="center">Yao</td>
<td valign="middle" align="center">DL</td>
<td valign="middle" align="center">IV</td>
<td valign="middle" align="center">Swin Transformer</td>
<td valign="middle" align="center">10-fold cross validation</td>
<td valign="middle" align="center">MC&#x2014;Internal</td>
<td valign="middle" align="center">1670</td>
<td valign="middle" align="center">90.8</td>
<td valign="middle" align="center">92.7</td>
<td valign="middle" align="center">89.6</td>
<td valign="middle" align="center">93.9</td>
<td valign="middle" align="center">85.9</td>
<td valign="bottom" align="center">0.935 (0.929&#x2013;0.941)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ML, machine learning; DL, deep learning; RF, random forest; BANN, Bayesian artificial neural network; KNN, K-Nearest Neighbour; GNB, Gaussian Na&#xef;ve Bayes; SVM, support vector machine; ET, Extra Trees; AB, AdaBoost; GB, gradient boosting; SI, single institution; MC, multicentre.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Study characteristics</title>
<p>A total of 20 models were presented in the seven studies included. Of these, 17 models from six studies were independently developed by the corresponding research groups (<xref ref-type="bibr" rid="B34">34</xref>&#x2013;<xref ref-type="bibr" rid="B38">38</xref>), and two studies (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B39">39</xref>) presented external evaluations of previously constructed models without additional pretraining (<xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>). Five studies (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B42">42</xref>) in this review utilised USG images or characteristics, one study (<xref ref-type="bibr" rid="B35">35</xref>) employed an NLP approach, and one study (<xref ref-type="bibr" rid="B38">38</xref>) focused on cytological analysis.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Model outcome measures</title>
<p>Five studies based their outcome measures on surgical histopathology sourced from previously established databases (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B42">42</xref>). One study utilised histopathology to determine malignant outcomes, while a combination of histopathology and close follow-up was used for benign diagnoses (<xref ref-type="bibr" rid="B38">38</xref>). In the remaining study, a previously validated clinical NLP software (Apache cTAKES) extracted data from electronic medical record pathology reports to determine outcomes (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B43">43</xref>). Performance metrics of most models were reported using standardised classification metrics, namely, AUC, accuracy, sensitivity, specificity, PPV, and NPV.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Imaging-based models</title>
<p>Two previously developed USG recognition models were externally evaluated without institution-specific fine-tuning, retraining, or adaptation in separate studies. Gild et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B36">36</xref>) tested ThyNet&#x2019;s performance on their patient dataset. ThyNet is a DL network with a reported accuracy of 89.1% in its original study (<xref ref-type="bibr" rid="B41">41</xref>). ThyNet achieved an overall accuracy of 64% in this external evaluation (<xref ref-type="bibr" rid="B34">34</xref>). Swan et&#xa0;al. (<xref ref-type="bibr" rid="B39">39</xref>) retrospectively analysed the performance of AIBx (<xref ref-type="bibr" rid="B40">40</xref>) on Bethesda III&#x2013;V nodules. AIBx is a USG image similarity AI model for the risk stratification of thyroid nodules. The external evaluation of AIBx vs. European Thyroid Association TIRADS for ITNs reports an accuracy of 53.0% vs. 32.2%, PPV of 27.9% vs. 25.2%, NPV of 81.5% vs. 91.7%, sensitivity of 96.3% vs. 63.0%, and specificity of 50.0% vs. 12.5% (<xref ref-type="bibr" rid="B39">39</xref>).</p>
<p>One study (<xref ref-type="bibr" rid="B34">34</xref>) tested the performance of their two trained models: an image classification convolutional neural network (CNN) utilising the ResNet-50 (<xref ref-type="bibr" rid="B44">44</xref>) architecture and a random forest (RF) classifier for first-order statistics of extracted radiomic features. Only Bethesda III nodules were included. The reported AUC for internal validation of the CNN model was 0.74 and 0.75 for the RF radiomics model.</p>
<p>Similarly, Keutgen et&#xa0;al. (<xref ref-type="bibr" rid="B42">42</xref>) extracted radiomics features from thyroid nodule USG images obtained from two institutions and utilised a two-class Bayesian artificial neural network classifier to predict the final surgical histopathology of indeterminate cytology nodules. Internal validation results demonstrated an AUC of 0.88 for malignant vs. benign classification and 0.68 on external validation.</p>
<p>A study by Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) evaluated multiple AI models for diagnosing Bethesda IV nodules using USG imaging data collected from five hospitals. Four AI models were trained using a transfer-learning approach, including Swin Transformer, ThyNet, RadImageNet, and ResNet-50 to predict histological outcomes of follicular thyroid cancer (FTC) vs. follicular variant papillary thyroid cancer (FVPTC) vs. benign nodules (<xref ref-type="bibr" rid="B41">41</xref>, <xref ref-type="bibr" rid="B45">45</xref>). Model performance was consistent across test sets and 10-fold cross validation, with Swin Transformer achieving the highest AUC (0.917&#x2013;0.945). PPV and NPV were 93.9% and 85.9%, respectively.</p>
<p>Chen et&#xa0;al. (<xref ref-type="bibr" rid="B37">37</xref>) trained a support vector machine (SVM) classifier to distinguish benign nodules from malignant ones utilising five ultrasound input parameters along with nodule size, patient age, and sex. Two radiologists, blinded to clinical and histopathological outcomes, independently reviewed and scored the ultrasound features according to the American College of Radiology TIRADS (ACR TIRADS) criteria&#x2014;composition, echogenicity, shape, margin, and echogenic foci. A third senior radiologist resolved any disagreements. The model achieved a sensitivity of 93.8%, with a specificity of 56.5%. The NPV for Bethesda III and IV nodules was 93.9% and 93.8%, respectively. Compared to the 2017 ACR TIRADS, the SVM model demonstrated superior performance in distinguishing benign ITNs.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Natural language-processing models</title>
<p>A study by Luong et&#xa0;al. (<xref ref-type="bibr" rid="B35">35</xref>) utilised a previously validated NLP model, the Mayo clinical text analysis and knowledge extraction system (cTAKES) (<xref ref-type="bibr" rid="B43">43</xref>), to construct several classifier models. This retrospective study included 355 Bethesda III&#x2013;V nodules from adult patients investigating the utility of cTAKES NLP analysis of readily available electronic medical records (EMR) in predicting malignancy for ITNs. Features extracted from the EMR were age of first FNAC, nodule diameter, height, width, echogenicity, presence of calcification on USG, FNAC results, &#x201c;largest dimension on cytology,&#x201d; race, and sex.</p>
<p>The performance of the following eight classifiers was evaluated: Gradient Boosting, SVM, Ridge, Gaussian Na&#xef;ve Bayes, K-Nearest Neighbour, RF, Extra Trees, and AdaBoost. On average, the accuracy of the classifiers tested was 70.0%, sensitivity 61.1%, specificity 77.9%, PPV 72.4%, NPV 69.4%, and AUC 0.754. The RF classifier performed the best overall, with an accuracy of 79.1%, sensitivity of 75.5%, specificity of 82.4%, PPV of 80.3%, NPV of 79.0%, and AUC of 0.859. The K-Nearest Neighbour classifier produced the least successful results with 64.4% accuracy, 52.7% sensitivity, 75.3% specificity, 66.9% PPV, 63.5% NPV, and 0.664 AUC.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Cytology-based models</title>
<p>Saini et&#xa0;al. developed an artificial neural network (ANN) model to predict the risk of malignancy in Bethesda category III nodules based on FNAC features. Cytological features were subjectively graded by two independent observers and used as input parameters within the ANN that was constructed for binary classification. The features assessed included nuclear pleomorphism, microfollicle formation, nuclear grooving, intranuclear inclusions, nucleoli prominence, Hurthle cell changes, colloid presence, cellularity, and nuclear chromatin characteristics. Each parameter was graded on a semi-quantitative scale from zero to three based on its prevalence in the smear. The model successfully classified all benign and malignant cases within the study&#x2019;s test set, with an AUC of 1, indicating perfect discrimination (<xref ref-type="bibr" rid="B38">38</xref>).</p>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Meta-analysis</title>
<sec id="s3_7_1">
<label>3.7.1</label>
<title>Model variability and generalisation challenges</title>
<p>Dissimilarities between studies were acknowledged and accepted to reach a unified conclusion. A fundamental challenge in AI applications is the lack of generalisability, as many models demonstrate high accuracy in controlled environments but underperform when applied to diverse clinical settings. Given that AI in medical diagnostics is still in a relatively early phase of development, there is considerable experimentation with a wide range of models and methodologies. Consequently, the studies included in this review employed a variety of AI algorithms, with notable differences in their training and validation processes. Although this heterogeneity may initially appear to be a limitation, it is reflective of the ongoing iterative process of AI development. Moreover, this diversity strengthens our analysis by providing a more comprehensive evaluation of AI model performance across different contexts. By integrating these disparate results, we gain a broader understanding of the current capabilities and limitations of AI in the pre-operative diagnosis of indeterminate thyroid nodules, which is essential for guiding future research and development.</p>
</sec>
<sec id="s3_7_2">
<label>3.7.2</label>
<title>Pooled analysis</title>
<p>Our pooled meta-analysis incorporated 16 AUC results derived from 15 distinct models across four studies. The AUC values from the two models presented by Gild et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>), the four models from Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>), along with the eight models developed by Luong et&#xa0;al. (<xref ref-type="bibr" rid="B35">35</xref>), as well as the results of the model by Keutgen et&#xa0;al. (<xref ref-type="bibr" rid="B42">42</xref>), tested on two distinct cohorts, were combined for analysis (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). The model by Saini et&#xa0;al. (<xref ref-type="bibr" rid="B38">38</xref>) was excluded due to an AUC of 1.0 indicating perfect separation in a limited cohort (N = 11), which raises concerns regarding a meaningful estimate of real-world model discrimination. The pooled analysis of AUC across studies yielded a combined estimate of 0.82 (95% CI: 0.81&#x2013;0.84) indicating moderate to good classification performance across ML and DL models (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). However, there is considerable heterogeneity among the studies, as indicated by a calculated I&#xb2; value of 99.3%. The funnel plot was asymmetrical (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Forest plot of a random effects meta-analysis of area under the curve (AUC) for the observed AI models predicting malignancy in indeterminate cytology thyroid nodules. BANN, Bayesian artificial neural network.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-16-1506729-g002.tif"/>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Funnel plot with pseudo 95% confidence limits of the area under the curve (AUC) versus the standard error (SE) for each of the included studies (<italic>N</italic> = 16). RF, random forest; BANN, Bayesian artificial neural network; KNN, K-Nearest Neighbour; GNB, Gaussian Na&#xef;ve Bayes; SVM, support vector machine; ET, Extra Trees; AB, AdaBoost; GB, gradient boosting.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-16-1506729-g003.tif"/>
</fig>
</sec>
<sec id="s3_7_3">
<label>3.7.3</label>
<title>Heterogeneity analysis</title>
<p>To further investigate the sources of heterogeneity, a subgroup analysis was conducted to compare the performance of ML and DL models. This revealed minimal heterogeneity in ML models (I&#xb2; = 0.7%, pooled AUC = 0.75, 95% CI: 0.70&#x2013;0.81), whereas DL models exhibited substantial heterogeneity (I&#xb2; = 99.7%, pooled AUC = 0.85, 95% CI: 0.85&#x2013;0.86), suggesting inconsistent performance (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). Egger&#x2019;s test was significant (p = 0.0006) indicating potential publication bias or systematic differences in study characteristics amongst DL models. Meta-regression found no significant association between SE and AUC (p = 0.568), suggesting that heterogeneity is not explained by study precision alone and may instead be influenced by differences in DL model architectures, dataset composition, or validation methodologies (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B35">35</xref>).</p>
</sec>
</sec>
<sec id="s3_8">
<label>3.8</label>
<title>Risk of bias assessment with the PROBAST</title>
<p>Overall, the PROBAST assessment revealed a &#x201c;low risk of bias&#x201d; for the studies by Keutgen et&#xa0;al. (<xref ref-type="bibr" rid="B42">42</xref>) and Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>), while the other studies were judged to have an overall &#x201c;high risk of bias&#x201d; in one or more domains (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). All studies exhibited an overall &#x201c;low concern&#x201d; for applicability to the review question. <xref ref-type="supplementary-material" rid="SM3">
<bold>Supplementary Material 3</bold>
</xref> provides a narrative risk of bias analysis for each study.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Tabular presentation of PROBAST results.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Study</th>
<th valign="bottom" colspan="4" align="center">Risk of bias</th>
<th valign="bottom" colspan="3" align="center">Applicability</th>
<th valign="bottom" colspan="2" align="center">Overall</th>
</tr>
<tr>
<th valign="middle" align="center">Participants</th>
<th valign="middle" align="center">Predictors</th>
<th valign="middle" align="center">Outcome</th>
<th valign="middle" align="center">Analysis</th>
<th valign="middle" align="center">Participants</th>
<th valign="middle" align="center">Predictors</th>
<th valign="middle" align="center">Outcome</th>
<th valign="middle" align="center">Risk of bias</th>
<th valign="middle" align="center">Applicability</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Gild 2021(<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Swan 2022(<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Keutgen 2022(<xref ref-type="bibr" rid="B42">42</xref>)</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Luong 2021(<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Yao 2023(<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Chen 2022(<xref ref-type="bibr" rid="B37">37</xref>)</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
</tr>
<tr>
<td valign="middle" align="center">Saini 2022(<xref ref-type="bibr" rid="B38">38</xref>)</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">+</td>
<td valign="middle" align="center">?</td>
<td valign="middle" align="center">&#x2212;</td>
<td valign="middle" align="center">+</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>PROBAST, Prediction model Risk Of Bias Assessment Tool; ROB, risk of bias.</p>
</fn>
<fn>
<p>+ indicates low ROB/low concern regarding applicability, &#x2212; indicates high ROB/high concern regarding applicability, and ? indicates unclear ROB/unclear concern regarding applicability.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Main findings</title>
<p>The efficacy of AI tools for the pre-operative diagnosis of ITNs without the use of GSC was assessed by seven studies, five of which were radiologically driven, one cytology based, and one of which utilised NLP on unstructured EMR data. The 16 AI models suitable for meta-analysis had varying performances and accuracies, with a pooled AUC of 0.82. All included studies demonstrate the potential of AI to be of clinical value; however, there are limitations and substantial capacity for further development.</p>
<p>The externally validated AIBx model achieved an accuracy of 51% for all included nodules and 53% for ITNs, which restricts external institution clinical implementation currently based on performance alone (<xref ref-type="bibr" rid="B39">39</xref>). Similarly, the model developed by Keutgen et&#xa0;al. (<xref ref-type="bibr" rid="B42">42</xref>) demonstrated an internal validation AUC of 0.88 indicative of strong predictive performance within its own institution. However, this markedly decreased to 0.68 upon external validation suggesting a potential issue with overfitting. Overfitting occurs when a model performs exceptionally well on the training data but fails to generalise effectively to new, unseen datasets. This phenomenon can lead to inflated performance metrics during initial assessments, which may not reflect the model&#x2019;s true applicability in clinical settings. Unlike studies that exhibit strong internal performance but degrade significantly upon external validation, Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) employed multicentre data within a 10-fold cross validation framework, rather than testing on an independent external dataset. As such, the consistently high AUCs reported in their study (AUC range: 0.80&#x2013;0.94) may reflect the advantages of training and validation strategies rather than true external generalisability. While the study did compare model performance across independent test sets within their multicentre dataset, which provides some assessment of generalisability across institutions, this validation, however, was not structured to specifically evaluate performance in distinguishing benign from malignant cases across an entirely unseen cohort. These findings underscore the importance of considering dataset handling and model evaluation design when interpreting validation results.</p>
<p>A similar methodology was employed by Gild et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>), who also applied 10-fold cross validation but on a significantly smaller, single-centre dataset (N = 88). Their ResNet-50 model achieved an AUC of 0.740 (95% CI: 0.590&#x2013;0.830) notably lower than the AUC of 0.803 (95% CI: 0.794&#x2013;0.812) reported by Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) for the same architecture. This discrepancy likely reflects differences in dataset size and diversity, as the multicentre cohort provided greater heterogeneity and a larger sample for training. The greater performance variability in the results of Gild et&#xa0;al. suggests that their model was more susceptible to overfitting due to the limited dataset size. Yao et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) also retrained ThyNet on their dataset achieving a higher AUC of 0.840 (95% CI: 0.834&#x2013;0.846). In contrast, Gild et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>) tested it directly on their dataset, where it yielded an overall accuracy of 0.64. The lower performance of ThyNet in this setting suggests that, despite being specifically trained for thyroid imaging on 18,049 images, it struggled to generalise effectively when applied to an unseen dataset without adaptation.</p>
<p>It is notable that the Swin Transformer, despite being originally trained on a general purpose ImageNet-1000 (<xref ref-type="bibr" rid="B46">46</xref>) dataset, outperformed the institutionally fine-tuned ThyNet. This may be attributed to the Swin Transformer&#x2019;s shifted window attention mechanism, which enhances the model&#x2019;s ability to process medical images more effectively (<xref ref-type="bibr" rid="B47">47</xref>). The observed performance advantage aligns with our heterogeneity analysis, which suggests that variations in model architectures contribute significantly to differences in model performance. The lack of a significant association between SE and AUC in the meta-regression further reinforces this notion indicating that small-study effects do not fully explain the observed heterogeneity. Instead, systematic differences in DL architectures and methodological choices emerge as primary contributors. These findings emphasise the need for standardised evaluation frameworks and rigorous validation practices in AI research to enhance reproducibility.</p>
<p>The NLP-driven models produced a mean accuracy of 70% and mean AUC of 0.754 across all classifiers (<xref ref-type="bibr" rid="B35">35</xref>). Notably, the study indicated that echogenicity and calcification were of low feature importance in predicting malignancy, a finding that contradicts the established TIRADS criteria. This discrepancy may be attributable to the high rate of missingness associated with these two variables, thereby limiting the generalisability of the results. Data imputation techniques were utilised to populate the missing values of these categorical variables. However, echogenicity and calcification had a missing rate of 99% and 88%, respectively, and in such high proportions of incomplete data, estimates are likely to be biased.</p>
<p>Both Saini et&#xa0;al. (<xref ref-type="bibr" rid="B38">38</xref>) and Chen et&#xa0;al. (<xref ref-type="bibr" rid="B37">37</xref>) rely on manually assessed imaging or cytological parameters, making their models prone to inter-reader variability and limiting reproducibility. The ANN is trained on semi-quantitative cytological features, which depend on subjective grading by independent observers, introducing variability in how key predictors are assessed (<xref ref-type="bibr" rid="B38">38</xref>). Chen et&#xa0;al. (<xref ref-type="bibr" rid="B37">37</xref>) similarly employ an SVM model trained on USG features manually evaluated by radiologists. The retrospective reassessment of included images, all obtained using high-frequency linear transducers, potentially leads to inconsistencies in real-world applications. Since neither study incorporates automated feature extraction, their performance may vary across institutions and readers with different expertise levels. Without independent validation, the generalisability of these models remains uncertain. Future DL approaches that extract imaging features directly from raw data could enhance clinical applicability by reducing dependence on subjective interpretation.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Related works</title>
<p>Swan et&#xa0;al. (<xref ref-type="bibr" rid="B39">39</xref>) externally validated AIBx, which was developed in 2019 at Mercy Hospital, USA, utilising USG images of thyroid nodules obtained from patients who underwent biopsy or thyroid surgery between February 2012 and February 2017. Only nodules with a definitive diagnosis of benign or malignant were included in AIBx&#x2019;s construct. A total of 482 nodules fulfilled the inclusion criteria, with all available images used to create their image similarity AI model. The architecture comprised a 34-layer CNN known as ResNet-34. The CNN generated image embeddings, which are <italic>N</italic>-dimensional vectors representing unique images. These embeddings were used to find similar images from a database using a nearest neighbour algorithm. The output includes <italic>N</italic> number of nearest neighbours along with their corresponding labels of benign vs. malignant (<xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B49">49</xref>). The model was internally validated using 103 thyroid nodules that underwent biopsy or surgery from March 2017 to July 2018. Accuracy, sensitivity, specificity, PPV, and NPV of the model were 81.5%, 87.8%, 78.5%, 65.9%, and 93.2%, respectively (<xref ref-type="bibr" rid="B40">40</xref>). Compared to USG thyroid cancer risk stratification systems, AIBx exhibited comparable performance suggesting that from an institutional perspective, the model has the potential to avoid unnecessary FNAC (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>).</p>
<p>Similar to AIBx, ThyNet was designed as a strategy to help radiologists avoid unnecessary FNAC. Its structure is an integrated network of ResNet (<xref ref-type="bibr" rid="B50">50</xref>), ResNext (<xref ref-type="bibr" rid="B51">51</xref>), and DenseNet (<xref ref-type="bibr" rid="B50">50</xref>), which, when evaluated individually on internal validation sets, achieved AUCs of 0.9376, 0.9348, and 0.9401, respectively, in classifying nodules into benign or malignant. After model ensemble, the AUC achieved was 0.9504, which outperformed any one individual model. In a simulated scenario, a radiologist assisted by ThyNet strategy was reported to decrease the number of FNAC from 61.9% to 35.2%, and the missed malignancy rate decreased from 18.9% to 17.0%. In the real-world clinical setting test of ThyNet, the AUC of a thyroid nodule diagnosis, where radiologists reviewed static images only, was 0.823 (95% CI 0.812&#x2013;0.835); the AUC of a diagnosis where radiologists reviewed both videos and images improved to 0.862 (0.851&#x2013;0.872; p &lt; 0.0001); and finally, when radiologists were assisted by ThyNet, the AUC improved to 0.873 (0.863&#x2013;0.883; p &lt; 0.0001) (<xref ref-type="bibr" rid="B41">41</xref>). These findings suggest that the ThyNet system could potentially be used to complement the decision-making process of FNAC alongside radiologists; however, as a stand-alone diagnostic system for Bethesda III nodules, it has restricted applicability (<xref ref-type="bibr" rid="B34">34</xref>).</p>
<p>The clinical narrative has unique characteristics different from other forms of literature and text. NLP within healthcare leverages this distinctive lexicon, and these models are trained to extract precise information from large amounts of unstructured clinical text while considering contextual factors. This form of language-based AI has been explored in aiding the interpretation of thyroid USG reports as these can be rather challenging due to the lack of standardised synoptic reporting despite the TIRADS score. In two studies by the same group, USG reports were interpreted by clinicians as a gold standard and compared with NLP data extraction using cTAKES. Results suggest the need for improved synoptic reporting of thyroid USG, as NLP was effective in automated extraction of data from USG reports; however, the lack of standardised synoptic reporting caused a significant difference between gold standard and NLP performance (<xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B52">52</xref>, <xref ref-type="bibr" rid="B53">53</xref>).</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Limitations</title>
<p>A meta-analysis of the performance of AI models in healthcare presents inherent challenges. Traditional meta-analysis requires studies to have similar interventions; however, at present, AI research attempts to investigate intrinsically different model architectures and their optimal applicability tested on identical patient cohorts. These difficulties are a result of data scarcity, early-stage research, and a presently evolving landscape. However, to provide a synthesised inference of the current available evidence, we did conduct a meta-analysis. Additionally, there are limitations to AI tools in healthcare apart from a model&#x2019;s performance compared with a human expert. The ability for widespread application and adaptability is a major drawback. This challenge arises from a dataset bias, as most AI models are trained on a single institution or hubs&#x2019; data. Once trained on a particular demographic, AI models tend to lose their diversity in transferability and are unable to perform as well in an external setting, similar to the findings from the external validation of AIBx and ThyNet (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B39">39</xref>).</p>
<p>Informed consent is significant in decision-making tools. AI as a &#x201c;black box,&#x201d; however, presents a narrative challenge. It is a difficult and time-consuming effort to explicate a process that lacks a state of explainability. Radiologically driven models in this review generally lacked meaningful decision-making interpretability or model uncertainty assessments. Heat maps were explored alongside image classification algorithms but were often unhelpful for clinical decision support. Alternative approaches, such as image similarity algorithms, have provided more intuitive interfaces for clinicians allowing them to review matched USG images. Yet, the underlying indexing mechanisms remain opaque (<xref ref-type="bibr" rid="B40">40</xref>). Feature energy mapping has also been investigated as a way to visualise model attention, but without clear correlation to established radiological markers, its clinical relevance remains uncertain (<xref ref-type="bibr" rid="B36">36</xref>). A potential solution to these challenges is the integration of interactive interpretability frameworks, such as those used in ThyGPT, which allows clinicians to query AI-generated heat maps, adjust inputs, and observe changes in diagnostic predictions (<xref ref-type="bibr" rid="B54">54</xref>). It additionally incorporates language models that generate structured explanations based on clinical guidelines. While this does not fully resolve model opacity, it improves clinician oversight and aligns AI interpretations more closely with expert reasoning.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This review highlights the current lack of clinically applicable evidence to support the reliable pre-operative diagnosis of ITNs using AI. These tools have a potential role in the risk stratification of thyroid nodules and are in their early stages of establishment. There is a need to investigate the generalisability of models created, as the majority are developed and tested within an institutional setting. Consideration must also be given to ethical issues and trust surrounding the use of AI in healthcare.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>KJ: Conceptualization, Data curation, Writing &#x2013; original draft. ME: Data curation, Formal Analysis, Writing &#x2013; review &amp; editing. AK: Investigation, Writing &#x2013; review &amp; editing. WB: Conceptualization, Methodology, Writing &#x2013; review &amp; editing. JS: Methodology, Writing &#x2013; review &amp; editing. JL: Conceptualization, Data curation, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research and/or publication of this article.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fendo.2025.1506729/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fendo.2025.1506729/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="DataSheet2.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="DataSheet3.docx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reiners</surname> <given-names>C</given-names>
</name>
<name>
<surname>Wegscheider</surname> <given-names>K</given-names>
</name>
<name>
<surname>Schicha</surname> <given-names>H</given-names>
</name>
<name>
<surname>Theissen</surname> <given-names>P</given-names>
</name>
<name>
<surname>Vaupel</surname> <given-names>R</given-names>
</name>
<name>
<surname>Wrbitzky</surname> <given-names>R</given-names>
</name>
<etal/>
</person-group>. <article-title>Prevalence of thyroid disorders in the working population of Germany: ultrasonography screening in 96,278 unselected employees</article-title>. <source>Thyroid</source>. (<year>2004</year>) <volume>14</volume>(<issue>11</issue>):<page-range>926&#x2013;32</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1089/thy.2004.14.926</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>GH</given-names>
</name>
<name>
<surname>Gharib</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Thyroid incidentalomas: management approaches to nonpalpable nodules discovered incidentally on thyroid imaging</article-title>(<year>1997</year>). Available online at: <uri xlink:href="http://annals.org/">http://annals.org/</uri> (Accessed <access-date>April 28, 2025</access-date>).</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shweel</surname> <given-names>M</given-names>
</name>
<name>
<surname>Mansour</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>Diagnostic performance of combined elastosonography scoring and high-resolution ultrasonography for the differentiation of benign and Malignant thyroid nodules</article-title>. <source>Eur J Radiol</source>. (<year>2013</year>) <volume>82</volume>:<fpage>995</fpage>&#x2013;<lpage>1001</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ejrad.2013.02.002</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mazzaferri</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>Thyroid cancer in thyroid nodules: finding a needle in the haystack</article-title>. <source>Am J Med</source>. (<year>1992</year>) <volume>93</volume>(<issue>4</issue>):<page-range>359&#x2013;62</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0002-9343(92)90163-6</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hekimsoy</surname> <given-names>&#x130;</given-names>
</name>
<name>
<surname>&#xd6;zt&#xfc;rk</surname> <given-names>E</given-names>
</name>
<name>
<surname>Ertan</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Orman</surname> <given-names>MN</given-names>
</name>
<name>
<surname>Kavuk&#xe7;u</surname> <given-names>G</given-names>
</name>
<name>
<surname>&#xd6;zgen</surname> <given-names>AG</given-names>
</name>
<etal/>
</person-group>. <article-title>Diagnostic performance rates of the ACR-tirads and EU-tirads based on histopathological evidence</article-title>. <source>Diagn Interventional Radiol</source>. (<year>2021</year>) <volume>27</volume>:<page-range>511&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.5152/dir.2021.20813</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tessler</surname> <given-names>FN</given-names>
</name>
<name>
<surname>Middleton</surname> <given-names>WD</given-names>
</name>
<name>
<surname>Grant</surname> <given-names>EG</given-names>
</name>
<name>
<surname>Hoang</surname> <given-names>JK</given-names>
</name>
<name>
<surname>Berland</surname> <given-names>LL</given-names>
</name>
<name>
<surname>Teefey</surname> <given-names>SA</given-names>
</name>
<etal/>
</person-group>. <article-title>ACR thyroid imaging, reporting and data system (TI-RADS): white paper of the ACR TI-RADS committee</article-title>. <source>J Am Coll Radiol</source>. (<year>2017</year>) <volume>14</volume>:<page-range>587&#x2013;95</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jacr.2017.01.046</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Russ</surname> <given-names>G</given-names>
</name>
<name>
<surname>Bonnema</surname> <given-names>SJ</given-names>
</name>
<name>
<surname>Erdogan</surname> <given-names>MF</given-names>
</name>
<name>
<surname>Durante</surname> <given-names>C</given-names>
</name>
<name>
<surname>Ngu</surname> <given-names>R</given-names>
</name>
<name>
<surname>Leenhardt</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>European thyroid association guidelines for ultrasound Malignancy risk stratification of thyroid nodules in adults: the EU-TIRADS</article-title>. <source>Eur Thyroid J</source>. (<year>2017</year>) <volume>6</volume>:<page-range>225&#x2013;37</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1159/000478927</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cibas</surname> <given-names>ES</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>SZ</given-names>
</name>
</person-group>. <article-title>The Bethesda system for reporting thyroid cytopathology</article-title>. <source>Am J Clin Pathol</source>. (<year>2009</year>) <volume>132</volume>:<page-range>658&#x2013;65</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1309/AJCPPHLWMI3JV4LA</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bongiovanni</surname> <given-names>M</given-names>
</name>
<name>
<surname>Spitale</surname> <given-names>A</given-names>
</name>
<name>
<surname>Faquin</surname> <given-names>WC</given-names>
</name>
<name>
<surname>Mazzucchelli</surname> <given-names>L</given-names>
</name>
<name>
<surname>Baloch</surname> <given-names>ZW</given-names>
</name>
</person-group>. <article-title>The Bethesda system for reporting thyroid cytopathology: A meta-analysis</article-title>. <source>Acta Cytologica</source>. (<year>2012</year>) <volume>56</volume>:<page-range>333&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1159/000339959</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Faquin</surname> <given-names>WC</given-names>
</name>
<name>
<surname>Bongiovanni</surname> <given-names>M</given-names>
</name>
<name>
<surname>Sadow</surname> <given-names>PM</given-names>
</name>
</person-group>. <article-title>Update in thyroid fine needle aspiration</article-title>. <source>Endocr Pathol</source>. (<year>2011</year>) <volume>22</volume>:<page-range>178&#x2013;83</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s12022-011-9182-7</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ohori</surname> <given-names>NP</given-names>
</name>
<name>
<surname>Schoedel</surname> <given-names>KE</given-names>
</name>
</person-group>. <article-title>Variability in the atypia of undetermined significance/follicular lesion of undetermined significance diagnosis in the Bethesda System for Reporting Thyroid Cytopathology: Sources and recommendations</article-title>. <source>Acta Cytol</source>. (<year>2011</year>) <volume>55</volume>:<page-range>492&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1159/000334218</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cibas</surname> <given-names>ES</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>SZ</given-names>
</name>
</person-group>. <article-title>The 2017 bethesda system for reporting thyroid cytopathology</article-title>. <source>Thyroid</source>. (<year>2017</year>) <volume>27</volume>:<page-range>1341&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1089/thy.2017.0500</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benido Silva</surname> <given-names>V</given-names>
</name>
<name>
<surname>Borges Duarte</surname> <given-names>D</given-names>
</name>
<name>
<surname>Teresa Pereira</surname> <given-names>M</given-names>
</name>
<name>
<surname>Couto de Carvalho</surname> <given-names>A</given-names>
</name>
<name>
<surname>Freitas</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>Fine-needle aspiration cytology repetition in thyroid nodules with non-diagnostic findings or atypia of undetermined significance/follicular lesions of undetermined significance: Does time matters</article-title>? <source>Ann Endocrinol (Paris)</source>. (<year>2022</year>) <volume>83</volume>:<page-range>232&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ando.2022.04.001</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doubi</surname> <given-names>A</given-names>
</name>
<name>
<surname>Alrayes</surname> <given-names>NS</given-names>
</name>
<name>
<surname>Alqubaisi</surname> <given-names>AK</given-names>
</name>
<name>
<surname>Al-Dhahri</surname> <given-names>SF</given-names>
</name>
</person-group>. <article-title>The value of repeating fine-needle aspiration for thyroid nodules</article-title>. <source>Ann Saudi Med</source>. (<year>2021</year>) <volume>41</volume>:<fpage>36</fpage>&#x2013;<lpage>42</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5144/0256-4947.2021.36</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ronen</surname> <given-names>O</given-names>
</name>
<name>
<surname>Oichman</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>National differences in cost analysis of Afirma Genomic sequencing classifier</article-title>. <source>Clin Endocrinol (Oxf)</source>. (<year>2021</year>) <volume>94</volume>:<page-range>717&#x2013;24</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cen.14400</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Serpell</surname> <given-names>JW</given-names>
</name>
<name>
<surname>Grodski</surname> <given-names>S</given-names>
</name>
<name>
<surname>Yeung</surname> <given-names>M</given-names>
</name>
<name>
<surname>Swann</surname> <given-names>J</given-names>
</name>
<name>
<surname>Kemp</surname> <given-names>S</given-names>
</name>
<name>
<surname>Johnson</surname> <given-names>W</given-names>
</name>
</person-group>. <article-title>Hemithyroidectomy: A heuristics perspective</article-title>. <source>ANZ J Surg</source>. (<year>2008</year>) <volume>78</volume>:<page-range>1122&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1445-2197.2008.04764.x</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>SY</given-names>
</name>
<name>
<surname>Grodski</surname> <given-names>S</given-names>
</name>
<name>
<surname>Serpell</surname> <given-names>JW</given-names>
</name>
</person-group>. <article-title>Hypothyroidism following hemithyroidectomy: A retrospective review</article-title>. <source>Ann Surg</source>. (<year>2009</year>) <volume>250</volume>:<page-range>991&#x2013;4</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/SLA.0b013e3181ae5426</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Weiss</surname> <given-names>A</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>KC</given-names>
</name>
<name>
<surname>Brumund</surname> <given-names>KT</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>DC</given-names>
</name>
<name>
<surname>Bouvet</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Risk factors for hematoma after thyroidectomy: Results from the nationwide inpatient sample</article-title>. In: <source>Surgery (United states)</source>. <publisher-loc>United States</publisher-loc>: <publisher-name>Mosby Inc</publisher-name> (<year>2014</year>). p. <fpage>399</fpage>&#x2013;<lpage>404</lpage>.</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nikiforov</surname> <given-names>YE</given-names>
</name>
<name>
<surname>Steward</surname> <given-names>DL</given-names>
</name>
<name>
<surname>Carty</surname> <given-names>SE</given-names>
</name>
<name>
<surname>Sippel</surname> <given-names>RS</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>SP</given-names>
</name>
<name>
<surname>Sosa</surname> <given-names>JA</given-names>
</name>
<etal/>
</person-group>. <article-title>Performance of a multigene genomic classifier in thyroid nodules with indeterminate cytology: A prospective blinded multicenter study</article-title>. <source>JAMA Oncol</source>. (<year>2019</year>) <volume>5</volume>:<page-range>204&#x2013;12</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamaoncol.2018.4616</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Livhits</surname> <given-names>MJ</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>CY</given-names>
</name>
<name>
<surname>Kuo</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Nguyen</surname> <given-names>DT</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>J</given-names>
</name>
<name>
<surname>Tseng</surname> <given-names>CH</given-names>
</name>
<etal/>
</person-group>. <article-title>Effectiveness of molecular testing techniques for diagnosis of indeterminate thyroid nodules: A randomized clinical trial</article-title>. <source>JAMA Oncol</source>. (<year>2021</year>) <volume>7</volume>:<page-range>70&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamaoncol.2020.5935</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nikiforova</surname> <given-names>MN</given-names>
</name>
<name>
<surname>Mercurio</surname> <given-names>S</given-names>
</name>
<name>
<surname>Wald</surname> <given-names>AI</given-names>
</name>
<name>
<surname>Barbi de Moura</surname> <given-names>M</given-names>
</name>
<name>
<surname>Callenberg</surname> <given-names>K</given-names>
</name>
<name>
<surname>Santana-Santos</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>Analytical performance of the ThyroSeq v3 genomic classifier for cancer diagnosis in thyroid nodules</article-title>. <source>Cancer</source>. (<year>2018</year>) <volume>124</volume>:<page-range>1682&#x2013;90</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cncr.v124.8</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Randolph</surname> <given-names>GW</given-names>
</name>
<name>
<surname>Sosa</surname> <given-names>JA</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Angell</surname> <given-names>TE</given-names>
</name>
<name>
<surname>Shonka</surname> <given-names>DC</given-names>
</name>
<name>
<surname>LiVolsi</surname> <given-names>VA</given-names>
</name>
<etal/>
</person-group>. <article-title>Preoperative identification of medullary thyroid carcinoma (MTC): clinical validation of the afirma MTC RNA-sequencing classifier</article-title>. <source>Thyroid</source>. (<year>2022</year>) <volume>32</volume>:<page-range>1069&#x2013;76</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1089/thy.2022.0189</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>B</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>J</given-names>
</name>
<name>
<surname>Pei</surname> <given-names>S</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y</given-names>
</name>
<name>
<surname>He</surname> <given-names>X</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Machine learning-assisted system for thyroid nodule diagnosis</article-title>. <source>Thyroid</source>. (<year>2019</year>) <volume>29</volume>:<page-range>858&#x2013;67</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1089/thy.2018.0380</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>VC</given-names>
</name>
<name>
<surname>Rong</surname> <given-names>J</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>JC</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Deep convolutional neural networks in thyroid disease detection: A multi-classification comparison by ultrasonography and computed tomography</article-title>. <source>Comput Methods Programs BioMed</source>. (<year>2022</year>) <volume>220</volume>:<elocation-id>106823</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cmpb.2022.106823</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanyal</surname> <given-names>P</given-names>
</name>
<name>
<surname>Dr</surname> <given-names>TM</given-names>
</name>
<name>
<surname>Barui</surname> <given-names>S</given-names>
</name>
<name>
<surname>Das</surname> <given-names>A</given-names>
</name>
<name>
<surname>Gangopadhyay</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Artificial intelligence in cytopathology: A neural network to identify papillary carcinoma on thyroid fine-needle aspiration cytology smears</article-title>. <source>J Pathol Inform</source>. (<year>2018</year>) <volume>9</volume>:<fpage>43</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4103/jpi.jpi_43_18</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>G</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Automatic thyroid nodule recognition and diagnosis in ultrasound imaging with the YOLOv2 neural network</article-title>. <source>World J Surg Oncol</source>. (<year>2019</year>) <volume>17</volume>:<fpage>12</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12957-019-1558-z</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jeong</surname> <given-names>EY</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>HL</given-names>
</name>
<name>
<surname>Ha</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Park</surname> <given-names>SY</given-names>
</name>
<name>
<surname>Cho</surname> <given-names>YJ</given-names>
</name>
<name>
<surname>Han</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Computer-aided diagnosis system for thyroid nodules on ultrasonography: diagnostic performance and reproducibility based on the experience level of operators</article-title>. <source>Eur Radiol</source>. (<year>2019</year>) <volume>29</volume>:<page-range>1978&#x2013;85</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00330-018-5772-9</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jassal</surname> <given-names>K</given-names>
</name>
<name>
<surname>Edwards</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Beyond genomics: artificial intelligence powered diagnostics for indeterminate thyroid nodules - A systematic review and meta-analysis</article-title>. <source>PROSPERO</source>. (<year>2023</year>), <fpage>CRD42023438011</fpage>. <uri xlink:href="https://www.crd.york.ac.uk/PROSPERO/view/CRD42023438011">https://www.crd.york.ac.uk/PROSPERO/view/CRD42023438011</uri>.</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Macaskill</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Analysing and presenting results</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Deeks</surname> <given-names>JJ</given-names>
</name>
<name>
<surname>Bossuyt</surname> <given-names>PM</given-names>
</name>
<name>
<surname>Gatsonis</surname> <given-names>C</given-names>
</name>
</person-group>, editors. <source>Cochrane Handbook for Systematic Reviews of Diagnostic Test Accuracy. Version10</source>, vol. <volume>10</volume>. <publisher-name>The Cochrane Collaboration</publisher-name> (<year>2010</year>).</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Moher</surname> <given-names>D</given-names>
</name>
</person-group>. <source>Preferred Reporting Items for Systematic Reviews and Meta-analysis Statement</source>. (<year>2009</year>) <publisher-loc>Ottawa, Ontario, Canada</publisher-loc>: <publisher-name>The PRISMA Group</publisher-name>.</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="book">
<person-group person-group-type="editor">
<name>
<surname>Sammut</surname> <given-names>C</given-names>
</name>
<name>
<surname>Webb</surname> <given-names>GI</given-names>
</name>
</person-group>. (Eds.). <source>Encyclopedia of Machine Learning</source>. (<year>2011</year>) (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Springer</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-0-387-30164-8</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wolff</surname> <given-names>RF</given-names>
</name>
<name>
<surname>Moons</surname> <given-names>KGM</given-names>
</name>
<name>
<surname>Riley</surname> <given-names>RD</given-names>
</name>
<name>
<surname>Whiting</surname> <given-names>PF</given-names>
</name>
<name>
<surname>Westwood</surname> <given-names>M</given-names>
</name>
<name>
<surname>Collins</surname> <given-names>GS</given-names>
</name>
<etal/>
</person-group>. <article-title>PROBAST: A tool to assess the risk of bias and applicability of prediction model studies</article-title>. <source>Ann Intern Med</source>. (<year>2019</year>) <volume>170</volume>:<page-range>51&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.7326/M18-1376</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Moons</surname> <given-names>KGM</given-names>
</name>
<name>
<surname>Wolff</surname> <given-names>RF</given-names>
</name>
<name>
<surname>Riley</surname> <given-names>RD</given-names>
</name>
<name>
<surname>Whiting</surname> <given-names>PF</given-names>
</name>
<name>
<surname>Westwood</surname> <given-names>M</given-names>
</name>
<name>
<surname>Collins</surname> <given-names>GS</given-names>
</name>
<etal/>
</person-group>. <article-title>PROBAST: A tool to assess risk of bias and applicability of prediction model studies: Explanation and elaboration</article-title>. In: <source>Annals of Internal Medicine</source>, vol. <volume>170</volume>. <publisher-name>American College of Physicians</publisher-name> (<year>2019</year>). p. <fpage>W1</fpage>&#x2013;<lpage>33</lpage>.</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gild</surname> <given-names>ML</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>M</given-names>
</name>
<name>
<surname>Gajera</surname> <given-names>J</given-names>
</name>
<name>
<surname>Lurie</surname> <given-names>B</given-names>
</name>
<name>
<surname>Gandomkar</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Clifton-Bligh</surname> <given-names>RJ</given-names>
</name>
</person-group>. <article-title>Risk stratification of indeterminate thyroid nodules using ultrasound and machine learning algorithms</article-title>. <source>Clin Endocrinol (Oxf)</source>. (<year>2022</year>) <volume>96</volume>:<page-range>646&#x2013;52</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cen.14612</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luong</surname> <given-names>G</given-names>
</name>
<name>
<surname>Idarraga</surname> <given-names>AJ</given-names>
</name>
<name>
<surname>Hsiao</surname> <given-names>V</given-names>
</name>
<name>
<surname>Schneider</surname> <given-names>DF</given-names>
</name>
</person-group>. <article-title>Risk stratifying indeterminate thyroid nodules with machine learning</article-title>. <source>J Surg Res</source>. (<year>2022</year>) <volume>270</volume>:<page-range>214&#x2013;20</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jss.2021.09.015</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname> <given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>J</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>J</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>AI diagnosis of Bethesda category IV thyroid nodules</article-title>. <source>iScience</source>. (<year>2023</year>) <volume>26</volume>:<elocation-id>108114</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isci.2023.108114</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>L</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>M</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>V</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>KA</given-names>
</name>
<etal/>
</person-group>. <article-title>Machine learning&#x2013;assisted diagnostic system for indeterminate thyroid nodules</article-title>. <source>Ultrasound Med Biol</source>. (<year>2022</year>) <volume>48</volume>:<page-range>1547&#x2013;54</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ultrasmedbio.2022.03.020</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saini</surname> <given-names>T</given-names>
</name>
<name>
<surname>Saikia</surname> <given-names>UN</given-names>
</name>
<name>
<surname>Dey</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>An artificial neural network for the prediction of the risk of malignancy in category III Bethesda thyroid lesions</article-title>. <source>Cytopathology</source>. (<year>2023</year>) <volume>34</volume>:<fpage>48</fpage>&#x2013;<lpage>54</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cyt.13180</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Swan</surname> <given-names>KZ</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>J</given-names>
</name>
<name>
<surname>Nielsen</surname> <given-names>VE</given-names>
</name>
<name>
<surname>Jespersen</surname> <given-names>ML</given-names>
</name>
<name>
<surname>Bonnema</surname> <given-names>SJ</given-names>
</name>
</person-group>. <article-title>External validation of AIBx, an artificial intelligence model for risk stratification, in thyroid nodules</article-title>. <source>Eur Thyroid J</source>. (<year>2022</year>) <volume>11</volume>:<elocation-id>e210129</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1530/ETJ-21-0129</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname> <given-names>J</given-names>
</name>
<name>
<surname>Haertling</surname> <given-names>T</given-names>
</name>
</person-group>. <article-title>AIBx, artificial intelligence model to risk stratify thyroid nodules</article-title>. <source>Thyroid</source>. (<year>2020</year>) <volume>30</volume>:<page-range>878&#x2013;84</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1089/thy.2019.0752</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>S</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>W</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>H</given-names>
</name>
<etal/>
</person-group>. <article-title>Deep learning-based artificial intelligence model to assist thyroid nodule diagnosis and management: a multicentre diagnostic study</article-title>. <source>Lancet Digit Health</source>. (<year>2021</year>) <volume>3</volume>:<page-range>e250&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2589-7500(21)00041-8</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Keutgen</surname> <given-names>XM</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H</given-names>
</name>
<name>
<surname>Memeh</surname> <given-names>K</given-names>
</name>
<name>
<surname>Conn Busch</surname> <given-names>J</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>J</given-names>
</name>
<name>
<surname>Lan</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>A machine-learning algorithm for distinguishing malignant from benign indeterminate thyroid nodules using ultrasound radiomic features</article-title>. <source>J Med Imaging</source>. (<year>2022</year>) <volume>9</volume>:<elocation-id>034501</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1117/1.JMI.9.3.034501</pub-id>
</citation>
</ref>
<ref id="B43">
<label>43</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Savova</surname> <given-names>GK</given-names>
</name>
<name>
<surname>Masanz</surname> <given-names>JJ</given-names>
</name>
<name>
<surname>Ogren</surname> <given-names>PV</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>J</given-names>
</name>
<name>
<surname>Sohn</surname> <given-names>S</given-names>
</name>
<name>
<surname>Kipper-Schuler</surname> <given-names>KC</given-names>
</name>
<etal/>
</person-group>. <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): Architecture, component evaluation and applications</article-title>. <source>J Am Med Inf Assoc</source>. (<year>2010</year>) <volume>17</volume>:<page-range>507&#x2013;13</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
</citation>
</ref>
<ref id="B44">
<label>44</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Deep residual learning for image recognition</article-title> (<year>2015</year>). Available online at: <uri xlink:href="http://arxiv.org/abs/1512.03385">http://arxiv.org/abs/1512.03385</uri> (Accessed <access-date>April 28, 2025</access-date>).</citation>
</ref>
<ref id="B45">
<label>45</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Deep residual learning for image recognition</article-title>. In: <source>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source>. <publisher-name>IEEE</publisher-name> (<year>2016</year>). p. <page-range>770&#x2013;8</page-range>.</citation>
</ref>
<ref id="B46">
<label>46</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Deng</surname> <given-names>J</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>W</given-names>
</name>
<name>
<surname>Socher</surname> <given-names>R</given-names>
</name>
<name>
<surname>Li</surname> <given-names>LJ</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F-F</given-names>
</name>
</person-group>. <article-title>ImageNet: A large-scale hierarchical image database</article-title>. In: <source>2009 IEEE Conference on Computer Vision and Pattern Recognition</source>. <publisher-name>IEEE</publisher-name> (<year>2009</year>). p. <page-range>248&#x2013;55</page-range>.</citation>
</ref>
<ref id="B47">
<label>47</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>H</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z</given-names>
</name>
<etal/>
</person-group>. <article-title>Swin transformer: hierarchical vision transformer using shifted windows</article-title>. Available online at: <uri xlink:href="https://github.com/microsoft/Swin-Transformer">https://github.com/microsoft/Swin-Transformer</uri> (Accessed <access-date>April 28, 2025</access-date>).</citation>
</ref>
<ref id="B48">
<label>48</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>O&#x2019;Shea</surname> <given-names>K</given-names>
</name>
<name>
<surname>Nash</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>An introduction to convolutional neural networks</article-title> (<year>2015</year>). Available online at: <uri xlink:href="http://arxiv.org/abs/1511.08458">http://arxiv.org/abs/1511.08458</uri> (Accessed <access-date>April 28, 2025</access-date>).</citation>
</ref>
<ref id="B49">
<label>49</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cunningham</surname> <given-names>P</given-names>
</name>
<name>
<surname>Delany</surname> <given-names>SJ</given-names>
</name>
</person-group>. <article-title>K-nearest neighbour classifiers-A tutorial</article-title>. In: <source>ACM Computing Surveys</source>, vol. <volume>54</volume>. <publisher-name>Association for Computing Machinery</publisher-name> (<year>2021</year>).</citation>
</ref>
<ref id="B50">
<label>50</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>G</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>van der Maaten</surname> <given-names>L</given-names>
</name>
<name>
<surname>Weinberger</surname> <given-names>KQ</given-names>
</name>
</person-group>. <article-title>Densely connected convolutional networks</article-title>. In: <source>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source>. <publisher-name>IEEE</publisher-name> (<year>2017</year>). p. <page-range>2261&#x2013;9</page-range>.</citation>
</ref>
<ref id="B51">
<label>51</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>S</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P</given-names>
</name>
<name>
<surname>Tu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>He</surname> <given-names>K</given-names>
</name>
</person-group>. <source>Aggregated Residual Transformations for Deep Neural Networks</source>. (<year>2016</year>) <publisher-loc>Ithaca, New York, USA</publisher-loc>: <publisher-name>Cornell University Library</publisher-name>. Available online at: <uri xlink:href="https://arxiv.org">arXiv.org</uri>.</citation>
</ref>
<ref id="B52">
<label>52</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dedhia</surname> <given-names>PH</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K</given-names>
</name>
<name>
<surname>Song</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Larose</surname> <given-names>E</given-names>
</name>
<name>
<surname>Imbus</surname> <given-names>JR</given-names>
</name>
<name>
<surname>Peissig</surname> <given-names>PL</given-names>
</name>
<etal/>
</person-group>. <source>Ambiguous and Incomplete: Natural Language Processing Reveals Problematic Reporting Styles in Thyroid Ultrasound Reports</source>. (<publisher-loc>Durham, NC, USA</publisher-loc>: <publisher-name>Research Square</publisher-name>).</citation>
</ref>
<ref id="B53">
<label>53</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>KJ</given-names>
</name>
<name>
<surname>Dedhia</surname> <given-names>PH</given-names>
</name>
<name>
<surname>Imbus</surname> <given-names>JR</given-names>
</name>
<name>
<surname>Schneider</surname> <given-names>DF</given-names>
</name>
</person-group>. <article-title>Thyroid ultrasound reports: will TI-RADS improve natural language processing capture of critical thyroid nodule features</article-title>? <source>J Surg Res</source>. (<year>2020</year>) <volume>256</volume>:<page-range>557&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jss.2020.07.018</pub-id>
</citation>
</ref>
<ref id="B54">
<label>54</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yao</surname> <given-names>J</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>K</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <source>AI-Generated Content Enhanced Computer-Aided Diagnosis Model for Thyroid Nodules: A ChatGPT-Style Assistant</source>. (<year>2024</year>) <publisher-loc>Ithaca, New York, USA</publisher-loc>: <publisher-name>Cornell University Library</publisher-name>. Available online at: <uri xlink:href="https://arxiv.org">arXiv.org</uri>.</citation>
</ref>
</ref-list>
</back>
</article>