<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="brief-report" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2026.1656161</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Brief Research Report</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Prediction of maturity-onset diabetes of the young subtypes using machine learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Figueroa</surname><given-names>Israel</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3191217/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role></contrib>
<contrib contrib-type="author"><name><surname>Flores</surname><given-names>Ricardo</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3425752/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role></contrib>
<contrib contrib-type="author"><name><surname>Mill&#x00E1;n</surname><given-names>Andrea</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1327380/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role></contrib>
<contrib contrib-type="author"><name><surname>de Dios</surname><given-names>Alejandro</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Frechtel</surname><given-names>Gustavo Daniel</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/742347/overview" />
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="funding-acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding-acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>L&#x00F3;pez</surname><given-names>Ariel Pablo</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3292834/overview" />
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="funding-acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding-acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Mennickent</surname><given-names>Daniela</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2021;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1870028/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Facultad de Ingenier&#x00ED;a, Universidad Cat&#x00F3;lica de la Sant&#x00ED;sima Concepci&#x00F3;n</institution>, <city>Concepci&#x00F3;n</city>, <country country="cl">Chile</country></aff>
<aff id="aff2"><label>2</label><institution>Departamento de Electr&#x00F3;nica e Inform&#x00E1;tica, Universidad T&#x00E9;cnica Federico Santa Mar&#x00ED;a</institution>, <city>Concepci&#x00F3;n</city>, <country country="cl">Chile</country></aff>
<aff id="aff3"><label>3</label><institution>Departamento de Ingenier&#x00ED;a Inform&#x00E1;tica y Ciencias de la Computaci&#x00F3;n, Facultad de Ingenier&#x00ED;a, Universidad de Concepci&#x00F3;n</institution>, <city>Concepci&#x00F3;n</city>, <country country="cl">Chile</country></aff>
<aff id="aff4"><label>4</label><institution>C&#x00E1;tedra de Gen&#x00E9;tica, Facultad de Farmacia y Bioqu&#x00ED;mica, Universidad de Buenos Aires</institution>, <city>Buenos Aires</city>, <country country="ar">Argentina</country></aff>
<aff id="aff5"><label>5</label><institution>Divisi&#x00F3;n Nutrici&#x00F3;n, Hospital de Cl&#x00ED;nicas, Facultad de Medicina, Universidad de Buenos Aires</institution>, <city>Buenos Aires</city>, <country country="ar">Argentina</country></aff>
<aff id="aff6"><label>6</label><institution>Departamento de Ciencias B&#x00E1;sicas y Morfolog&#x00ED;a, Facultad de Medicina, Universidad Cat&#x00F3;lica de la Sant&#x00ED;sima Concepci&#x00F3;n</institution>, <city>Concepci&#x00F3;n</city>, <country country="cl">Chile</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Daniela Mennickent <email xlink:href="mailto:dmennickent@ucsc.cl">dmennickent@ucsc.cl</email> Ariel Pablo L&#x00F3;pez <email xlink:href="mailto:aplopez@ffyb.uba.ar">aplopez@ffyb.uba.ar</email></corresp>
<fn fn-type="equal" id="an1"><label>&#x2020;</label><p>These authors have contributed equally to this work and share last authorship</p></fn>
<fn fn-type="other" id="fn001"><label>&#x2021;</label><p>ORCID Israel Figueroa <uri xlink:href="https://orcid.org/0009-0000-3299-751X">orcid.org/0009-0000-3299-751X</uri> Ricardo Flores <uri xlink:href="https://orcid.org/0000-0002-9466-3993">orcid.org/0000-0002-9466-3993</uri> Andrea Mill&#x00E1;n <uri xlink:href="https://orcid.org/0000-0002-5568-3254">orcid.org/0000-0002-5568-3254</uri> Alejandro de Dios <uri xlink:href="https://orcid.org/0000-0002-2581-3353">orcid.org/0000-0002-2581-3353</uri> Gustavo Daniel Frechtel <uri xlink:href="https://orcid.org/0000-0002-7287-8520">orcid.org/0000-0002-7287-8520</uri> Ariel Pablo L&#x00F3;pez <uri xlink:href="https://orcid.org/0000-0003-2301-2832">orcid.org/0000-0003-2301-2832</uri> Daniela Mennickent <uri xlink:href="https://orcid.org/0000-0002-6962-7632">orcid.org/0000-0002-6962-7632</uri></p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-26"><day>26</day><month>03</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>8</volume><elocation-id>1656161</elocation-id>
<history>
<date date-type="received"><day>20</day><month>02</month><year>2026</year></date>
<date date-type="rev-recd"><day>08</day><month>01</month><year>2026</year></date>
<date date-type="accepted"><day>09</day><month>03</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Figueroa, Flores, Mill&#x00E1;n, de Dios, Frechtel, L&#x00F3;pez and Mennickent.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Figueroa, Flores, Mill&#x00E1;n, de Dios, Frechtel, L&#x00F3;pez and Mennickent</copyright-holder><license><ali:license_ref start_date="2026-03-26">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Maturity-onset diabetes of the young (MODY) is a monogenic type of diabetes caused by different pathogenic genetic variants in glucose metabolism-related genes, with GCK-MODY and HNF1A-MODY subtypes being the most frequent. Diagnosing the specific MODY subtype is essential for correct treatment and follow-up, but it requires gene sequencing, a time-consuming and costly process that depends on highly skilled professionals. Therefore, it is mandatory to develop tools that make it possible to correctly determine the order in which the involved genes should be studied, reducing the number of sequencing procedures needed to find the causal variant and making the diagnostic process more efficient. This proof-of-concept study evaluates machine learning as a complement to clinical characterization and genetic testing, by optimizing binary classification models for explainable prediction of MODY subtypes, with a focus on GCK-MODY and HNF1A-MODY.</p>
</sec><sec><title>Methods</title>
<p>To meet this aim, we analyzed medical data from a diabetes cohort from Buenos Aires, Argentina. By employing imputation and oversampling techniques we created 10 datasets for each subtype to feed a pipeline that trained, optimized and evaluated 10 machine learning techniques.</p>
</sec><sec><title>Results</title>
<p>Gaussian Naive Bayes achieved the best predictive power for GCK-MODY with a ROC AUC score of 0.724, while Random Forest yielded 0.712 for HNF1A-MODY. SHAP analysis provided insights into feature importance, highlighting the explainability of our approach.</p>
</sec><sec><title>Discussion and conclusion</title>
<p>This novel study demonstrates for the first time the viability of machine learning as a supplementary tool prior to MODY genetic testing, by providing cost-effective and explainable models able to assist health professionals in the diagnosis of MODY subtypes.</p>
</sec>
</abstract>
<kwd-group>
<kwd>classification</kwd>
<kwd>diabetes</kwd>
<kwd>explainable AI</kwd>
<kwd>machine learning</kwd>
<kwd>MODY</kwd>
<kwd>subtypes</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was financially supported by Gobierno Regional del Biob&#x00ED;o through FIC-R BIP 40036152-0 &#x201C;Capital Humano Avanzado en Inteligencia Artificial para el Biob&#x00ED;o&#x201D; (DM, IF), and by Instituto Universitario de la Fundaci&#x00F3;n H&#x00E9;ctor Barcel&#x00F3;, Proyectos de investigaci&#x00F3;n cient&#x00ED;fica -Grado y posgrado 2023, Secretar&#x00ED;a de Ciencia y Tecnolog&#x00ED;a (AL). The article processing charge was covered by Universidad Cat&#x00F3;lica de la Sant&#x00ED;sima Concepci&#x00F3;n (DM).</funding-statement></funding-group><counts>
<fig-count count="2"/>
<table-count count="2"/><equation-count count="18"/><ref-count count="44"/><page-count count="11"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Health Informatics</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Diabetes mellitus (DM) represents a complex and heterogeneous group of metabolic disorders marked by chronic hyperglycemia due to defects in insulin secretion, insulin action, or both. This multifaceted condition manifests in various phenotypic forms, ranging from autoimmune-mediated type 1 diabetes (T1DM) to the insulin resistance-dominated type 2 diabetes (T2DM) [<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>]. Beyond these classifications, atypical and hybrid forms challenge conventional diagnostic frameworks, contributing to diagnostic uncertainty and misclassification [<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>]. Management encompasses pharmacological interventions such as insulin therapy, oral antihyperglycemic drugs and/or non-pharmacological strategies, including dietary modifications and structured physical activity, tailored to the individual patient&#x2019;s needs [<xref ref-type="bibr" rid="B5">5</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>].</p>
<p>Maturity-onset diabetes of the young (MODY) is a monogenic type of diabetes that accounts for approximately 1&#x0025;&#x2013;5&#x0025; of all diabetes cases [<xref ref-type="bibr" rid="B3">3</xref>], with data suggesting that prevalence varies by country and remains poorly defined in several regions worldwide [<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>]. It is characterized by onset at a young age, typically before 25 years, and an autosomal dominant inheritance pattern [<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B13">13</xref>]. Despite these defining features, MODY is frequently underdiagnosed or misclassified as T1DM or T2DM, with estimates suggesting that up to 80&#x0025; of cases are incorrectly classified [<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B14">14</xref>]. Genetic testing remains the gold standard for confirming a MODY diagnosis; however, its accessibility is limited by high costs, time requirements, and the need for specialized molecular diagnostics expertise [<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>].</p>
<p>To date, 14 monogenic subtypes of MODY have been described, with GCK-MODY and HNF1A-MODY being the most prevalent. Each subtype is associated with pathogenic variants in a specific gene, relatively characteristic clinical features, and distinct treatment requirements [<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>]. While certain MODY forms may only require lifestyle modifications, others may benefit from the use of particular pharmacological interventions [<xref ref-type="bibr" rid="B19">19</xref>&#x2013;<xref ref-type="bibr" rid="B22">22</xref>]. However, the clinical presentation of MODY is highly heterogeneous and often overlaps with other forms of diabetes, complicating clinical recognition [<xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>]. For example, HNF1A-MODY patients are frequently misdiagnosed as T1DM, while GCK-MODY patients are often classified as having T2DM [<xref ref-type="bibr" rid="B25">25</xref>]. Such misclassifications may lead to unnecessary or inappropriate treatment, increased healthcare costs, and avoidable complications [<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>]. In this context, timely and accurate genetic diagnosis is essential to enable precision treatment and improve long-term outcomes [<xref ref-type="bibr" rid="B28">28</xref>, <xref ref-type="bibr" rid="B29">29</xref>].</p>
<p>The diagnostic process for MODY involves the analysis of regulatory and coding regions of the 14 known MODY-related genes to identify pathogenic variants. Although the number of genes to be investigated is limited, the number of potential mutations within each gene is substantial, making comprehensive sequencing indispensable for accurate diagnosis. In clinical practice, Sanger sequencing is often employed in a sequential, gene-by-gene approach, prioritizing the gene most likely implicated based on the patient&#x2019;s clinical profile. However, due to the overlapping and ambiguous nature of MODY phenotypes, this process is frequently inefficient, time-consuming, and costly [<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>]. These challenges are exacerbated in low- and middle-income countries, including much of Latin America, where access to genetic testing is constrained by financial, infrastructural, and logistical limitations [<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>]. Moreover, uncertainty regarding pathogenicity assessment, particularly for rare or low-penetrant variants, further complicates diagnostic interpretation [<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B35">35</xref>].</p>
<p>In recent years, machine learning has emerged as a promising strategy to support MODY identification by leveraging routinely collected clinical data to guide genetic testing more efficiently. Moreover, the development of explainable machine learning models may allow for interpretable predictions [<xref ref-type="bibr" rid="B36">36</xref>], enabling health professionals to better understand the model&#x2019;s results, and thus to make more informed clinical decisions in a cost-effective manner. Statistical and computational models, such as the University of Exeter MODY probability calculator, have demonstrated improved discrimination between MODY and other diabetes types and increased diagnostic yield in selected populations [<xref ref-type="bibr" rid="B37">37</xref>&#x2013;<xref ref-type="bibr" rid="B39">39</xref>]. However, these models were largely developed in White European cohorts and may not generalize optimally to ethnically diverse populations, where differences in phenotype distribution and prevalence of young-onset T2DM are evident [<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>]. Importantly, existing tools focus on identifying MODY as a broad category and do not address the classification of individual MODY subtypes.</p>
<p>This gap motivates the present study, which aims to evaluate whether explainable machine learning models trained on routinely collected clinical data can predict specific MODY subtypes with meaningful performance. We hypothesize that such models can discriminate between the presence and absence of individual MODY subtypes better than chance while providing interpretable insights into the clinical variables driving these predictions. To this end, we propose a modular proof-of-concept pipeline that frames each MODY subtype as an independent binary classification task, enabling flexible, subtype-specific, and cost-effective clinical evaluation.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<sec id="s2a"><label>2.1</label><title>Ethical aspects</title>
<p>This study was approved by the Ethics Committee of the Hospital de Cl&#x00ED;nicas Jos&#x00E9; de San Mart&#x00ED;n, Facultad de Medicina, Universidad de Buenos Aires, Buenos Aires, Argentina, and was performed in accordance with the World Medical Association Declaration of Helsinki. Participants provided written informed consent to be included in the study.</p>
<p>The informed consent process was carried out during the clinical consultation by the treating physician. Prior to signing the consent form, patients were provided with an information sheet describing in detail the objectives of the study, the type of data collected, the nature of their participation, and the voluntary and confidential character of the research. Patients were allowed to take this information sheet home and consider their participation without coercion. Data were anonymized prior to analysis, and participants retained the right to withdraw from the study at any time. Clinical care was provided according to standard medical practice and was not affected by the patient&#x2019;s decision to participate or not in the study.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Dataset</title>
<p>The dataset used in this study was derived from the anonymized medical records of 520 persons with diabetes treated at Hospital de Cl&#x00ED;nicas Jos&#x00E9; de San Mart&#x00ED;n. From this cohort, 222 patients underwent Sanger sequencing for GCK-MODY, and 122 for HNF1A-MODY, confirming or discarding the respective diagnosis. In addition, 4 subjects were tested for HNF4A-MODY or HNF1B-MODY; however, they were excluded from further analyses due to the very limited sample size. Only records with confirmed Sanger test results for GCK-MODY or HNF1A-MODY were retained, while all remaining entries were discarded.</p>
<p>Approximately 39&#x0025; of the dataset consisted of missing values. To address this problem and maximize data usability for machine learning modelling, different imputation strategies were implemented, as detailed in <xref ref-type="sec" rid="s2d1a">Section 2.4.1.1</xref>.</p>
<sec id="s2b1"><label>2.2.1</label><title>Predictive variables</title>
<p>The dataset comprised 7 variables for prediction, obtained from clinical interviews, physical examination, and routine enzymatic blood laboratory assays. These data were recorded during standard clinical practice by physicians specialized in MODY management during patient assessment and follow-up. The variables included two binary features (sex and family history of diabetes) and five numerical variables (body mass index &#x2014;BMI&#x2014;, blood glycosylated hemoglobin &#x2014;HbA1c&#x2014; levels, age at diagnosis, fasting blood glucose levels, and post-load blood glucose levels 120&#x2009;min after the consumption of 75&#x2009;g of glucose). These variables are routinely used in the medical practice of Hospital de Cl&#x00ED;nicas Jos&#x00E9; de San Mart&#x00ED;n to distinguish persons with MODY from persons with other diabetes phenotypes or from persons without diabetes, and to partially guide medical suspicion regarding which persons are candidates for specific pathogenic variants in MODY-related genes. The selection of these 7 variables was based on prior clinical knowledge of their association with certain cases of GCK-MODY and HNF1A-MODY, as well as on their availability in the hospital records.</p>
</sec>
<sec id="s2b2"><label>2.2.2</label><title>Target variables</title>
<p>Given that MODY variants are not mutually exclusive, binary classification models were trained separately for each target class. Therefore, the dataset was partitioned based on Sanger sequencing results to create distinct subsets for GCK-MODY (162 positive and 60 negative) and HNF1A-MODY (48 positive and 74 negative).</p>
</sec>
</sec>
<sec id="s2c"><label>2.3</label><title>Exploratory statistical analysis</title>
<p>Univariate analyses were performed to assess differences between the MODY-positive and MODY-negative groups across clinical and biochemical variables. For this purpose, the analyses were conducted using the original data, without applying any preprocessing steps. Continuous variables were first tested for normality using the Shapiro-Wilk test. If normally distributed, comparisons were made using the <italic>t</italic>-test and summarized with mean and standard deviation; otherwise, the Mann-Whitney U test was used, reporting median and interquartile range. For categorical variables, group differences were evaluated using Fisher&#x2019;s exact test, with frequencies and percentages reported. Statistical significance was set at <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.05</mml:mn></mml:math></inline-formula>.</p>
<p>In addition to the statistical analysis, the pipeline included the generation of histograms, count plots, and boxplots for selected variables, grouped by class. A global histogram grid was produced to visualize the distribution of all features through density plots. A heatmap of the correlation matrix was computed using Pearson correlation coefficients, and principal component analysis was performed after autoscaling the data as a pretreatment step.</p>
</sec>
<sec id="s2d"><label>2.4</label><title>Machine learning pipeline</title>
<p>A comprehensive machine learning workflow was designed and implemented to address a binary classification task, with the aim of systematically evaluating the impact of different methodological choices on predictive performance. The pipeline encompassed data preprocessing, multiple classification techniques, and model optimization and evaluation, including hyperparameter tuning, validation, and performance assessment. To ensure robustness and to assess the sensitivity of the models to data characteristics, both imputed and non-imputed datasets were analyzed, as well as balanced and imbalanced versions of the data. This strategy enabled a controlled comparison of multiple machine learning algorithms with heterogeneous assumptions and modeling capacities, providing a broad perspective on their suitability for the problem under investigation.</p>
<sec id="s2d1"><label>2.4.1</label><title>Data preprocessing</title>
<sec id="s2d1a"><label>2.4.1.1</label><title>Missing values and class imbalance</title>
<p>Missing values were handled using two imputation methods: Multivariate Imputation by Chained Equations (MICE) [<xref ref-type="bibr" rid="B42">42</xref>] and KNN Imputer [<xref ref-type="bibr" rid="B43">43</xref>]. To evaluate the impact of missing values, we created three levels of datasets:</p>
<p>
<list list-type="bullet">
<list-item>
<p>Zero Imputation: Using only records with no missing values.</p></list-item>
<list-item>
<p>Half Impute: Including records with fewer than three missing variables.</p></list-item>
<list-item>
<p>Full Dataset: Incorporating all records with imputed values.</p></list-item>
</list>For datasets with half and full imputation, both MICE and KNN Imputer methods were evaluated. To address class imbalance, an oversampled version of each dataset was additionally evaluated. Oversampling was applied only to the training split. In total, five base datasets (one original and four imputed) were generated, each with and without oversampling, resulting in ten datasets used for model evaluation.</p>
</sec>
<sec id="s2d1b"><label>2.4.1.2</label><title>Data splitting and normalization</title>
<p>Data preprocessing included an 80/20 training-validation split, stratified by class to preserve target proportions, and feature standardization using StandardScaler to ensure zero mean and unit variance on training data for both binary and numerical variables.</p>
</sec>
</sec>
<sec id="s2d2"><label>2.4.2</label><title>Evaluated classification techniques</title>
<p>The assessed algorithms can be broadly categorized into traditional parametric models and more flexible non-parametric approaches. The parametric models assume an explicit functional form linking the input features to the class membership probability. This group includes Logistic Regression, which models the probability of the positive class through a sigmoid transformation of a linear predictor; the Perceptron and the Stochastic Gradient Descent Classifier (SGD), which iteratively optimize linear decision functions by minimizing a specified loss function; Linear Discriminant Analysis (LDA), which derives linear decision boundaries under the assumption of Gaussian class-conditional distributions with shared covariance matrices; and Partial Least Squares Discriminant Analysis (PLS-DA), which projects the predictors onto a latent space that maximizes covariance with the response variable. In contrast, non-parametric or semi-parametric models impose fewer assumptions on the data-generating process and are capable of capturing complex non-linear relationships. These include Support Vector Machine (SVC) with kernel functions, which constructs optimal separating hyperplanes in high-dimensional feature spaces; K-Nearest Neighbors (KNeighbors), which performs classification based on local similarity among observations; Gaussian Naive Bayes (GaussianNB), which combines simple probabilistic assumptions with empirical estimates of feature distributions; and ensemble-based methods such as Random Forest and XGBClassifier (XGBoost), which aggregate multiple decision trees to enhance predictive accuracy and generalization. Together, these 10 complementary modeling paradigms enabled a comprehensive assessment of classification performance across a wide range of statistical assumptions and learning capacities.</p>
</sec>
<sec id="s2d3"><label>2.4.3</label><title>Model optimization and evaluation</title>
<p>For each machine learning algorithm, a hyperparameter search space was defined (<xref ref-type="sec" rid="s12">Supplementary Table S1</xref>) and systematically explored using grid search implemented via Ray Tune in combination with GridSearchCV. The model configuration achieving the highest Receiver Operating Characteristic Area Under the Curve (ROC AUC) on the training data was identified as the best-performing model. This model was then evaluated on the validation split to compute performance metrics, including ROC AUC, sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), and Brier score, which were retained for subsequent analysis. Model validation was performed using bootstrap resampling to generate multiple randomized training and validation splits, enhancing robustness against data variability. For each bootstrap iteration, a hyperparameter search was conducted. Finally, the metrics were averaged across bootstrap iterations to obtain stable and unbiased performance estimates.</p>
</sec>
</sec>
<sec id="s2e"><label>2.5</label><title>Explainability</title>
<p>To explain the predictions of the machine learning models, SHapley Additive exPlanations (SHAP) values [<xref ref-type="bibr" rid="B44">44</xref>] were employed. SHAP is a unified framework based on cooperative game theory that assigns each feature a consistent and locally accurate importance value for individual predictions. Unlike other explainability techniques, SHAP is model-agnostic, allowing its application to any machine learning model regardless of its internal architecture or training process, thus enabling robust and interpretable explanations across different classifiers.</p>
</sec>
<sec id="s2f"><label>2.6</label><title>Software</title>
<p>The pipeline was implemented in Python (version 3.10) and executed within a Miniconda environment. The main libraries used include <monospace>scikit-learn</monospace>, <monospace>shap</monospace>, <monospace>imbalanced-learn</monospace>, <monospace>xgboost</monospace>, and <monospace>ray-tune</monospace>. To facilitate reproducibility, the complete codebase, environment specifications, and anonymized datasets required to rerun the same experiments are publicly available at <ext-link ext-link-type="uri" xlink:href="https://github.com/ifiguero/mody_2024">https://github.com/ifiguero/mody&#x005F;2024</ext-link>.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>GCK-MODY dataset</title>
<sec id="s3a1"><label>3.1.1</label><title>Univariate and preliminary analysis</title>
<p>Univariate analyses of the zero-imputation GCK-MODY dataset are summarized in <xref ref-type="table" rid="T1">Table&#x00A0;1A</xref> and provide an initial characterization of differences between diagnostic groups. Three metabolic variables&#x2014;fasting glucose, post-load glucose, and HbA1c&#x2014;were significantly associated with diagnostic status, with individuals carrying a positive GCK-MODY diagnosis exhibiting higher values across all three parameters. These findings establish a baseline description of the variables most strongly linked to diagnostic outcomes and motivate further exploratory analyses aimed at understanding how these differences manifest across the data.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Characteristics of the study groups.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="left">Unit</th>
<th valign="top" align="center">Negative (<italic>n</italic>&#x2009;&#x003D;&#x2009;30)</th>
<th valign="top" align="center">Positive (<italic>n</italic>&#x2009;&#x003D;&#x2009;43)</th>
<th valign="top" align="center"><italic>p</italic> value</th>
<th valign="top" align="center">Total (<italic>n</italic>&#x2009;&#x003D;&#x2009;73)</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" align="left" colspan="6">(A) GCK-MODY positive and negative patients</th>
</tr>
<tr>
<td valign="top" align="left">Age at diagnosis</td>
<td valign="top" align="left">years</td>
<td valign="top" align="center">14.5 (11.0&#x2013;30.0)</td>
<td valign="top" align="center">12.0 (10.0&#x2013;20.5)</td>
<td valign="top" align="center">0.085 (NS)</td>
<td valign="top" align="center">13.0 (10.0&#x2013;24.0)</td>
</tr>
<tr>
<td valign="top" align="left">Body mass index</td>
<td valign="top" align="left">kg/m<sup>2</sup></td>
<td valign="top" align="center">20.8 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 3.5</td>
<td valign="top" align="center">20.1 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 3.1</td>
<td valign="top" align="center">0.372 (NS)</td>
<td valign="top" align="center">20.4 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 3.2</td>
</tr>
<tr>
<td valign="top" align="left">Fasting glucose</td>
<td valign="top" align="left">mg/dL</td>
<td valign="top" align="center">113 (100&#x2013;117)</td>
<td valign="top" align="center">119 (114&#x2013;126)</td>
<td valign="top" align="center">0.011 (*)</td>
<td valign="top" align="center">115 (109&#x2013;124)</td>
</tr>
<tr>
<td valign="top" align="left">Post-load glucose</td>
<td valign="top" align="left">mg/dL</td>
<td valign="top" align="center">137 (119&#x2013;150)</td>
<td valign="top" align="center">147 (136&#x2013;160)</td>
<td valign="top" align="center">0.037 (*)</td>
<td valign="top" align="center">144 (131&#x2013;158)</td>
</tr>
<tr>
<td valign="top" align="left">HbA1c</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">5.9 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.6</td>
<td valign="top" align="center">6.2 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.4</td>
<td valign="top" align="center">0.027 (*)</td>
<td valign="top" align="center">6.1 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.5</td>
</tr>
<tr>
<td valign="top" align="left">Sex&#x2003;(Female)</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">60.0 (18/30)</td>
<td valign="top" align="center">62.8 (27/43)</td>
<td valign="top" align="center">0.812 (NS)</td>
<td valign="top" align="center">61.6 (45/73)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;&#x2003;(Male)</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">40.0 (12/30)</td>
<td valign="top" align="center">37.2 (16/43)</td>
<td valign="top" align="center"/>
<td valign="top" align="center">38.4 (28/73)</td>
</tr>
<tr>
<td valign="top" align="left">Family history</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">90.0 (27/30)</td>
<td valign="top" align="center">97.7 (42/43)</td>
<td valign="top" align="center">0.299 (NS)</td>
<td valign="top" align="center">94.5 (69/73)</td>
</tr>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="left">Unit</th>
<th valign="top" align="center">Negative (<italic>n</italic>&#x2009;&#x003D;&#x2009;19)</th>
<th valign="top" align="center">Positive (<italic>n</italic>&#x2009;&#x003D;&#x2009;7)</th>
<th valign="top" align="center"><italic>p</italic> value</th>
<th valign="top" align="center">Total (<italic>n</italic>&#x2009;&#x003D;&#x2009;26)</th>
</tr>
<tr>
<th valign="top" align="left" colspan="6">(B) HNF1A-MODY positive and negative patients</th>
</tr>
<tr>
<td valign="top" align="left">Age at diagnosis</td>
<td valign="top" align="left">years</td>
<td valign="top" align="center">23.0 (12.0&#x2013;29.0)</td>
<td valign="top" align="center">11.0 (11.0&#x2013;19.0)</td>
<td valign="top" align="center">0.132 (NS)</td>
<td valign="top" align="center">19.0 (11.0&#x2013;26.0)</td>
</tr>
<tr>
<td valign="top" align="left">Body mass index</td>
<td valign="top" align="left">kg/m<sup>2</sup></td>
<td valign="top" align="center">20.9 (18.0&#x2013;23.0)</td>
<td valign="top" align="center">19.2 (18.0&#x2013;22.1)</td>
<td valign="top" align="center">0.977 (NS)</td>
<td valign="top" align="center">19.6 (18.0&#x2013;23.0)</td>
</tr>
<tr>
<td valign="top" align="left">Fasting glucose</td>
<td valign="top" align="left">mg/dL</td>
<td valign="top" align="center">111.2 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 18.4</td>
<td valign="top" align="center">106.0 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 15.1</td>
<td valign="top" align="center">0.476 (NS)</td>
<td valign="top" align="center">109.8 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 17.5</td>
</tr>
<tr>
<td valign="top" align="left">Post-load glucose</td>
<td valign="top" align="left">mg/dL</td>
<td valign="top" align="center">198.2 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM11"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 66.3</td>
<td valign="top" align="center">268.4 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM12"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 78.6</td>
<td valign="top" align="center">0.063 (NS)</td>
<td valign="top" align="center">217.1 <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM13"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 75.2</td>
</tr>
<tr>
<td valign="top" align="left">HbA1c</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">6.1 (5.7&#x2013;6.5)</td>
<td valign="top" align="center">6.6 (5.9&#x2013;6.7)</td>
<td valign="top" align="center">0.469 (NS)</td>
<td valign="top" align="center">6.2 (5.7&#x2013;6.7)</td>
</tr>
<tr>
<td valign="top" align="left">Sex &#x2003;(Female)</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">68.4 (13/19)</td>
<td valign="top" align="center">42.9 (3/7)</td>
<td valign="top" align="center">0.369 (NS)</td>
<td valign="top" align="center">61.5 (16/26)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;&#x2003;(Male)</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">31.6 (6/19)</td>
<td valign="top" align="center">57.1 (4/7)</td>
<td valign="top" align="center"/>
<td valign="top" align="center">38.5 (10/26)</td>
</tr>
<tr>
<td valign="top" align="left">Family history</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="center">100.0 (19/19)</td>
<td valign="top" align="center">85.7 (6/7)</td>
<td valign="top" align="center">0.269 (NS)</td>
<td valign="top" align="center">96.2 (25/26)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>Continuous variables with normal distribution are presented as mean &#x00B1; standard deviation. Continuous variables with non-normal distribution are presented as median (interquartile range). Categorical variables are presented as percentage (proportion). &#x002A;: <italic>p</italic> &#x003C; 0.05. NS: not significant.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>To visually assess these univariate associations, boxplots (<xref ref-type="sec" rid="s12">Supplementary Figures S1A&#x2013;S2A</xref>) were first examined, revealing observable shifts in central tendency between diagnostic classes for several continuous predictors in the Full dataset. These patterns were complemented by distribution plots (<xref ref-type="sec" rid="s12">Supplementary Figure S3A</xref>), which further illustrated differences in the overall shape and spread of both continuous and binary variables across groups. Despite these visible shifts, substantial overlap between distributions was consistently observed, indicating that individual variables, when considered in isolation, provide limited discriminative power for separating diagnostic classes.</p>
<p>Building on these observations, correlation maps (<xref ref-type="sec" rid="s12">Supplementary Figure S4A</xref>) were used to examine relationships among predictors and their association with diagnostic status. Age at diagnosis showed negative correlations with most variables, whereas family history of diabetes, HbA1c, fasting glucose, and post-load glucose were positively correlated with a positive GCK-MODY diagnosis, reinforcing the trends identified in the univariate analyses. Finally, principal component analysis biplots (<xref ref-type="fig" rid="F1">Figure&#x00A0;1A</xref>) provided a multivariate overview of the data structure. While some tendency toward clustering by diagnostic status was apparent, considerable intra-class dispersion and partial overlap between groups persisted. Collectively, these results highlight a complex and partially overlapping feature space, underscoring the need for multivariate and machine learning approaches to effectively address the classification task.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Principal Component Analysis (PCA) of MODY datasets. <bold>(A)</bold> PCA of the GCK-MODY dataset and <bold>(B)</bold> PCA of the HNF1A-MODY dataset. Each point represents an individual sample projected onto the first two principal components. Samples with pathogenic variants confirmed by Sanger sequencing are shown in red, while samples testing negative are shown in blue. Variable loadings corresponding to the input features are overlaid, illustrating their contributions to the principal component space. <italic>BMI</italic> corresponds to body mass index, <italic>base glu</italic> to fasting blood glucose levels, <italic>glu 120</italic> to post-load blood glucose levels 120&#x2009;min after the consumption of 75&#x2009;g of glucose.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1656161-g001.tif"><alt-text content-type="machine-generated">Principal component analysis (PCA) biplots. Panel A displays the GCK-MODY dataset and Panel B the HNF1A-MODY dataset. Blue and red points represent negative and positive cases respectively, showing data spread along two principal components, with feature loadings indicated by green vectors for variables such as age, family history, glucose, HbA1c, sex, and BMI.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3a2"><label>3.1.2</label><title>Evaluation of machine learning models</title>
<p>A total of 2,500 machine learning models were trained and evaluated for GCK-MODY prediction. Performance metrics are summarized in <xref ref-type="table" rid="T2">Table&#x00A0;2A</xref> for ROC AUC and in <xref ref-type="sec" rid="s12">Supplementary Tables S2A&#x2013;S6A</xref> for sensitivity, specificity, PPV, NPV, and Brier score. The final hyperparameter configurations for the best-performing models are reported in <xref ref-type="sec" rid="s12">Supplementary Table S7</xref>.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Average ROC AUC scores for the classification models.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center" colspan="2">Zero</th>
<th valign="top" align="center" colspan="2">Half KNN</th>
<th valign="top" align="center" colspan="2">Half MICE</th>
<th valign="top" align="center" colspan="2">Full KNN</th>
<th valign="top" align="center" colspan="2">Full MICE</th>
</tr>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">unb</th>
<th valign="top" align="center">ovr</th>
<th valign="top" align="center">unb</th>
<th valign="top" align="center">ovr</th>
<th valign="top" align="center">unb</th>
<th valign="top" align="center">ovr</th>
<th valign="top" align="center">unb</th>
<th valign="top" align="center">ovr</th>
<th valign="top" align="center">unb</th>
<th valign="top" align="center">ovr</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" align="left" colspan="11">(A) Performance (ROC AUC) for the GCK-MODY dataset</th>
</tr>
<tr>
<td valign="top" align="left">GaussianNB</td>
<td valign="top" align="center">0.653</td>
<td valign="top" align="center">0.664</td>
<td valign="top" align="center">0.654</td>
<td valign="top" align="center">0.671</td>
<td valign="top" align="center">0.656</td>
<td valign="top" align="center">0.674</td>
<td valign="top" align="center"><bold>0.652</bold></td>
<td valign="top" align="center"><bold>0.682</bold></td>
<td valign="top" align="center"><bold>0.702</bold></td>
<td valign="top" align="center"><underline><bold>0.724</bold></underline></td>
</tr>
<tr>
<td valign="top" align="left">KNeighbors</td>
<td valign="top" align="center">0.589</td>
<td valign="top" align="center">0.630</td>
<td valign="top" align="center">0.649</td>
<td valign="top" align="center"><bold>0.673</bold></td>
<td valign="top" align="center">0.625</td>
<td valign="top" align="center">0.639</td>
<td valign="top" align="center">0.593</td>
<td valign="top" align="center">0.661</td>
<td valign="top" align="center">0.612</td>
<td valign="top" align="center"><underline>0.678</underline></td>
</tr>
<tr>
<td valign="top" align="left">LDA</td>
<td valign="top" align="center">0.639</td>
<td valign="top" align="center"><underline><bold>0.687</bold></underline></td>
<td valign="top" align="center">0.663</td>
<td valign="top" align="center">0.667</td>
<td valign="top" align="center"><bold>0.686</bold></td>
<td valign="top" align="center">0.672</td>
<td valign="top" align="center">0.592</td>
<td valign="top" align="center">0.612</td>
<td valign="top" align="center">0.623</td>
<td valign="top" align="center">0.665</td>
</tr>
<tr>
<td valign="top" align="left">Logistic regression</td>
<td valign="top" align="center">0.648</td>
<td valign="top" align="center">0.669</td>
<td valign="top" align="center"><bold>0.666</bold></td>
<td valign="top" align="center">0.664</td>
<td valign="top" align="center">0.661</td>
<td valign="top" align="center"><underline><bold>0.680</bold></underline></td>
<td valign="top" align="center">0.577</td>
<td valign="top" align="center">0.609</td>
<td valign="top" align="center">0.603</td>
<td valign="top" align="center">0.670</td>
</tr>
<tr>
<td valign="top" align="left">Perceptron</td>
<td valign="top" align="center">0.618</td>
<td valign="top" align="center">0.642</td>
<td valign="top" align="center">0.616</td>
<td valign="top" align="center"><underline>0.665</underline></td>
<td valign="top" align="center">0.618</td>
<td valign="top" align="center">0.592</td>
<td valign="top" align="center">0.534</td>
<td valign="top" align="center">0.522</td>
<td valign="top" align="center">0.602</td>
<td valign="top" align="center">0.558</td>
</tr>
<tr>
<td valign="top" align="left">PLS DA</td>
<td valign="top" align="center"><bold>0.657</bold></td>
<td valign="top" align="center"><underline><bold>0.687</bold></underline></td>
<td valign="top" align="center">0.656</td>
<td valign="top" align="center">0.657</td>
<td valign="top" align="center">0.673</td>
<td valign="top" align="center">0.667</td>
<td valign="top" align="center">0.580</td>
<td valign="top" align="center">0.608</td>
<td valign="top" align="center">0.577</td>
<td valign="top" align="center">0.654</td>
</tr>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="center">0.611</td>
<td valign="top" align="center">0.620</td>
<td valign="top" align="center">0.665</td>
<td valign="top" align="center">0.667</td>
<td valign="top" align="center">0.674</td>
<td valign="top" align="center"><underline>0.679</underline></td>
<td valign="top" align="center">0.630</td>
<td valign="top" align="center">0.662</td>
<td valign="top" align="center">0.670</td>
<td valign="top" align="center">0.676</td>
</tr>
<tr>
<td valign="top" align="left">SGD</td>
<td valign="top" align="center">0.653</td>
<td valign="top" align="center">0.626</td>
<td valign="top" align="center">0.663</td>
<td valign="top" align="center">0.629</td>
<td valign="top" align="center">0.674</td>
<td valign="top" align="center"><underline>0.670</underline></td>
<td valign="top" align="center">0.568</td>
<td valign="top" align="center">0.617</td>
<td valign="top" align="center">0.613</td>
<td valign="top" align="center">0.656</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="center">0.651</td>
<td valign="top" align="center"><underline>0.669</underline></td>
<td valign="top" align="center">0.650</td>
<td valign="top" align="center">0.655</td>
<td valign="top" align="center">0.661</td>
<td valign="top" align="center">0.668</td>
<td valign="top" align="center">0.650</td>
<td valign="top" align="center">0.631</td>
<td valign="top" align="center">0.654</td>
<td valign="top" align="center"><underline>0.669</underline></td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">0.600</td>
<td valign="top" align="center">0.620</td>
<td valign="top" align="center">0.624</td>
<td valign="top" align="center">0.585</td>
<td valign="top" align="center">0.631</td>
<td valign="top" align="center">0.636</td>
<td valign="top" align="center">0.612</td>
<td valign="top" align="center">0.639</td>
<td valign="top" align="center">0.637</td>
<td valign="top" align="center"><underline>0.658</underline></td>
</tr>
<tr>
<th valign="top" align="left" colspan="11">(B) Performance (ROC AUC) for the HNF1A-MODY dataset</th>
</tr>
<tr>
<td valign="top" align="left">GaussianNB</td>
<td valign="top" align="center"><bold>0.515</bold></td>
<td valign="top" align="center">0.510</td>
<td valign="top" align="center">0.524</td>
<td valign="top" align="center">0.573</td>
<td valign="top" align="center">0.517</td>
<td valign="top" align="center">0.554</td>
<td valign="top" align="center">0.591</td>
<td valign="top" align="center">0.579</td>
<td valign="top" align="center"><underline>0.625</underline></td>
<td valign="top" align="center">0.619</td>
</tr>
<tr>
<td valign="top" align="left">KNeighbors</td>
<td valign="top" align="center">0.480</td>
<td valign="top" align="center">0.550</td>
<td valign="top" align="center"><bold>0.576</bold></td>
<td valign="top" align="center"><bold>0.628</bold></td>
<td valign="top" align="center"><bold>0.559</bold></td>
<td valign="top" align="center"><bold>0.587</bold></td>
<td valign="top" align="center">0.563</td>
<td valign="top" align="center">0.531</td>
<td valign="top" align="center"><underline>0.632</underline></td>
<td valign="top" align="center">0.623</td>
</tr>
<tr>
<td valign="top" align="left">LDA</td>
<td valign="top" align="center">0.485</td>
<td valign="top" align="center">0.550</td>
<td valign="top" align="center">0.506</td>
<td valign="top" align="center"><underline>0.588</underline></td>
<td valign="top" align="center">0.511</td>
<td valign="top" align="center">0.585</td>
<td valign="top" align="center">0.482</td>
<td valign="top" align="center">0.537</td>
<td valign="top" align="center">0.463</td>
<td valign="top" align="center">0.549</td>
</tr>
<tr>
<td valign="top" align="left">Logistic regression</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.510</td>
<td valign="top" align="center">0.497</td>
<td valign="top" align="center"><underline>0.581</underline></td>
<td valign="top" align="center">0.492</td>
<td valign="top" align="center">0.579</td>
<td valign="top" align="center">0.481</td>
<td valign="top" align="center">0.539</td>
<td valign="top" align="center">0.487</td>
<td valign="top" align="center">0.545</td>
</tr>
<tr>
<td valign="top" align="left">Perceptron</td>
<td valign="top" align="center">0.505</td>
<td valign="top" align="center"><underline>0.555</underline></td>
<td valign="top" align="center">0.550</td>
<td valign="top" align="center">0.512</td>
<td valign="top" align="center">0.549</td>
<td valign="top" align="center">0.528</td>
<td valign="top" align="center">0.507</td>
<td valign="top" align="center">0.540</td>
<td valign="top" align="center">0.478</td>
<td valign="top" align="center">0.468</td>
</tr>
<tr>
<td valign="top" align="left">PLS DA</td>
<td valign="top" align="center">0.490</td>
<td valign="top" align="center">0.540</td>
<td valign="top" align="center">0.511</td>
<td valign="top" align="center"><underline>0.603</underline></td>
<td valign="top" align="center">0.496</td>
<td valign="top" align="center">0.583</td>
<td valign="top" align="center">0.485</td>
<td valign="top" align="center">0.541</td>
<td valign="top" align="center">0.461</td>
<td valign="top" align="center">0.555</td>
</tr>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="center">0.440</td>
<td valign="top" align="center"><bold>0.565</bold></td>
<td valign="top" align="center">0.525</td>
<td valign="top" align="center">0.568</td>
<td valign="top" align="center">0.543</td>
<td valign="top" align="center">0.551</td>
<td valign="top" align="center"><bold>0.619</bold></td>
<td valign="top" align="center"><bold>0.639</bold></td>
<td valign="top" align="center"><bold><underline>0.712</underline></bold></td>
<td valign="top" align="center"><bold>0.703</bold></td>
</tr>
<tr>
<td valign="top" align="left">SGD</td>
<td valign="top" align="center">0.475</td>
<td valign="top" align="center">0.545</td>
<td valign="top" align="center">0.496</td>
<td valign="top" align="center">0.560</td>
<td valign="top" align="center">0.529</td>
<td valign="top" align="center">0.529</td>
<td valign="top" align="center">0.467</td>
<td valign="top" align="center">0.541</td>
<td valign="top" align="center">0.479</td>
<td valign="top" align="center"><underline>0.569</underline></td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="center">0.505</td>
<td valign="top" align="center">0.465</td>
<td valign="top" align="center">0.489</td>
<td valign="top" align="center">0.530</td>
<td valign="top" align="center">0.490</td>
<td valign="top" align="center">0.528</td>
<td valign="top" align="center">0.526</td>
<td valign="top" align="center">0.590</td>
<td valign="top" align="center">0.602</td>
<td valign="top" align="center"><underline>0.627</underline></td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">0.495</td>
<td valign="top" align="center">0.545</td>
<td valign="top" align="center">0.504</td>
<td valign="top" align="center">0.562</td>
<td valign="top" align="center">0.498</td>
<td valign="top" align="center">0.560</td>
<td valign="top" align="center">0.589</td>
<td valign="top" align="center">0.600</td>
<td valign="top" align="center">0.665</td>
<td valign="top" align="center"><underline>0.673</underline></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF2"><p>For each dataset (Zero, Half KNN, Half MICE, Full KNN and Full MICE), two separate columns are shown: one for the original unbalanced sample (unb) and the other for the oversampled data (ovr). Bold indicates the best score for the sample, and underlining indicates the best score for the technique. </p></fn>
</table-wrap-foot>
</table-wrap>
<p>The GaussianNB classifier achieved the highest predictive performance, with a ROC AUC of 0.724, using the dataset processed with full MICE imputation and oversampling. This result reflects moderate discrimination ability. KNeighbors, LDA, logistic regression, and PLS-DA models achieved comparable performance. In contrast, XGBoost yielded the lowest predictive power, with a ROC AUC of 0.658 under the same preprocessing conditions.</p>
<p>Across models, oversampled datasets consistently outperformed their unbalanced counterparts, and MICE-based imputations generally resulted in better performance than KNN imputations. Calibration analysis based on the Brier score showed that GaussianNB exhibited improved calibration when trained on oversampled data, whereas oversampling led to increased miscalibration for all other models.</p>
<p>The best-performing GCK-MODY model achieved a sensitivity of 0.885 and a specificity of 0.570, with a PPV of 0.853 and an NPV of 0.656. These results indicate good ability to correctly identify true GCK-MODY cases, albeit at the expense of limited performance in correctly identifying true negatives.</p>
<p>Overall, these results indicate that machine learning models trained on routinely collected clinical variables can discriminate GCK-MODY cases from non-cases better than chance, albeit with moderate accuracy. The observed performance supports the feasibility of predicting GCK-MODY subtypes.</p>
</sec>
<sec id="s3a3"><label>3.1.3</label><title>Models&#x2019; explainability according to SHAP values</title>
<p>To interpret the predictions of the best-performing GCK-MODY model, SHAP-based explainability analyses were conducted. <xref ref-type="fig" rid="F2">Figure&#x00A0;2A</xref> summarizes the overall contribution of each variable to the model&#x2019;s predictions. Age at diagnosis, fasting glucose and HbA1c emerged as the most influential factors driving positive GCK-MODY predictions. Conversely, fasting glucose, BMI, post-load glucose, and age at diagnosis were the primary contributors to negative predictions.</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>SHAP decision plots illustrating feature contributions to model predictions. <bold>(A)</bold> Gaussian Naive Bayes model trained on the GCK-MODY dataset and <bold>(B)</bold> Random Forest model trained on the HNF1A-MODY dataset. Each line represents an individual subject, with model predictions starting from the expected value at the bottom of the plot. Rows correspond to input variables, and horizontal shifts indicate the contribution of each variable to the final prediction. Lines colored in red correspond to positive class predictions (class 1), while lines in blue correspond to negative class predictions (class 0). <italic>BMI</italic> corresponds to body mass index, <italic>base glu</italic> to fasting blood glucose levels, <italic>glu 120</italic> to post-load blood glucose levels 120&#x2009;min after the consumption of 75&#x2009;g of glucose.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1656161-g002.tif"><alt-text content-type="machine-generated">SHAP decision plots highlighting feature contributions for individual predictions are presented as two line graphs. Panel A displays results for a Gaussian Naive Bayes model applied to the GCK-MODY dataset, with model output values on the x-axis and features such as age at diagnosis, glucose, BMI, and sex on the y-axis. Lines are colored from blue (lower output values) to red (higher output values). The final position of each line on the x-axis determines the predicted class (0 or 1). Panel B shows a similar plot for a Random Forest model applied to the HNF1A-MODY dataset using the same features and color scheme, with lines illustrating different prediction paths.</alt-text>
</graphic>
</fig>
<p>Although the relative importance of predictors varied across individual samples, the model consistently relied on variables that were also statistically significant in univariate analyses, namely fasting glucose, post-load glucose, and HbA1c. This concordance between traditional statistical analysis and model explainability supports the internal coherence of the predictive framework. Additionally, <xref ref-type="sec" rid="s12">Supplementary Figure S5</xref> illustrates SHAP explanations at the individual level, demonstrating how specific variables influenced model predictions for selected samples and enabling transparent case-by-case assessment.</p>
<p>Importantly, the alignment between SHAP-derived feature contributions and clinically established biomarkers of GCK-MODY indicates that the model&#x2019;s predictions are driven by meaningful and interpretable clinical signals. This supports the hypothesis that explainable machine learning approaches can provide transparent insights into the variables underlying subtype-specific MODY predictions.</p>
</sec>
</sec>
<sec id="s3b"><label>3.2</label><title>HNF1A-MODY dataset</title>
<sec id="s3b1"><label>3.2.1</label><title>Univariate and preliminary analysis</title>
<p>Univariate analysis results for the zero-imputation HNF1A-MODY dataset are reported in <xref ref-type="table" rid="T1">Table&#x00A0;1B</xref> and reveal a markedly different pattern compared with the GCK-MODY cohort. None of the evaluated variables reached statistical significance at the univariate level. Nonetheless, post-load glucose and age at diagnosis exhibited trends toward association with diagnostic status, with <italic>p</italic>-values of 0.063 and 0.132, respectively, suggesting weak and inconclusive signals that warrant further multivariate investigation.</p>
<p>Univariate visualizations were subsequently examined to further characterize these patterns. Box plots of continuous variables and summaries of binary predictors (<xref ref-type="sec" rid="s12">Supplementary Figures S1B&#x2013;S2B</xref>) showed only modest differences in central tendency between diagnostic classes, with substantial overlap across groups. Distribution plots (<xref ref-type="sec" rid="s12">Supplementary Figure S3B</xref>) reinforced this observation, indicating limited discriminative capacity when individual variables were considered in isolation. Consistent with these findings, correlation maps (<xref ref-type="sec" rid="s12">Supplementary Figure S4B</xref>) revealed a structure broadly similar to that observed in the GCK-MODY analysis, with age at diagnosis negatively correlated with most predictors. In contrast to the GCK-MODY dataset, however, post-load glucose emerged as the only variable exhibiting a noticeable positive correlation with HNF1A-MODY diagnosis, underscoring a narrower set of potentially informative features.</p>
<p>Principal component analysis biplots (<xref ref-type="fig" rid="F1">Figure&#x00A0;1B</xref>) further corroborated the limited separability suggested by the univariate analyses, showing pronounced overlap between diagnostic classes and an absence of well-defined clustering by disease status. Collectively, these descriptive, visual, and exploratory findings highlight the greater difficulty of class separation in the HNF1A-MODY dataset and reinforce the need for multivariate modeling approaches to capture subtle and potentially nonlinear relationships within the data.</p>
</sec>
<sec id="s3b2"><label>3.2.2</label><title>Evaluation of machine learning models</title>
<p>For HNF1A-MODY, 2,500 models were similarly trained and evaluated. Performance metrics are summarized in <xref ref-type="table" rid="T2">Table&#x00A0;2B</xref> and <xref ref-type="sec" rid="s12">Supplementary Tables S2B&#x2013;S6B</xref>. The Random Forest classifier achieved the highest ROC AUC of 0.712, corresponding to moderate classification performance. This result was obtained using the full MICE imputation with unbalanced classes, although comparable performance was observed across datasets with full imputations.</p>
<p>KNeighbors models achieved their best results on datasets with half imputations, while the Perceptron classifier showed the lowest overall performance, reaching a maximum ROC AUC of 0.555 on the zero-imputation oversampled dataset. These findings underscore the sensitivity of model performance to imputation strategy and class balancing. Calibration analysis revealed evidence of miscalibration across all models, which generally worsened with oversampling.</p>
<p>The best-performing HNF1A-MODY model achieved a sensitivity of 0.468 and a specificity of 0.757, with PPV and NPV values of 0.579 and 0.682, respectively. These metrics indicate limited ability to identify true positive cases, but a moderate capacity to correctly rule out HNF1A-MODY in negative individuals.</p>
<p>Overall, these findings indicate that machine learning models trained on routinely collected clinical data can discriminate HNF1A-MODY cases from non-cases better than chance, although with lower sensitivity compared to GCK-MODY. This supports the feasibility of the proposed modular framework while highlighting subtype-specific differences in achievable predictive performance.</p>
</sec>
<sec id="s3b3"><label>3.2.3</label><title>Models&#x2019; explainability according to SHAP values</title>
<p>SHAP-based explainability analysis was applied to the best-performing HNF1A-MODY model to elucidate the variables driving its predictions. As shown in <xref ref-type="fig" rid="F2">Figure&#x00A0;2B</xref>, age at diagnosis, fasting glucose, post-load glucose, and HbA1c were the most influential variables contributing to negative predictions. In contrast, BMI, post-load glucose, and age at diagnosis had the greatest impact on positive predictions.</p>
<p>Even though univariate analyses did not identify statistically significant predictors for HNF1A-MODY, SHAP results indicate that the model consistently assigned substantial importance to post-load glucose and age at diagnosis. Notably, these variables exhibited trends toward statistical significance in descriptive analyses, suggesting that multivariate modeling was able to capture subtle patterns not evident in univariate testing alone.</p>
<p>Despite the absence of statistically significant predictors in univariate analyses, SHAP-based explanations suggest that the model relied on clinically plausible variables to inform its predictions. This indicates that explainable machine learning can uncover multivariate patterns relevant to HNF1A-MODY while maintaining transparency in how individual clinical features contribute to subtype-specific predictions.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<sec id="s4a"><label>4.1</label><title>Strengths of this study</title>
<p>To our knowledge, this is the first study to address the prediction of MODY subtypes using clinical data and machine learning. Previous studies have explored the differentiation of MODY from other diabetes types (e.g., T1DM and T2DM) but not the classification of MODY subtypes [<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B37">37</xref>].</p>
<p>In this context, this work also provides a systematic evaluation of state-of-the-art machine learning models for generating explainable predictions in an underrepresented Latin American population. Most existing predictive tools and probability calculators for MODY have been developed and validated predominantly in European ancestry cohorts, raising concerns about their transferability across ethnic groups [<xref ref-type="bibr" rid="B33">33</xref>]. By focusing on a Latin American dataset, this study addresses an important gap in the literature and provides evidence that explainable machine learning approaches can be meaningfully applied in populations that are often overlooked in precision diabetes research. Expanding and diversifying available datasets remains a key priority, as larger and more representative cohorts would enable the development of more robust and generalizable models. Such efforts are essential for improving diagnostic accuracy and clinical applicability, especially given the documented limitations of existing tools when applied to non-European populations [<xref ref-type="bibr" rid="B29">29</xref>].</p>
<p>Focusing on GCK-MODY and HNF1A-MODY&#x2014;the two most prevalent MODY subtypes&#x2014;offers clear and clinically meaningful benefits that justify their prioritization in predictive modeling. These subtypes differ substantially in prognosis and treatment, making their accurate identification particularly impactful. GCK-MODY is typically characterized by mild, stable hyperglycemia that often does not require pharmacological intervention, whereas HNF1A-MODY is frequently misdiagnosed as T1DM and consequently treated unnecessarily with insulin [<xref ref-type="bibr" rid="B3">3</xref>]. Accurate identification of these subtypes therefore has immediate therapeutic implications, with the potential to improve glycemic control, quality of life, and cost-effectiveness of care.</p>
<p>Machine learning represents a promising approach to complement costly or invasive diagnostic procedures. However, its clinical adoption depends critically on the interpretability of model predictions. In high-stakes domains such as healthcare, clinicians must understand the rationale behind each model decision. To address this, we employed the SHAP library to enhance model explainability. To our knowledge, this is the first study to apply SHAP to assess the contribution of input features in the classification of MODY subtypes. Notably, the features identified as important by SHAP are consistent with those used in medical practice to suspect GCK-MODY and HNF1A-MODY, suggesting that the model&#x2019;s reasoning aligns with established clinical knowledge. Together, the ability of machine learning models to support the identification of MODY subtypes and the integration of explainability techniques reinforce their potential role in advancing precision medicine for monogenic diabetes.</p>
<p>In this context, the integration of SHAP-based explainability provides novel insights into how predictive models weight clinical and biochemical variables, moving beyond black-box predictions. Unlike traditional methods that typically offer only global interpretability, SHAP enables a detailed examination of the factors driving individual predictions. This feature is particularly valuable when two patients receive the same predicted label but differ substantially in the underlying contributing variables. Moreover, SHAP facilitates the identification of cases in which the model prediction diverges from the ground truth, allowing clinicians to recognize potential misclassifications and to better understand class overlap. Together, these insights can guide iterative model refinement and foster greater clinical trust and usability of machine learning&#x2013;based decision support systems.</p>
<p>The models developed achieved moderate predictive power for GCK-MODY and HNF1A-MODY. Based on its performance metrics, the GCK-MODY model could serve as a screening tool prior to genetic confirmation through sequencing, but not as an exclusion test, since a negative result does not fully rule out the diagnosis. In contrast, the HNF1A-MODY model&#x2014;given its tendency to miss a substantial proportion of positive cases&#x2014;has limited value as a screening tool. However, it may still be useful as a complementary aid in ruling out this diagnosis when combined with clinical evaluation.</p>
<p>Finally, the proposed pipeline was designed with scalability and flexibility in mind, enabling the integration of additional MODY subtypes, larger datasets, and new clinical or biochemical features. This modular architecture facilitates the inclusion of new predictors and supports extended model selection and hyperparameter optimization procedures.</p>
</sec>
<sec id="s4b"><label>4.2</label><title>Limitations and future work</title>
<p>The small number of available cases represents a central limitation of this study and reflects a common challenge in MODY research. These monogenic forms of diabetes are rare, costly to confirm via Sanger sequencing, and often underdiagnosed, which substantially limits data availability. Increasing the size and diversity of the dataset would likely improve model accuracy by reducing uncertainty associated with data scarcity. Although imputation techniques performed satisfactorily in handling missing values, minimizing data gaps would further enhance prediction reliability. Increasing sample size would also allow exploration of more complex artificial intelligence architectures, such as deep learning, representing a promising avenue for future research. These models require considerably larger datasets for reliable training, but may be better suited to capture intricate data patterns, potentially improving predictive accuracy and robustness.</p>
<p>Given the low overall prevalence of MODY (approximately 1&#x0025;&#x2013;5&#x0025; of all diabetes cases [<xref ref-type="bibr" rid="B3">3</xref>]) and the even lower frequency of its individual subtypes, achieving balanced datasets for model training remains a considerable challenge. Even though balancing the training data can improve predictive accuracy, it may also introduce model miscalibration, as reflected by higher Brier scores on the oversampled dataset shown in <xref ref-type="sec" rid="s12">Supplementary Table S6</xref>. Since real-world clinical data are inherently imbalanced, models trained on artificially balanced datasets may not generalize effectively to practical settings. Future work should therefore focus on developing strategies that address class imbalance while maintaining robust model calibration and real-world applicability.</p>
<p>Future work should also include a systematic evaluation of SHAP-based explainability outputs in collaboration with healthcare professionals. This will help validate the correspondence between model-derived feature weights and expert clinical reasoning, ensuring that the models remain interpretable, trustworthy, and aligned with real-world medical decision-making.</p>
<p>The proposed pipeline can also be extended to datasets including other MODY subtypes. Incorporating additional clinical and biochemical variables into the input features&#x2014;such as renal or cardiovascular pathophysiological features when studying HNF4A-MODY or HNF1B-MODY&#x2014;may further optimize the models&#x2019; ability to predict individual MODY subtypes. Expanding the feature set could enable the identification of more informative patterns and improve classification performance.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>This study demonstrates the feasibility of predicting specific MODY subtypes from routinely collected medical data in patients with prior clinical suspicion of MODY using machine learning models. Despite their moderate performance and limited scope&#x2014;addressing only 2 of the 14 currently known MODY subtypes&#x2014;our results highlight the potential of machine learning-based tools to assist healthcare professionals in identifying MODY subtypes more efficiently. Such tools may save valuable time and resources in the diagnostic process and support more informed decisions regarding patient management and treatment selection, particularly in settings where access to genetic testing is limited or delayed.</p>
<p>Taken together, the contribution of this work lies not only in establishing this proof of concept, but also in doing so through a clinically grounded and explainable machine learning framework, evaluated in an underrepresented Latin American population. By focusing on GCK-MODY and HNF1A-MODY&#x2014;the two most prevalent MODY subtypes with well-established clinical relevance&#x2014;and adopting a flexible, modular methodological design, this study provides a foundation that can be refined and expanded as larger and more diverse datasets become available.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://github.com/ifiguero/mody_2024">https://github.com/ifiguero/mody_2024</ext-link>.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of the Hospital de Cl&#x00ED;nicas Jos&#x00E9; de San Mart&#x00ED;n, Facultad de Medicina, Universidad de Buenos Aires, Buenos Aires, Argentina. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>IF: Data curation, Formal analysis, Writing &#x2013; original draft, Methodology, Investigation, Visualization, Software. RF: Resources, Investigation, Validation, Software, Writing &#x2013; review &#x0026; editing, Conceptualization, Formal analysis, Methodology. AM: Investigation, Writing &#x2013; review &#x0026; editing, Validation, Methodology. AdD: Data curation, Writing &#x2013; review &#x0026; editing. GF: Supervision, Writing &#x2013; review &#x0026; editing, Funding acquisition, Project administration. AL: Conceptualization, Investigation, Data curation, Writing &#x2013; review &#x0026; editing, Validation, Resources, Funding acquisition, Project administration. DM: Investigation, Supervision, Funding acquisition, Writing &#x2013; review &#x0026; editing, Conceptualization, Validation, Data curation, Methodology.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>The authors gratefully acknowledge all the patients who voluntarily participated in this study.</p>
</ack>
<sec id="s10" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s13" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdgth.2026.1656161/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdgth.2026.1656161/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Datasheet1.pdf" id="sm1" mimetype="application/pdf"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><collab>American Diabetes Association</collab>. <article-title>Diagnosis and classification of diabetes mellitus</article-title>. <source>Diabetes Care</source>. (<year>2013</year>) <volume>37</volume>:<fpage>S81</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.2337/dc14-S081</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hope</surname> <given-names>SV</given-names></name> <name><surname>Wienand-Barnett</surname> <given-names>S</given-names></name> <name><surname>Shepherd</surname> <given-names>M</given-names></name> <name><surname>King</surname> <given-names>SM</given-names></name> <name><surname>Fox</surname> <given-names>C</given-names></name> <name><surname>Khunti</surname> <given-names>K</given-names></name></person-group>, et al. <article-title>Practical classification guidelines for diabetes in patients treated with insulin: a cross-sectional study of the accuracy of diabetes diagnosis</article-title>. <source>Br J Gen Pract</source>. (<year>2016</year>) <volume>66</volume>:<fpage>e315</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.3399/bjgp16x684961</pub-id><pub-id pub-id-type="pmid">27080317</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zecevic</surname> <given-names>K</given-names></name> <name><surname>Volcansek</surname> <given-names>S</given-names></name> <name><surname>Katsiki</surname> <given-names>N</given-names></name> <name><surname>Rizzo</surname> <given-names>M</given-names></name> <name><surname>Milardovic</surname> <given-names>TM</given-names></name> <name><surname>Stoian</surname> <given-names>AP</given-names></name></person-group>, et al. <article-title>Maturity-onset diabetes of the young (MODY) &#x2013; in search of ideal diagnostic criteria and precise treatment</article-title>. <source>Prog Cardiovasc Dis</source>. (<year>2024</year>) <volume>85</volume>:<fpage>14</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/j.pcad.2024.03.004</pub-id><pub-id pub-id-type="pmid">38513726</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nie</surname> <given-names>D</given-names></name> <name><surname>Lou</surname> <given-names>X</given-names></name></person-group>. <article-title>A novel glucokinase mutation causing maturity-onset diabetes of the young: a case report</article-title>. <source>Br J Hosp Med</source>. (<year>2025</year>) <volume>86</volume>:<fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.12968/hmed.2024.1056</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Skyler</surname> <given-names>JS</given-names></name> <name><surname>Bakris</surname> <given-names>GL</given-names></name> <name><surname>Bonifacio</surname> <given-names>E</given-names></name> <name><surname>Darsow</surname> <given-names>T</given-names></name> <name><surname>Eckel</surname> <given-names>RH</given-names></name> <name><surname>Groop</surname> <given-names>L</given-names></name></person-group>, et al. <article-title>Differentiation of diabetes by pathophysiology, natural history, and prognosis</article-title>. <source>Diabetes</source>. (<year>2017</year>) <volume>66</volume>:<fpage>241</fpage>&#x2013;<lpage>55</lpage>. <pub-id pub-id-type="doi">10.2337/db16-0806</pub-id><pub-id pub-id-type="pmid">27980006</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chung</surname> <given-names>WK</given-names></name> <name><surname>Erion</surname> <given-names>K</given-names></name> <name><surname>Florez</surname> <given-names>JC</given-names></name> <name><surname>Hattersley</surname> <given-names>AT</given-names></name> <name><surname>Hivert</surname> <given-names>MF</given-names></name> <name><surname>Lee</surname> <given-names>CG</given-names></name></person-group>, et al. <article-title>Precision medicine in diabetes: a consensus report from the American Diabetes Association (ADA) and the European Association for the Study of Diabetes (EASD)</article-title>. <source>Diabetes Care</source>. (<year>2020</year>) <volume>43</volume>:<fpage>1617</fpage>&#x2013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.2337/dci20-0022</pub-id><pub-id pub-id-type="pmid">32561617</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rubio-Cabezas</surname> <given-names>O</given-names></name> <name><surname>Hattersley</surname> <given-names>AT</given-names></name> <name><surname>Nj&#x00F8;lstad</surname> <given-names>PR</given-names></name> <name><surname>Mlynarski</surname> <given-names>W</given-names></name> <name><surname>Ellard</surname> <given-names>S</given-names></name> <name><surname>White</surname> <given-names>N</given-names></name></person-group>, et al. <article-title>The diagnosis and management of monogenic diabetes in children and adolescents: monogenic diabetes in children and adolescents</article-title>. <source>Pediatr Diabetes</source>. (<year>2014</year>) <volume>15</volume>:<fpage>47</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1111/pedi.12192</pub-id><pub-id pub-id-type="pmid">25182307</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shepherd</surname> <given-names>M</given-names></name> <name><surname>Shields</surname> <given-names>B</given-names></name> <name><surname>Hammersley</surname> <given-names>S</given-names></name> <name><surname>Hudson</surname> <given-names>M</given-names></name> <name><surname>McDonald</surname> <given-names>TJ</given-names></name> <name><surname>Colclough</surname> <given-names>K</given-names></name></person-group>, et al. <article-title>Systematic population screening, using biomarkers and genetic testing, identifies 2.5&#x0025; of the U.K. pediatric diabetes population with monogenic diabetes</article-title>. <source>Diabetes Care</source>. (<year>2016</year>) <volume>39</volume>:<fpage>1879</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.2337/dc16-0645</pub-id><pub-id pub-id-type="pmid">27271189</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nkonge</surname> <given-names>KM</given-names></name> <name><surname>Nkonge</surname> <given-names>DK</given-names></name> <name><surname>Nkonge</surname> <given-names>TN</given-names></name></person-group>. <article-title>The epidemiology, molecular pathogenesis, diagnosis, and treatment of maturity-onset diabetes of the young (MODY)</article-title>. <source>Clin Diabetes Endocrinol</source>. (<year>2020</year>) <volume>6</volume>:<fpage>20</fpage>. <pub-id pub-id-type="doi">10.1186/s40842-020-00112-5</pub-id><pub-id pub-id-type="pmid">33292863</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hasballa</surname> <given-names>I</given-names></name> <name><surname>Maggi</surname> <given-names>D</given-names></name></person-group>. <article-title>MODY only monogenic? A narrative review of the novel rare and low-penetrant variants</article-title>. <source>Int J Mol Sci</source>. (<year>2024</year>) <volume>25</volume>:<fpage>8790</fpage>. <pub-id pub-id-type="doi">10.3390/ijms25168790</pub-id><pub-id pub-id-type="pmid">39201476</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Velho</surname> <given-names>G</given-names></name> <name><surname>Froguel</surname> <given-names>P</given-names></name></person-group>. <article-title>Genetic, metabolic and clinical characteristics of maturity onset diabetes of the young</article-title>. <source>Eur J Endocrinol</source>. (<year>1998</year>) <volume>138</volume>:<fpage>233</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1530/eje.0.1380233</pub-id><pub-id pub-id-type="pmid">9539292</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fajans</surname> <given-names>SS</given-names></name> <name><surname>Bell</surname> <given-names>GI</given-names></name> <name><surname>Polonsky</surname> <given-names>KS</given-names></name></person-group>. <article-title>Molecular mechanisms and clinical pathophysiology of maturity-onset diabetes of the young</article-title>. <source>N Engl J Med</source>. (<year>2001</year>) <volume>345</volume>:<fpage>971</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1056/nejmra002168</pub-id><pub-id pub-id-type="pmid">11575290</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Costa</surname> <given-names>A</given-names></name> <name><surname>Bescos</surname> <given-names>M</given-names></name> <name><surname>Velho</surname> <given-names>G</given-names></name> <name><surname>Chevre</surname> <given-names>J</given-names></name> <name><surname>Vidal</surname> <given-names>J</given-names></name> <name><surname>Sesmilo</surname> <given-names>G</given-names></name></person-group>, et al. <article-title>Genetic and clinical characterisation of maturity-onset diabetes of the young in Spanish families</article-title>. <source>Eur J Endocrinol</source>. (<year>2000</year>) <volume>142</volume>:<fpage>380</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1530/eje.0.1420380</pub-id><pub-id pub-id-type="pmid">10754480</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mendon&#x00E7;a</surname> <given-names>M</given-names></name> <name><surname>Barros</surname> <given-names>P</given-names></name> <name><surname>Santa Cruz</surname> <given-names>L</given-names></name> <name><surname>Pastilha</surname> <given-names>AC</given-names></name> <name><surname>Cordeiro</surname> <given-names>R</given-names></name></person-group>. <article-title>Maturity-onset diabetes of the young type 3 (MODY 3): a rare presentation of diabetes in primary care</article-title>. <source>Cureus</source>. (<year>2024</year>) <volume>16</volume>:<fpage>e63119</fpage>. <pub-id pub-id-type="doi">10.7759/cureus.63119</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pihoker</surname> <given-names>C</given-names></name> <name><surname>Gilliam</surname> <given-names>LK</given-names></name> <name><surname>Ellard</surname> <given-names>S</given-names></name> <name><surname>Dabelea</surname> <given-names>D</given-names></name> <name><surname>Davis</surname> <given-names>C</given-names></name> <name><surname>Dolan</surname> <given-names>LM</given-names></name></person-group>, et al. <article-title>Prevalence, characteristics and clinical diagnosis of maturity onset diabetes of the young due to mutations in HNF1A, HNF4A, and glucokinase: results from the SEARCH for Diabetes in Youth</article-title>. <source>J Clin Endocrinol Metab</source>. (<year>2013</year>) <volume>98</volume>:<fpage>4055</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1210/jc.2013-1279</pub-id><pub-id pub-id-type="pmid">23771925</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Uygun</surname> <given-names>G</given-names></name> <name><surname>Ayaz</surname> <given-names>A</given-names></name> <name><surname>Kanat</surname> <given-names>M</given-names></name></person-group>. <article-title>Maturity-onset diabetes of the young (MODY): How much can we detect?</article-title> <source>Bezmialem Sci</source>. (<year>2025</year>) <volume>13</volume>:<fpage>222</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.14235/bas.galenos.2025.57704</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hattersley</surname> <given-names>AT</given-names></name> <name><surname>Greeley</surname> <given-names>SAW</given-names></name> <name><surname>Polak</surname> <given-names>M</given-names></name> <name><surname>Rubio-Cabezas</surname> <given-names>O</given-names></name> <name><surname>Nj&#x00F8;lstad</surname> <given-names>PR</given-names></name> <name><surname>Mlynarski</surname> <given-names>W</given-names></name></person-group>, et al. <article-title>ISPAD clinical practice consensus guidelines 2018: the diagnosis and management of monogenic diabetes in children and adolescents</article-title>. <source>Pediatr Diabetes</source>. (<year>2018</year>) <volume>19</volume>:<fpage>47</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1111/pedi.12772</pub-id><pub-id pub-id-type="pmid">30225972</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garg</surname> <given-names>RK</given-names></name></person-group>. <article-title>MODY: strategies for a unique form of diabetes</article-title>. <source>J Educ Health Promot</source>. (<year>2025</year>) <volume>14</volume>:<fpage>338</fpage>. <pub-id pub-id-type="doi">10.4103/jehp.jehp_1880_24</pub-id><pub-id pub-id-type="pmid">40979327</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>De Dios</surname> <given-names>A</given-names></name> <name><surname>L&#x00F3;pez</surname> <given-names>A</given-names></name> <name><surname>Frechtel</surname> <given-names>G</given-names></name></person-group>. <article-title>Cl&#x00ED;nica y tratamiento de la diabetes tipo MODY</article-title>. <source>Rev Soc Argent Diabetes</source>. (<year>2014</year>) <volume>48</volume>:<fpage>130</fpage>. <pub-id pub-id-type="doi">10.47196/diab.v48i3.190</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Klupa</surname> <given-names>T</given-names></name> <name><surname>Warram</surname> <given-names>JH</given-names></name> <name><surname>Antonellis</surname> <given-names>A</given-names></name> <name><surname>Pezzolesi</surname> <given-names>M</given-names></name> <name><surname>Nam</surname> <given-names>M</given-names></name> <name><surname>Malecki</surname> <given-names>MT</given-names></name></person-group>, et al. <article-title>Determinants of the development of diabetes (maturity-onset diabetes of the young-3) in carriers of HNF-1&#x03B1; mutations</article-title>. <source>Diabetes Care</source>. (<year>2002</year>) <volume>25</volume>:<fpage>2292</fpage>&#x2013;<lpage>301</lpage>. <pub-id pub-id-type="doi">10.2337/diacare.25.12.2292</pub-id><pub-id pub-id-type="pmid">12453976</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gill-Carey</surname> <given-names>O</given-names></name> <name><surname>Shields</surname> <given-names>B</given-names></name> <name><surname>Colclough</surname> <given-names>K</given-names></name> <name><surname>Ellard</surname> <given-names>S</given-names></name> <name><surname>Hattersley</surname> <given-names>A</given-names></name></person-group>. <article-title>Finding a glucokinase mutation alters patient treatment</article-title>. <source>Diabet Med</source>. (<year>2007</year>) <volume>24</volume>:<fpage>6</fpage>. <pub-id pub-id-type="doi">10.1111/j.1464-5491.2007.02125.x</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ellard</surname> <given-names>S</given-names></name> <name><surname>Bellann&#x00E9;-Chantelot</surname> <given-names>C</given-names></name> <name><surname>Hattersley</surname> <given-names>AT</given-names></name></person-group>. <article-title>Best practice guidelines for the molecular genetic diagnosis of maturity-onset diabetes of the young</article-title>. <source>Diabetologia</source>. (<year>2008</year>) <volume>51</volume>:<fpage>546</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1007/s00125-008-0942-y</pub-id><pub-id pub-id-type="pmid">18297260</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Monta&#x00F1;o-Candelo</surname> <given-names>LC</given-names></name> <name><surname>Mejia-de Beldjena</surname> <given-names>L</given-names></name></person-group>. <article-title>Caracterizaci&#x00F3;n cl&#x00ED;nica de pacientes con diabetes tipo MODY: reporte de casos</article-title>. <source>Iatreia</source>. (<year>2025</year>) <volume>38</volume>:<fpage>782</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.17533/udea.iatreia.317</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ajith</surname> <given-names>K</given-names></name> <name><surname>Sureka</surname> <given-names>V</given-names></name> <name><surname>Jayannan</surname> <given-names>J</given-names></name> <name><surname>Indu</surname> <given-names>P</given-names></name></person-group>. <article-title>From misdiagnosis to precision: genetic testing reveals MODY in adolescents initially treated as type 1 diabetes</article-title>. <source>J Clin Diagn Res</source>. (<year>2025</year>) <volume>19</volume>:<fpage>OR01</fpage>&#x2013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.7860/jcdr/2025/77139.22165</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Awa</surname> <given-names>W</given-names></name> <name><surname>Schober</surname> <given-names>E</given-names></name> <name><surname>Wiegand</surname> <given-names>S</given-names></name> <name><surname>Herwig</surname> <given-names>J</given-names></name> <name><surname>Meissner</surname> <given-names>T</given-names></name> <name><surname>Schmidt</surname> <given-names>F</given-names></name></person-group>, et al. <article-title>Reclassification of diabetes type in pediatric patients initially classified as type 2 diabetes mellitus: 15 years follow-up using routine data from the German/Austrian DPV database</article-title>. <source>Diabetes Res Clin Pract</source>. (<year>2011</year>) <volume>94</volume>:<fpage>463</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.diabres.2011.09.011</pub-id><pub-id pub-id-type="pmid">21955960</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carmody</surname> <given-names>D</given-names></name> <name><surname>Naylor</surname> <given-names>RN</given-names></name> <name><surname>Bell</surname> <given-names>CD</given-names></name> <name><surname>Berry</surname> <given-names>S</given-names></name> <name><surname>Montgomery</surname> <given-names>JT</given-names></name> <name><surname>Tadie</surname> <given-names>EC</given-names></name></person-group>, et al. <article-title>GCK-MODY in the US National Monogenic Diabetes Registry: frequently misdiagnosed and unnecessarily treated</article-title>. <source>Acta Diabetol</source>. (<year>2016</year>) <volume>53</volume>:<fpage>703</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1007/s00592-016-0859-8</pub-id><pub-id pub-id-type="pmid">27106716</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bhattacharya</surname> <given-names>S</given-names></name> <name><surname>Fernandez</surname> <given-names>CJ</given-names></name> <name><surname>Kamrul-Hasan</surname> <given-names>ABM</given-names></name> <name><surname>Pappachan</surname> <given-names>JM</given-names></name></person-group>. <article-title>Monogenic diabetes: an evidence-based clinical approach</article-title>. <source>World J Diabetes</source>. (<year>2025</year>) <volume>16</volume>:<fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.4239/wjd.v16.i5.104787</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Naylor</surname> <given-names>RN</given-names></name> <name><surname>John</surname> <given-names>PM</given-names></name> <name><surname>Winn</surname> <given-names>AN</given-names></name> <name><surname>Carmody</surname> <given-names>D</given-names></name> <name><surname>Greeley</surname> <given-names>SAW</given-names></name> <name><surname>Philipson</surname> <given-names>LH</given-names></name></person-group>, et al. <article-title>Cost-effectiveness of MODY genetic testing: translating genomic advances into practical health applications</article-title>. <source>Diabetes Care</source>. (<year>2014</year>) <volume>37</volume>:<fpage>202</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.2337/dc13-0410</pub-id><pub-id pub-id-type="pmid">24026547</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Asgarian</surname> <given-names>S</given-names></name> <name><surname>Lanjanian</surname> <given-names>H</given-names></name> <name><surname>Rahimipour Anaraki</surname> <given-names>S</given-names></name> <name><surname>Hadaegh</surname> <given-names>F</given-names></name> <name><surname>Moazzam-Jazi</surname> <given-names>M</given-names></name> <name><surname>Najd-Hassan-Bonab</surname> <given-names>L</given-names></name></person-group>, et al. <article-title>Examining the clinical and genetic spectrum of maturity-onset diabetes of the young (MODY) in Iran</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>:<fpage>19860</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-70864-y</pub-id><pub-id pub-id-type="pmid">39191897</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>X</given-names></name> <name><surname>Siegel</surname> <given-names>KR</given-names></name> <name><surname>Ng</surname> <given-names>BP</given-names></name> <name><surname>Jawanda</surname> <given-names>S</given-names></name> <name><surname>Proia</surname> <given-names>KK</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name></person-group>, et al. <article-title>Cost-effectiveness of diabetes prevention interventions targeting high-risk individuals and whole populations: a systematic review</article-title>. <source>Diabetes Care</source>. (<year>2020</year>) <volume>43</volume>:<fpage>1593</fpage>&#x2013;<lpage>616</lpage>. <pub-id pub-id-type="doi">10.2337/dci20-0018</pub-id><pub-id pub-id-type="pmid">33534726</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chatterjee</surname> <given-names>R</given-names></name> <name><surname>Narayan</surname> <given-names>KV</given-names></name> <name><surname>Lipscomb</surname> <given-names>J</given-names></name> <name><surname>Jackson</surname> <given-names>SL</given-names></name> <name><surname>Long</surname> <given-names>Q</given-names></name> <name><surname>Zhu</surname> <given-names>M</given-names></name></person-group>, et al. <article-title>Screening for diabetes and prediabetes should be cost-saving in patients at high risk</article-title>. <source>Diabetes Care</source>. (<year>2013</year>) <volume>36</volume>:<fpage>1981</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.2337/dc12-1752</pub-id><pub-id pub-id-type="pmid">23393215</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Peghinelli</surname> <given-names>VV</given-names></name> <name><surname>De Sibio</surname> <given-names>MT</given-names></name> <name><surname>Depra</surname> <given-names>IC</given-names></name> <name><surname>Teles Bezerra</surname> <given-names>MG</given-names></name> <name><surname>Sakalem</surname> <given-names>ME</given-names></name> <name><surname>J&#x00FA;nior</surname> <given-names>AFDM</given-names></name></person-group>, et al. <article-title>MODY calculator applied in patients with clinical diagnosis of type 1 diabetes mellitus: is a higher cutoff needed?</article-title> <source>Heliyon</source>. (<year>2024</year>) <volume>10</volume>:<fpage>e36006</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e36006</pub-id><pub-id pub-id-type="pmid">39224250</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alarcon</surname> <given-names>G</given-names></name> <name><surname>Nguyen</surname> <given-names>A</given-names></name> <name><surname>Jones</surname> <given-names>A</given-names></name> <name><surname>Shields</surname> <given-names>B</given-names></name> <name><surname>Redondo</surname> <given-names>MJ</given-names></name> <name><surname>Tosur</surname> <given-names>M</given-names></name></person-group>. <article-title>The maturity-onset diabetes of the young (MODY) calculator overestimates MODY probability in Hispanic youth</article-title>. <source>J Clin Endocrinol Metab</source>. (<year>2025</year>) <volume>110</volume>:<fpage>e2191</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1210/clinem/dgae770</pub-id><pub-id pub-id-type="pmid">39492690</pub-id></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ang</surname> <given-names>SF</given-names></name> <name><surname>Lim</surname> <given-names>SC</given-names></name> <name><surname>Tan</surname> <given-names>CS</given-names></name> <name><surname>Fong</surname> <given-names>JC</given-names></name> <name><surname>Kon</surname> <given-names>WY</given-names></name> <name><surname>Lian</surname> <given-names>JX</given-names></name></person-group>, et al. <article-title>A preliminary study to evaluate the strategy of combining clinical criteria and next generation sequencing (NGS) for the identification of monogenic diabetes among multi-ethnic Asians</article-title>. <source>Diabetes Res Clin Pract</source>. (<year>2016</year>) <volume>119</volume>:<fpage>13</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1016/j.diabres.2016.06.008</pub-id><pub-id pub-id-type="pmid">27420379</pub-id></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Laver</surname> <given-names>TW</given-names></name> <name><surname>Patel</surname> <given-names>KA</given-names></name></person-group>. <article-title>Maturity onset diabetes of the young and beyond: the changing face of single-gene diabetes</article-title>. <source>Eur J Endocrinol</source>. (<year>2025</year>) <volume>193</volume>:<fpage>R25</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1093/ejendo/lvaf172</pub-id><pub-id pub-id-type="pmid">40819284</pub-id></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Fern&#x00E1;ndez</surname> <given-names>A</given-names></name></person-group>. <source>Opacity, Machine Learning and Explainable AI</source>. <publisher-loc>Switzerland</publisher-loc>: <publisher-name>Springer Nature</publisher-name> (<year>2023</year>). <comment>Chap. 2. p. 39&#x2013;58</comment>.</mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shields</surname> <given-names>BM</given-names></name> <name><surname>McDonald</surname> <given-names>TJ</given-names></name> <name><surname>Ellard</surname> <given-names>S</given-names></name> <name><surname>Campbell</surname> <given-names>MJ</given-names></name> <name><surname>Hyde</surname> <given-names>C</given-names></name> <name><surname>Hattersley</surname> <given-names>AT</given-names></name></person-group>. <article-title>The development and validation of a clinical prediction model to determine the probability of MODY in patients with young-onset diabetes</article-title>. <source>Diabetologia</source>. (<year>2012</year>) <volume>55</volume>:<fpage>1265</fpage>&#x2013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1007/s00125-011-2418-8</pub-id><pub-id pub-id-type="pmid">22218698</pub-id></mixed-citation></ref>
<ref id="B38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shields</surname> <given-names>BM</given-names></name> <name><surname>Carlsson</surname> <given-names>A</given-names></name> <name><surname>Patel</surname> <given-names>K</given-names></name> <name><surname>Knupp</surname> <given-names>J</given-names></name> <name><surname>Kaur</surname> <given-names>A</given-names></name> <name><surname>Johnston</surname> <given-names>D</given-names></name></person-group>, et al. <article-title>Development of a clinical calculator to aid the identification of MODY in pediatric patients at the time of diabetes diagnosis</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>:<fpage>10589</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-60160-0</pub-id><pub-id pub-id-type="pmid">38719926</pub-id></mixed-citation></ref>
<ref id="B39"><label>39.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>Y</given-names></name> <name><surname>Ma</surname> <given-names>F</given-names></name> <name><surname>Shu</surname> <given-names>H</given-names></name> <name><surname>Zheng</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>Y</given-names></name></person-group>, et al. <article-title>MODY probability calculator is suitable for MODY screening in China: a population-based study</article-title>. <source>J Endocr Soc</source>. (<year>2024</year>) <volume>8</volume>:<fpage>bvae047</fpage>. <pub-id pub-id-type="doi">10.1210/jendso/bvae047</pub-id><pub-id pub-id-type="pmid">38562131</pub-id></mixed-citation></ref>
<ref id="B40"><label>40.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tsoi</surname> <given-names>STF</given-names></name> <name><surname>Lim</surname> <given-names>CKP</given-names></name> <name><surname>Ma</surname> <given-names>RCW</given-names></name> <name><surname>Lau</surname> <given-names>ESH</given-names></name> <name><surname>Fan</surname> <given-names>B</given-names></name> <name><surname>Chun Kwan</surname> <given-names>O</given-names></name></person-group>, et al. <article-title>Development of a Chinese-specific clinical model to predict maturity onset diabetes of the young</article-title>. <source>Diabetes Metab Res Rev</source>. (<year>2025</year>) <volume>41</volume>:<fpage>e70087</fpage>. <pub-id pub-id-type="doi">10.1002/dmrr.70087</pub-id><pub-id pub-id-type="pmid">40966384</pub-id></mixed-citation></ref>
<ref id="B41"><label>41.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yoshiji</surname> <given-names>S</given-names></name> <name><surname>Hasebe</surname> <given-names>M</given-names></name> <name><surname>Tanaka</surname> <given-names>D</given-names></name> <name><surname>Shimizu</surname> <given-names>M</given-names></name> <name><surname>Soma</surname> <given-names>Y</given-names></name> <name><surname>Kawaguchi</surname> <given-names>T</given-names></name></person-group>, et al. <article-title>Genetic and clinical characteristics of monogenic diabetes in Japan: a nationwide study by the Japan Diabetes Society</article-title>. <source>J Clin Endocrinol Metab</source>. (<year>2025</year>) <volume>111</volume>:<fpage>757</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.1210/clinem/dgaf478</pub-id></mixed-citation></ref>
<ref id="B42"><label>42.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Buuren</surname> <given-names>S</given-names></name> <name><surname>Groothuis-Oudshoorn</surname> <given-names>K</given-names></name></person-group>. <article-title>mice: Multivariate imputation by chained equations in R</article-title>. <source>J Stat Softw</source>. (<year>2011</year>) <volume>45</volume>:<fpage>1</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v045.i03</pub-id></mixed-citation></ref>
<ref id="B43"><label>43.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seu</surname> <given-names>K</given-names></name> <name><surname>Kang</surname> <given-names>MS</given-names></name> <name><surname>Lee</surname> <given-names>H</given-names></name></person-group>. <article-title>An intelligent missing data imputation techniques: a review</article-title>. <source>JOIV Int J Inform Vis</source>. (<year>2022</year>) <volume>6</volume>:<fpage>278</fpage>. <pub-id pub-id-type="doi">10.30630/joiv.6.1-2.935</pub-id></mixed-citation></ref>
<ref id="B44"><label>44.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>SM</given-names></name> <name><surname>Lee</surname> <given-names>SI</given-names></name></person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. In: <person-group person-group-type="editor"><name><surname>Guyon</surname><given-names>I</given-names></name> <name><surname>Luxburg</surname><given-names>UV</given-names></name> <name><surname>Bengio</surname><given-names>S</given-names></name> <name><surname>Wallach</surname><given-names>H</given-names></name> <name><surname>Fergus</surname><given-names>R</given-names></name> <name><surname>Vishwanathan</surname><given-names>S</given-names></name></person-group>, et al. editors. <source>Advances in Neural Information Processing Systems</source>. <publisher-loc>New York / Conference: Long Beach, CA</publisher-loc>: <publisher-name>Curran Associates, Inc</publisher-name>. (<year>2017</year>). Vol. <volume>30</volume>. p. <fpage>4765</fpage>&#x2013;<lpage>74</lpage>.</mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1491366/overview">Hadi Akbarzadeh Khorshidi</ext-link>, The University of Melbourne, Australia</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1173353/overview">David Tyler Broome</ext-link>, University of Michigan, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2146931/overview">Sheng-Chieh Lu</ext-link>, University of Texas MD Anderson Cancer Center, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2590964/overview">Nrusingha Tripathy</ext-link>, Siksha O Anusandhan University, India</p></fn>
</fn-group>
</back>
</article>