<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Endocrinol.</journal-id>
<journal-title>Frontiers in Endocrinology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Endocrinol.</abbrev-journal-title>
<issn pub-type="epub">1664-2392</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fendo.2024.1368225</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Endocrinology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Study on risk factors of impaired fasting glucose and development of a prediction model based on Extreme Gradient Boosting algorithm</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Cui</surname>
<given-names>Qiyuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2406287"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Pu</surname>
<given-names>Jianhong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Li</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zheng</surname>
<given-names>Yun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lin</surname>
<given-names>Jiaxi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2343690"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Lu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Xue</surname>
<given-names>Peng</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhu</surname>
<given-names>Jinzhou</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>He</surname>
<given-names>Mingqing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1065913"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Geriatrics, The First Affiliated Hospital of Soochow University</institution>, <addr-line>Suzhou, Jiangsu</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Physical Examination Center, The Affiliated Suzhou Hospital of Nanjing University Medical School</institution>, <addr-line>Suzhou, Jiangsu</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Gastroenterology, The First Affiliated Hospital of Soochow University</institution>, <addr-line>Suzhou, Jiangsu</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Endocrinology, The Affiliated Suzhou Hospital of Nanjing University Medical School</institution>, <addr-line>Suzhou, Jiangsu</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: &#xc5;ke Sj&#xf6;holm, G&#xe4;vle Hospital, Sweden</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Nebojsa Bacanin, Singidunum University, Serbia</p>
<p>Nizamettin Aydin, Istanbul Technical University, T&#xfc;rkiye</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Mingqing He, <email xlink:href="mailto:hmqiori@163.com">hmqiori@163.com</email>; Jinzhou Zhu, <email xlink:href="mailto:jzzhu@zju.edu.cn">jzzhu@zju.edu.cn</email>; Peng Xue, <email xlink:href="mailto:xsytyl@126.com">xsytyl@126.com</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>09</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1368225</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>09</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Cui, Pu, Li, Zheng, Lin, Liu, Xue, Zhu and He</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Cui, Pu, Li, Zheng, Lin, Liu, Xue, Zhu and He</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Objective</title>
<p>The aim of this study was to develop and validate a machine learning-based model to predict the development of impaired fasting glucose (IFG) in middle-aged and elderly people over a 5-year period using data from a cohort study.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study was a retrospective cohort study. The study population was 1,855 participants who underwent consecutive physical examinations at the First Affiliated Hospital of Soochow University between 2018 and 2022. The dataset included medical history, physical examination, and biochemical index test results. The cohort was randomly divided into a training dataset and a validation dataset in a ratio of 8:2. The machine learning algorithms used in this study included Extreme Gradient Boosting (XGBoost), Support Vector Machines (SVM), Naive Bayes, Decision Trees (DT), and traditional Logistic Regression (LR). Feature selection, parameter optimization, and model construction were performed in the training set, while the validation set was used to evaluate the predictive performance of the models. The performance of these models was evaluated by the area under the receiver operating characteristic (ROC) curve (AUC), calibration curves and decision curve analysis (DCA). To interpret the best-performing model, SHapley Additive exPlanation (SHAP) plots were used in this study.</p>
</sec>
<sec>
<title>Results</title>
<p>The training/validation dataset, consisting of 1,855 individuals from the First Affiliated Hospital of Soochow University, yielded significant variables following selection by the Boruta algorithm and multivariate logistic regression analysis. These significant variables included systolic blood pressure (SBP), fatty liver, waist circumference (WC) and serum creatinine (Scr). The XGBoost model outperformed the other models, demonstrating an AUC of 0.7391 in the validation set.</p>
</sec>
<sec>
<title>Conclusions</title>
<p>The XGBoost model composed of SBP, fatty liver, WC and Scr may assist doctors with the early identification of IFG in middle-aged and elderly people.</p>
</sec>
</abstract>
<kwd-group>
<kwd>impaired fasting glucose</kwd>
<kwd>prediction model</kwd>
<kwd>artificial intelligence</kwd>
<kwd>cohort study</kwd>
<kwd>middle-aged and elderly people</kwd>
</kwd-group>
<contract-num rid="cn001">No. 81901262</contract-num>
<contract-sponsor id="cn001">Soochow University<named-content content-type="fundref-id">10.13039/501100007824</named-content>
</contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="29"/>
<page-count count="13"/>
<word-count count="5730"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Clinical Diabetes</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Type 2 diabetes mellitus (T2DM) represents a group of metabolic disorders marked by persistent elevations in blood glucose levels, posing a substantial global public health challenge. In 2017, it was estimated that 451 million individuals aged 18 to 99 years worldwide had diabetes, with projections indicating a staggering increase to 693 million by 2045, as reported by the International Diabetes Federation (IDF) (<xref ref-type="bibr" rid="B1">1</xref>). Prediabetes mellitus (PDM) signifies an intermediate phase preceding the onset of full-blown diabetes&#x2014;a state of glucose metabolism lying between diabetes and normal glucose tolerance (NGT). It encompasses conditions like impaired fasting glucose (IFG), impaired glucose tolerance (IGT), or a combination of both. Recent data published in the British Medical Journal (BMJ) in 2020 by Chinese researchers revealed that the prevalence of diabetes among Chinese adults stood at 12.8%, with an alarming 35.2% prevalence in the prediabetic state (<xref ref-type="bibr" rid="B2">2</xref>). With the ongoing aging of society, the number of elderly individuals grappling with diabetes has surged dramatically. Notably, individuals with IFG face a significantly heightened risk of developing diabetes and its associated complications (<xref ref-type="bibr" rid="B3">3</xref>). The early identification of IFG in individuals and the timely implementation of lifestyle interventions can effectively mitigate the progression from IFG to T2DM (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>Currently, relatively few studies have delved into risk prediction models specifically tailored to IFG, and many of these studies rely on cross-sectional data. For instance, a South Korean study (<xref ref-type="bibr" rid="B5">5</xref>) fashioned a predictive model for IFG using the categorical boosting (CatBoost) algorithm, which encompassed eight predictors: age, high cholesterol levels, waist-to-height ratio (WHtR), Body Mass Index (BMI), frequent alcohol consumption over the past year, marital status, hypertension, and smoking. Few investigations have concentrated on risk modeling for the development of prediabetes or IFG, as much of the existing literature primarily examines risk factors for IFG through cross-sectional analyses. For example, in a study by Khadija et&#xa0;al. that assessed prediabetes risk in nurses using straightforward statistical techniques, noteworthy variables associated with prediabetes included age, BMI, waist circumference (WC), antihypertensive medication history, high blood glucose history, family history of diabetes, daily consumption of fruits, berries, or vegetables, and daily physical activity (<xref ref-type="bibr" rid="B6">6</xref>). In summary, previous studies on risk factors for IFG are based on cross-sectional databases, which cannot provide causal associations for the development of IFG. Therefore, this study proposes to develop a predictive model for the development of IFG in middle-aged and elderly people using data from a longitudinal cohort study, which will provide valuable assistance to community healthcare providers and clinicians in the management of IFG.</p>
<p>In recent years, artificial intelligence technology has experienced rapid advancement, encompassing machine learning (ML), deep learning, and neural network algorithms (<xref ref-type="bibr" rid="B7">7</xref>). As such, they have found extensive utility in disease diagnosis and risk prediction within the medical and healthcare domains (<xref ref-type="bibr" rid="B8">8</xref>). ML algorithms encompass a range of techniques, including Extreme Gradient Boosting (XGBoost), Random Forest (RF), Support Vector Machines (SVM), Na&#xef;ve Bayes, and more. These algorithms are distinguished by their capacity to learn from data, enabling them to make precise predictions regarding future events (<xref ref-type="bibr" rid="B9">9</xref>). In a study published in 2022, it was noted that an automated image analysis framework was constructed by using a simple convolutional neural network (CNN) model to recognize COVID-19 afflicted chest X-ray data. In order to improve classification accuracy, the fully connected layer of a simple CNN was further replaced with an efficient XGBoost classifier in the above study (<xref ref-type="bibr" rid="B10">10</xref>). The role played by ML algorithms can also be seen in non-medical fields, such as in India where academics have devised a Hierarchical Feature Selection (HFS) model based on Genetic Algorithms to optimize the local and global features extracted from each handwritten word images under consideration (<xref ref-type="bibr" rid="B11">11</xref>). In addition, there have been many advances in research related to Deep Learning (DL) algorithms, such as an enhanced version of the Firefly algorithm proposed in a study in 2021 that corrects the recognized shortcomings of the original method by explicitly exploring the mechanism and a chaotic local search strategy (<xref ref-type="bibr" rid="B12">12</xref>). 
In a study published in 2022, an automated framework based on the hybridized sine cosine algorithm was proposed to tackle the overfitting shortcomings of neural network algorithms in DL algorithms (<xref ref-type="bibr" rid="B13">13</xref>). Consequently, the development of ML provides a novel avenue for constructing a predictive model for IFG. In the present study, risk factors for IFG were determined through a five-year longitudinal cohort analysis of clinical data. Considering IFG as the outcome variable, an accurate IFG risk prediction model was finally built based on multiple ML algorithms and traditional logistic regression (LR) analysis methods. Such an endeavor promises valuable insights into the prediction and prevention of IFG. In addition, this study is innovative in that it also uses a variety of visualization methods to demonstrate the role of weighting variables in the model output prediction results. This type of analytical approach to visualizing the implementation of ML algorithms has been less reported in risk prediction models for IFG. To improve the interpretability of the black-box model, the SHapley Additive exPlanation (SHAP) was used in this study to explain the predictive model. As a result, the prediction model not only predicts prognostic outcomes, but also provides reasonable explanations for the predicted outcomes, which greatly improves the user&#x2019;s trust in the model. In summary, this study aims to establish a highly feasible model that provides a valuable reference for clinicians engaged in the early screening, diagnosis and treatment of IFG.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Methods and materials</title>
<sec id="s2_1">
<label>2.1</label>
<title>Study population</title>
<p>The study population was selected from individuals who underwent health checkups at the Health Management Center of the First Affiliated Hospital of Soochow University. Data on health check-ups from January 2018 to December 2022 were collected for this study. To ensure the accuracy and reasonableness of the data, the research paid special attention to the last five years of data from the Health Management Center. According to the design of a retrospective cohort study, this study considered the 2018 health check-up data as the baseline of the cohort and the study ended in 2022. The presence of impaired fasting glucose (IFG) in this study population was the primary outcome of interest.</p>
<p>The inclusion criteria were as follows: (1) aged 45 years or older; (2) not previously diagnosed with IFG or T2DM at baseline; (3) possessed complete physical examination data from 2018 to 2022 without significant gaps in critical information. Exclusion criteria included: (1) age less than 45 years; (2) prior or recent diagnosis of IFG or T2DM at baseline; (3) use of medications that could influence plasma glucose levels; (4) missing or incomplete data on key clinical parameters such as fasting plasma glucose (FPG).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data collection</title>
<p>Demographic and sociological information, including age, gender, and medical history, was collected from all participants in this study. In addition, the results of each participant&#x2019;s physical examination were recorded and laboratory measurements were taken. This involved measuring height, weight, systolic and diastolic blood pressure (SBP and DBP) and calculating the body mass index (BMI) based on the participant&#x2019;s height and weight. All participants underwent fasting for a minimum of 8 hours before morning examinations, during which 3&#x2013;5 mL of venous blood was drawn from the elbow. Fasting plasma glucose (FPG), blood urea nitrogen (BUN), serum uric acid (SUA), serum creatinine (Scr), alanine aminotransferase (ALT), aspartic acid aminotransferase (AST), glutamyl transpeptidase (GGT), alkaline phosphatase (ALP), total cholesterol (TC), triglycerides (TG), high-density lipoprotein cholesterol (HDL), low-density lipoprotein cholesterol (LDL), apolipoprotein A1 (ApoA1), and apolipoprotein B (ApoB) were assessed using the Hitachi 7600 Automatic Biochemistry Analyzer. Furthermore, abdominal ultrasound examinations were performed by trained sonographers, and all participants underwent abdominal ultrasound scans. To accommodate machine learning algorithms that require numeric feature attributes, non-numeric attributes like gender were converted into numeric values. The percentage of missing variables included in this study was less than 30%. The MICE package was used for missing value analysis and multiple imputation. <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref> illustrates the results of data imputation. The flowchart of this study is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The flowchart of this study.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g001.tif"/>
</fig>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Diagnostic criteria</title>
<p>In accordance with the World Health Organization (WHO) diagnostic criteria for diabetes mellitus established in 1999 (<xref ref-type="bibr" rid="B14">14</xref>), the FPG levels of the participants were classified in this study. A normal blood glucose state was characterized by a fasting glucose level below 6.10 mmol/L and a 2-hour glucose level from the oral glucose tolerance test (OGTT) under 7.80 mmol/L. IFG was defined as a fasting glucose level ranging from 6.10 to 7.00 mmol/L and a 2-hour OGTT glucose level below 7.80 mmol/L, not meeting the diagnostic criteria for diabetes. For the diagnosis of fatty liver, we adhered to the ultrasound criteria outlined in the National Workshop on Fatty Liver and Alcoholic Liver Disease, Chinese Society of Hepatology, Chinese Medical Association (<xref ref-type="bibr" rid="B15">15</xref>). In this study, the diagnosis of fatty liver was confirmed using ultrasound diagnostic criteria proposed at the National Symposium on Fatty Liver and Alcoholic Liver Disease of the Chinese Medical Association Hepatology Branch (<xref ref-type="bibr" rid="B15">15</xref>). Specifically, fatty liver diagnosis relied on one of the following criteria: (1) noticeable enhancement of near-field liver echoes surpassing that of the kidney; (2) indistinct intrahepatic duct structure; (3) gradual attenuation of liver echoes in the far field. Gallstones were identified through ultrasound imaging, typically presenting as one or more intense echoes within the gallbladder, extrahepatic bile duct, or intrahepatic bile duct, accompanied by movable acoustic shadows (<xref ref-type="bibr" rid="B16">16</xref>). A history of cholecystectomy denoted prior gallbladder removal, where the gallbladder was no longer visible on ultrasound (<xref ref-type="bibr" rid="B17">17</xref>). The term &#x201c;gallstones&#x201d; encompassed both the presence of gallstones and prior cholecystectomy (<xref ref-type="bibr" rid="B18">18</xref>).</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Development of models</title>
<p>The data collected from the medical checkup center of the First Affiliated Hospital of Soochow University were randomly divided into a training set and a validation set at a ratio of 8:2. Feature selection, parameter tuning, and model building were performed in the training set, while the validation set was used to evaluate the predictive performance of the models. Variable selection was the main step before modelling. The Boruta algorithm is a random forest-based algorithm; its main purpose is to screen and rank important feature variables related to the dependent variable. In each iteration, a comparison of the importance of the original and shadow variables is performed. If the importance of the original variable is significantly higher than the importance of the shadow variable, the original variable is considered important; if the importance of the original variable is significantly lower than the importance of the shadow variable, the original variable is considered unimportant (<xref ref-type="bibr" rid="B19">19</xref>). The Boruta algorithm reached the specified limit of random forest runs after 99 iterations. All variables were screened by the Boruta algorithm, and multivariate logistic regression analysis was then performed. The filtered variables were incorporated into the ML models. ML prediction models were developed, comprising XGBoost, SVM, Naive Bayes, DT and LR models. To determine the optimal parameters, a 5-fold cross-validation grid search was executed on the training set, while the LR model was implemented with default parameters.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>The evaluation and interpretation of models</title>
<p>Based on the prediction results of the model, the area under the receiver operating characteristic curve (AUC) of the training and validation sets can be calculated to assess the discriminative ability of the model. The confusion matrix, consisting of true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN), was established to calculate Sensitivity, Specificity, positive predictive value (PPV), negative predictive value (NPV), accuracy (ACC), and F1 scores for evaluating discrimination performance of models. Formulas were as follows: Sensitivity = Recall = TP/(TP + FN); Specificity = TN/(TN + FP); ACC = (TP + TN)/(TP + FP + FN + TN); PPV = Precision = TP/(TP + FP); NPV = TN/(TN + FN); F1 score = 2*Recall*Precision/(Recall + Precision). Calibration curves, plotted after 500 bootstrap resamples with replacement, reflect the fit of the models. Decision curve analysis (DCA) was used to assess the clinical utility of the predictive model. To gain insight into the factors that contribute to the development of IFG in middle-aged and elderly people, SHapley Additive exPlanations (SHAP) plots were further developed in this study. These plots visually demonstrated the variable contributions to the outcome, with local SHAP plots providing a detailed look at variable contributions for specific instances. Feature importance was ranked according to the Shapley value. In addition, force plots within the SHAP model were used to individualize predictions for two randomly selected samples from the validation set.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Statistical analysis</title>
<p>Statistical analysis was performed using SPSS (version 27.0) to describe the general data of all individuals. The Shapiro&#x2013;Wilk test was employed to ascertain the normal distribution of variables. Continuous variables displaying a normal distribution were presented as mean &#xb1; standard deviation (SD), while those with skewed distribution were described as median (interquartile ranges). Categorical variables were represented as frequencies. To compare variables between groups, the Pearson Chi-square test was used for categorical variables, while the Student&#x2019;s t-test or the nonparametric Mann-Whitney U test was applied for continuous variables. All variables, determined as significant and tentative through Boruta algorithm screening, were incorporated into the multivariate logistic regression analysis. The logistic regression analysis was conducted using the Enter approach, with &#x3b1; in = 0.05, &#x3b1; out = 0.1, and an inspection level of &#x3b1; = 0.05. SPSS (version 27.0) was also used to draw box plots to depict the independent risk factors associated with the onset of IFG in the middle-aged and elderly people identified in this study. The data imputation, feature selection, model construction, evaluation, and visualization were executed using R software (version 4.2.3). The main packages involved are &#x201c;mice, Boruta, caret, xgboost, SHAPforxgboost, shapviz, ResourceSelection, rms&#x201d;. A two-sided <italic>p</italic> &lt; 0.05 was considered statistically significant.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Demographic and clinical characteristics</title>
<p>A total of 1,855 individuals were included in the cohort, with 734 cases (39.6%) of impaired fasting glucose (IFG) observed within the entire cohort. They were randomly split into training (n = 1,438) and validation (n = 417) sets in an 8:2 ratio. In the training dataset, 73.0% (1,050/1,438) were male, and 27.0% (388/1,438) were female. The median age was 56 years (IQR = 50&#x2013;67 years) for the IFG group and 55 years (IQR = 49&#x2013;65 years) for the non-IFG group. In the validation dataset, IFG onset was more common among male patients, and the median age of the IFG group was 63 years (IQR = 60&#x2013;71 years). Detailed demographic and clinical characteristics are presented in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Demographic and clinical characteristics of participants.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Variables</th>
<th valign="top" colspan="4" align="center">The training dataset(n=1,438)</th>
<th valign="top" colspan="3" align="center">The validation dataset(n=417)</th>
</tr>
<tr>
<th valign="top" align="center">Group</th>
<th valign="top" align="center">IFG(n=570)</th>
<th valign="top" align="center">Non-IFG(n=868)</th>
<th valign="top" align="center">
<italic>p</italic>-value</th>
<th valign="top" align="center">IFG(n=164)</th>
<th valign="top" align="center">Non-IFG(n=253)</th>
<th valign="top" align="center">
<italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Age(years) (median [IQR])</td>
<td valign="top" align="left"/>
<td valign="top" align="center">56.00[50.00,67.00]</td>
<td valign="top" align="center">55.00[49.00,65.00]</td>
<td valign="top" align="center">0.003</td>
<td valign="top" align="center">63.00[60.00,71.00]</td>
<td valign="top" align="center">62.50[54.00,69.00]</td>
<td valign="top" align="center">0.003</td>
</tr>
<tr>
<td valign="top" align="left">Gender(%)</td>
<td valign="top" align="left">Male</td>
<td valign="top" align="center">412(72.3)</td>
<td valign="top" align="center">638(73.5)</td>
<td valign="top" rowspan="2" align="center">0.610</td>
<td valign="top" align="center">118(72.0)</td>
<td valign="top" align="center">189(74.7)</td>
<td valign="top" rowspan="2" align="center">0.533</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Female</td>
<td valign="top" align="center">158(27.7)</td>
<td valign="top" align="center">230(26.5)</td>
<td valign="top" align="center">46(28.0)</td>
<td valign="top" align="center">64(25.3)</td>
</tr>
<tr>
<td valign="top" align="left">Fatty liver(%)</td>
<td valign="top" align="left">Yes</td>
<td valign="top" align="center">267(46.8)</td>
<td valign="top" align="center">237(27.3)</td>
<td valign="top" rowspan="2" align="center">&lt;0.001</td>
<td valign="top" align="center">68(41.5)</td>
<td valign="top" align="center">75(29.6)</td>
<td valign="top" rowspan="2" align="center">0.013</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">No</td>
<td valign="top" align="center">303(53.2)</td>
<td valign="top" align="center">631(72.7)</td>
<td valign="top" align="center">96(58.5)</td>
<td valign="top" align="center">178(70.4)</td>
</tr>
<tr>
<td valign="top" align="left">Gallstone(%)</td>
<td valign="top" align="left">Yes</td>
<td valign="top" align="center">102(17.9)</td>
<td valign="top" align="center">125 (14.4)</td>
<td valign="top" rowspan="2" align="center">0.090</td>
<td valign="top" align="center">26(15.9)</td>
<td valign="top" align="center">35(13.8)</td>
<td valign="top" rowspan="2" align="center">0.569</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">No</td>
<td valign="top" align="center">468(82.1)</td>
<td valign="top" align="center">743(85.6)</td>
<td valign="top" align="center">138(84.1)</td>
<td valign="top" align="center">218(86.2)</td>
</tr>
<tr>
<td valign="top" align="left">Cholecystectomy</td>
<td valign="top" align="left">Yes</td>
<td valign="top" align="center">61(10.7)</td>
<td valign="top" align="center">60(6.9)</td>
<td valign="top" rowspan="2" align="center">0.011</td>
<td valign="top" align="center">13(7.9)</td>
<td valign="top" align="center">15(5.9)</td>
<td valign="top" rowspan="2" align="center">0.428</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">No</td>
<td valign="top" align="center">509(89.3)</td>
<td valign="top" align="center">808(93.1)</td>
<td valign="top" align="center">151(92.1)</td>
<td valign="top" align="center">238(94.1)</td>
</tr>
<tr>
<td valign="top" align="left">SBP(mmHg) (median [IQR])</td>
<td valign="top" align="left" rowspan="17"/>
<td valign="top" align="center">136.00[123.00,150.00]</td>
<td valign="top" align="center">128.00[116.00,139.00]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">144.00[128.00,157.50]</td>
<td valign="top" align="center">132.00[121.00,145.75]</td>
<td valign="top" align="center">&lt;0.001</td>
</tr>
<tr>
<td valign="top" align="left">DBP(mmHg) (median [IQR])</td>
<td valign="top" align="center">81.00[73.00,87.00]</td>
<td valign="top" align="center">78.00[70.00,84.00]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">80.00[71.50,87.00]</td>
<td valign="top" align="center">77.00[70.25,85.75]</td>
<td valign="top" align="center">0.001</td>
</tr>
<tr>
<td valign="top" align="left">WC(cm) (median [IQR])</td>
<td valign="top" align="center">90.00[85.00,96.00]</td>
<td valign="top" align="center">87.00[80.00,93.00]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">92.00[84.00,96.00]</td>
<td valign="top" align="center">88.50[81.25,94.75]</td>
<td valign="top" align="center">0.002</td>
</tr>
<tr>
<td valign="top" align="left">BMI(kg/m&#xb2;) (median [IQR])</td>
<td valign="top" align="center">25.25[23.51,27.03]</td>
<td valign="top" align="center">24.49[22.52,26.17]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">24.77[22.84,27.18]</td>
<td valign="top" align="center">24.29[22.31,26.22]</td>
<td valign="top" align="center">0.001</td>
</tr>
<tr>
<td valign="top" align="left">BUN(mmol/L) (median [IQR])</td>
<td valign="top" align="center">5.10[4.50,6.00]</td>
<td valign="top" align="center">5.00[4.30,5.90]</td>
<td valign="top" align="center">0.024</td>
<td valign="top" align="center">5.10[4.15,5.90]</td>
<td valign="top" align="center">5.10[4.30,6.20]</td>
<td valign="top" align="center">0.217</td>
</tr>
<tr>
<td valign="top" align="left">Scr(&#x3bc;mol/L) (median [IQR])</td>
<td valign="top" align="center">70.95[60.30,80.03]</td>
<td valign="top" align="center">72.60[61.83,81.78]</td>
<td valign="top" align="center">0.068</td>
<td valign="top" align="center">72.00[61.20,81.45]</td>
<td valign="top" align="center">75.05[62.13,83.40]</td>
<td valign="top" align="center">0.961</td>
</tr>
<tr>
<td valign="top" align="left">SUA (&#x3bc;mol/L) (median [IQR])</td>
<td valign="top" align="center">367.15[313.28,418.15]</td>
<td valign="top" align="center">349.15[289.43,414.40]</td>
<td valign="top" align="center">0.001</td>
<td valign="top" align="center">388.40[319.50,440.95]</td>
<td valign="top" align="center">351.00[292.00,392.98]</td>
<td valign="top" align="center">0.340</td>
</tr>
<tr>
<td valign="top" align="left">ALT(U/L) (median [IQR])</td>
<td valign="top" align="center">21.40[15.60,29.00]</td>
<td valign="top" align="center">18.60[14.00,25.00]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">19.00[14.35,27.00]</td>
<td valign="top" align="center">17.25[13.03,22.90]</td>
<td valign="top" align="center">0.060</td>
</tr>
<tr>
<td valign="top" align="left">AST (U/L) (median [IQR])</td>
<td valign="top" align="center">21.00[17.90,25.30]</td>
<td valign="top" align="center">20.50[17.70,23.78]</td>
<td valign="top" align="center">0.038</td>
<td valign="top" align="center">19.00[17.00,23.10]</td>
<td valign="top" align="center">20.00[17.00,23.18]</td>
<td valign="top" align="center">0.110</td>
</tr>
<tr>
<td valign="top" align="left">GGT (U/L) (median [IQR])</td>
<td valign="top" align="center">26.85[19.10,41.73]</td>
<td valign="top" align="center">23.05[15.80,35.30]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">29.40[18.00,41.65]</td>
<td valign="top" align="center">21.35[14.20,31.25]</td>
<td valign="top" align="center">0.056</td>
</tr>
<tr>
<td valign="top" align="left">ALP (U/L) (median [IQR])</td>
<td valign="top" align="center">64.85[54.50,77.80]</td>
<td valign="top" align="center">63.45[53.93,75.68]</td>
<td valign="top" align="center">0.140</td>
<td valign="top" align="center">68.00[58.00,82.50]</td>
<td valign="top" align="center">64.95[53.20,76.95]</td>
<td valign="top" align="center">0.280</td>
</tr>
<tr>
<td valign="top" align="left">TC (mmol/L) (median [IQR])</td>
<td valign="top" align="center">5.04[4.41,5.69]</td>
<td valign="top" align="center">4.99[4.42,5.18]</td>
<td valign="top" align="center">0.094</td>
<td valign="top" align="center">4.90[4.31,5.57]</td>
<td valign="top" align="center">5.00[4.42,5.49]</td>
<td valign="top" align="center">0.640</td>
</tr>
<tr>
<td valign="top" align="left">TG (mmol/L) (median [IQR])</td>
<td valign="top" align="center">1.59[1.14,2.15]</td>
<td valign="top" align="center">1.34[0.93,1.91]</td>
<td valign="top" align="center">&lt;0.001</td>
<td valign="top" align="center">1.54[1.21,2.26]</td>
<td valign="top" align="center">1.30[1.00,1.91]</td>
<td valign="top" align="center">0.080</td>
</tr>
<tr>
<td valign="top" align="left">HDL(mmol/L) (median [IQR])</td>
<td valign="top" align="center">1.18[1.00,1.40]</td>
<td valign="top" align="center">1.22[1.05,1.48]</td>
<td valign="top" align="center">0.003</td>
<td valign="top" align="center">1.19[1.06,1.35]</td>
<td valign="top" align="center">1.27[1.06,1.50]</td>
<td valign="top" align="center">0.434</td>
</tr>
<tr>
<td valign="top" align="left">LDL (mmol/L) (median [IQR])</td>
<td valign="top" align="center">2.87[2.31,3.39]</td>
<td valign="top" align="center">2.85[2.36,3.29]</td>
<td valign="top" align="center">0.305</td>
<td valign="top" align="center">3.08[2.55,3.72]</td>
<td valign="top" align="center">2.95[2.44,3.57]</td>
<td valign="top" align="center">0.380</td>
</tr>
<tr>
<td valign="top" align="left">ApoA1(g/L) (median [IQR])</td>
<td valign="top" align="center">1.38[1.25,1.53]</td>
<td valign="top" align="center">1.37[1.25,1.51]</td>
<td valign="top" align="center">0.892</td>
<td valign="top" align="center">1.39[1.22,1.52]</td>
<td valign="top" align="center">1.39[1.27,1.50]</td>
<td valign="top" align="center">0.922</td>
</tr>
<tr>
<td valign="top" align="left">ApoB(g/L) (median [IQR])</td>
<td valign="top" align="center">0.98[0.83,1.14]</td>
<td valign="top" align="center">0.95[0.82,1.08]</td>
<td valign="top" align="center">0.001</td>
<td valign="top" align="center">0.95[0.77,1.08]</td>
<td valign="top" align="center">0.97[0.80,1.09]</td>
<td valign="top" align="center">0.047</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Feature selection</title>
<p>Following 99 iterations, the Boruta algorithm&#x2019;s feature variable screening results are illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>. The following variables were considered important for their association with IFG: SBP, Fatty liver, BMI, WC, TG, Scr, Age, DBP, ApoB, TC, SUA and GGT. To further clarify the risk or protective factors related to IFG, multivariate regression analysis was conducted between the IFG and non-IFG groups in the training cohort, revealing significant differences in the following variables: SBP, Fatty liver, WC and Scr. This study confirmed that Scr was a protective factor associated with IFG, while SBP, Fatty liver and WC were all risk factors for IFG. The results showed that for every 1 mmHg increase in SBP, the risk of IFG in middle-aged and elderly people increased by 3.0% (OR = 1.030, 95% CI: 1.020-1.040). The risk of IFG in middle-aged and elderly people increased by 3.4% for every 1 cm increase in waist circumference (OR = 1.034, 95% CI: 1.009-1.059). The risk of IFG was 65.7% higher in middle-aged and elderly people with fatty liver compared to those without fatty liver disease (OR = 1.657, 95% CI: 1.274-2.156). The risk of IFG in middle-aged and elderly people was elevated by 1.6% for every 1 &#x3bc;mol/L decrease in Scr levels (OR = 0.984, 95% CI: 0.975-0.993). Further details are provided in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. In the study, box plots describing the distribution of these three continuous variables including SBP, WC and Scr in the IFG and non-IFG groups were further plotted based on the training set. As shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, middle-aged and elderly people who developed IFG over a 5-year period had higher SBP, larger WC and lower Scr than middle-aged and elderly people without IFG.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Results of variable selection by the Boruta method. SBP, systolic blood pressure; WC, waist circumference; BMI, body mass index; TG, triglycerides; Scr, serum creatinine; ALT, alanine aminotransferase; DBP, diastolic blood pressure; ApoB, apolipoprotein B; GGT, &#x3b3;-glutamyl transpeptidase; SUA, serum uric acid; AST, aspartate aminotransferase; TC, total cholesterol; LDL, low-density lipoprotein cholesterol; HDL, high-density lipoprotein cholesterol; BUN, blood urea nitrogen; ALP, alkaline phosphatase; ApoA1, apolipoprotein A1. The Boruta method was applied only to the training dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g002.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Multivariate logistic regression analysis of impaired fasting glucose.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Variables</th>
<th valign="top" rowspan="2" align="center">&#x3b2;</th>
<th valign="top" rowspan="2" align="center">Standard Error</th>
<th valign="top" rowspan="2" align="center">Wald &#x3c7;2</th>
<th valign="top" rowspan="2" align="center">OR</th>
<th valign="top" colspan="2" align="center">95%CI</th>
<th valign="top" rowspan="2" align="center">
<italic>p</italic>-value</th>
</tr>
<tr>
<th valign="top" align="center">Lower</th>
<th valign="top" align="center">Upper</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Fatty liver</td>
<td valign="top" align="center">0.505</td>
<td valign="top" align="center">0.134</td>
<td valign="top" align="center">14.147</td>
<td valign="top" align="center">1.657</td>
<td valign="top" align="center">1.274</td>
<td valign="top" align="center">2.156</td>
<td valign="top" align="center">&lt;0.001</td>
</tr>
<tr>
<td valign="top" align="center">SBP</td>
<td valign="top" align="center">0.030</td>
<td valign="top" align="center">0.005</td>
<td valign="top" align="center">33.960</td>
<td valign="top" align="center">1.030</td>
<td valign="top" align="center">1.020</td>
<td valign="top" align="center">1.040</td>
<td valign="top" align="center">&lt;0.001</td>
</tr>
<tr>
<td valign="top" align="center">WC</td>
<td valign="top" align="center">0.033</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">7.044</td>
<td valign="top" align="center">1.034</td>
<td valign="top" align="center">1.009</td>
<td valign="top" align="center">1.059</td>
<td valign="top" align="center">0.008</td>
</tr>
<tr>
<td valign="top" align="center">Scr</td>
<td valign="top" align="center">-0.016</td>
<td valign="top" align="center">0.005</td>
<td valign="top" align="center">11.533</td>
<td valign="top" align="center">0.984</td>
<td valign="top" align="center">0.975</td>
<td valign="top" align="center">0.993</td>
<td valign="top" align="center">0.001</td>
</tr>
<tr>
<td valign="top" align="center">Constant</td>
<td valign="top" align="center">-5.558</td>
<td valign="top" align="center">0.868</td>
<td valign="top" align="center">40.994</td>
<td valign="top" align="center">0.004</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="top" align="center">&lt;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Box plots of the distribution of the three continuous variables in the IFG and non-IFG groups, based on the training set. <bold>(A)</bold> Box plot of the distribution of SBP in the IFG and non-IFG groups. <bold>(B)</bold> Box plot of the distribution of WC in the IFG and non-IFG groups. <bold>(C)</bold> Box plot of the distribution of Scr in the IFG and non-IFG groups.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g003.tif"/>
</fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>The evaluation and interpretation of the models</title>
<p>Four features selected by the Boruta algorithm and multivariate regression analysis were considered as input variables, with the development of IFG as the outcome. Five different algorithms, namely XGBoost, SVM, Na&#xef;ve Bayes, DT and LR, were applied to construct prediction models in this study. The AUC, Sensitivity, Specificity, ACC, PPV, NPV, and F1 scores of the models in the validation set were calculated from the confusion matrix results. The details are shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>. Among these models, the XGBoost model yielded the highest AUC, indicating superior performance. With the aid of grid search, the optimal structure of the XGBoost model was determined as follows: booster=&#x2018;gbtree&#x2019;, objective=&#x2018;binary:logistic&#x2019;, gamma=0.5, eta=0.06, max_depth=7, min_child_weight=5, subsample=0.65, colsample_bytree=0.72. <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> shows that, in the validation set, the XGBoost algorithm achieved a higher AUC (0.7391) than the SVM, Naive Bayes, DT and LR algorithms (0.7328, 0.7288, 0.6480 and 0.6795, respectively). Typically, a model with an AUC greater than 0.7 is considered to have good predictive performance. The calibration curves for the training and validation sets are plotted in this study and are shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. The results demonstrate the internal validation of the XGBoost model using the bootstrap method with 500 repetitions of sampling. The mean absolute errors for the training and validation sets were 0.010 and 0.025, respectively, indicating that the predicted probabilities of the XGBoost model closely aligned with the actual observations. The results of the Hosmer-Lemeshow goodness of fit test(H-L) showed that the model was well fitted (<italic>p</italic>&gt;0.05). The clinical effect of the XGBoost prediction model was evaluated in this study using DCA curves, as shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, showing that middle-aged and elderly individuals assessed by this XGBoost model as being at higher risk of developing IFG may obtain a higher net benefit if they receive intervention. In addition, the SHAP framework provided an intuitive interpretation of the XGBoost model, as shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Predictive performance indicators of prediction models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" colspan="10" align="left">(a) Prediction performance of the training set.</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" rowspan="2" align="center">Model</th>
<th valign="top" rowspan="2" align="center">AUC</th>
<th valign="top" colspan="2" align="center">95%CI</th>
<th valign="top" rowspan="2" align="center">Sensitivity</th>
<th valign="top" rowspan="2" align="center">Specificity</th>
<th valign="top" rowspan="2" align="center">Accuracy</th>
<th valign="top" rowspan="2" align="center">PPV</th>
<th valign="top" rowspan="2" align="center">NPV</th>
<th valign="top" rowspan="2" align="center">F1 score</th>
</tr>
<tr>
<th valign="top" align="center">Lower</th>
<th valign="top" align="center">Upper</th>
</tr>
<tr>
<td valign="top" align="center">
<bold>XGBoost</bold>
</td>
<td valign="top" align="center">0.8264</td>
<td valign="top" align="center">0.8079</td>
<td valign="top" align="center">0.8503</td>
<td valign="top" align="center">0.8561</td>
<td valign="top" align="center">0.6636</td>
<td valign="top" align="center">0.7399</td>
<td valign="top" align="center">0.6256</td>
<td valign="top" align="center">0.8754</td>
<td valign="top" align="center">0.7230</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>SVM</bold>
</td>
<td valign="top" align="center">0.7230</td>
<td valign="top" align="center">0.6959</td>
<td valign="top" align="center">0.7501</td>
<td valign="top" align="center">0.2052</td>
<td valign="top" align="center">0.4516</td>
<td valign="top" align="center">0.3540</td>
<td valign="top" align="center">0.1973</td>
<td valign="top" align="center">0.4639</td>
<td valign="top" align="center">0.2012</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>Naive Bayes</bold>
</td>
<td valign="top" align="center">0.6827</td>
<td valign="top" align="center">0.6548</td>
<td valign="top" align="center">0.7106</td>
<td valign="top" align="center">0.7673</td>
<td valign="top" align="center">0.4614</td>
<td valign="top" align="center">0.6460</td>
<td valign="top" align="center">0.5656</td>
<td valign="top" align="center">0.6845</td>
<td valign="top" align="center">0.7235</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>DT</bold>
</td>
<td valign="top" align="center">0.6796</td>
<td valign="top" align="center">0.6557</td>
<td valign="top" align="center">0.7035</td>
<td valign="top" align="center">0.8318</td>
<td valign="top" align="center">0.4474</td>
<td valign="top" align="center">0.6794</td>
<td valign="top" align="center">0.6360</td>
<td valign="top" align="center">0.7030</td>
<td valign="top" align="center">0.7580</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>LR</bold>
</td>
<td valign="top" align="center">0.6912</td>
<td valign="top" align="center">0.6637</td>
<td valign="top" align="center">0.7189</td>
<td valign="top" align="center">0.3947</td>
<td valign="top" align="center">0.8157</td>
<td valign="top" align="center">0.6488</td>
<td valign="top" align="center">0.5844</td>
<td valign="top" align="center">0.6724</td>
<td valign="top" align="center">0.4712</td>
</tr>
</tbody>
</table>
<table frame="hsides">
<thead>
<tr>
<th valign="top" colspan="10" align="left">(b) Prediction performance of the validation set.</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" rowspan="2" align="center">Model</th>
<th valign="top" rowspan="2" align="center">AUC</th>
<th valign="top" colspan="2" align="center">95%CI</th>
<th valign="top" rowspan="2" align="center">Sensitivity</th>
<th valign="top" rowspan="2" align="center">Specificity</th>
<th valign="top" rowspan="2" align="center">Accuracy</th>
<th valign="top" rowspan="2" align="center">PPV</th>
<th valign="top" rowspan="2" align="center">NPV</th>
<th valign="top" rowspan="2" align="center">F1 score</th>
</tr>
<tr>
<th valign="top" align="center">Lower</th>
<th valign="top" align="center">Upper</th>
</tr>
<tr>
<td valign="top" align="left">
<bold>XGBoost</bold>
</td>
<td valign="top" align="center">0.7391</td>
<td valign="top" align="center">0.6911</td>
<td valign="top" align="center">0.7870</td>
<td valign="top" align="center">0.7561</td>
<td valign="top" align="center">0.6245</td>
<td valign="top" align="center">0.6763</td>
<td valign="top" align="center">0.5662</td>
<td valign="top" align="center">0.7980</td>
<td valign="top" align="center">0.6475</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>SVM</bold>
</td>
<td valign="top" align="center">0.7328</td>
<td valign="top" align="center">0.6838</td>
<td valign="top" align="center">0.7819</td>
<td valign="top" align="center">0.1646</td>
<td valign="top" align="center">0.4980</td>
<td valign="top" align="center">0.3669</td>
<td valign="top" align="center">0.1753</td>
<td valign="top" align="center">0.4791</td>
<td valign="top" align="center">0.1698</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Naive Bayes</bold>
</td>
<td valign="top" align="center">0.7288</td>
<td valign="top" align="center">0.6804</td>
<td valign="top" align="center">0.7773</td>
<td valign="top" align="center">0.7787</td>
<td valign="top" align="center">0.5549</td>
<td valign="top" align="center">0.6906</td>
<td valign="top" align="center">0.6190</td>
<td valign="top" align="center">0.7296</td>
<td valign="top" align="center">0.7533</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>DT</bold>
</td>
<td valign="top" align="center">0.6480</td>
<td valign="top" align="center">0.6027</td>
<td valign="top" align="center">0.6932</td>
<td valign="top" align="center">0.8142</td>
<td valign="top" align="center">0.4817</td>
<td valign="top" align="center">0.6835</td>
<td valign="top" align="center">0.6270</td>
<td valign="top" align="center">0.7079</td>
<td valign="top" align="center">0.7235</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>LR</bold>
</td>
<td valign="top" align="center">0.6795</td>
<td valign="top" align="center">0.6275</td>
<td valign="top" align="center">0.7316</td>
<td valign="top" align="center">0.3598</td>
<td valign="top" align="center">0.8735</td>
<td valign="top" align="center">0.6715</td>
<td valign="top" align="center">0.6484</td>
<td valign="top" align="center">0.6779</td>
<td valign="top" align="center">0.4627</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>AUC, Area Under the Curve; XGBoost, Extreme Gradient Boosting; DT, Decision Tree; SVM, Support Vector Machine; LR, Logistic Regression; PPV, Positive Predictive Value; NPV, Negative Predictive Value.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Calibration plot. The x&#x2010;axis represents the XGBoost model&#x2010;predicted probability, and the y&#x2010;axis represents the actual probability of IFG. <bold>(A)</bold> The plot shows the calibration curve of the training set. A perfect prediction would fall along the 45&#x2010;degree line (&#x201c;ideal&#x201d; line). The &#x201c;apparent&#x201d; line represents the training cohort, and the solid black line represents bias corrected by bootstrapping (500 repetitions), indicating the observed performance of the XGBoost model. <bold>(B)</bold> The plot shows the calibration curve of the validation set. A perfect prediction would fall along the 45&#x2010;degree line (&#x201c;ideal&#x201d; line). The &#x201c;apparent&#x201d; line represents the validation cohort, and the solid black line represents bias corrected by bootstrapping (500 repetitions), indicating the observed performance of the XGBoost model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g004.tif"/>
</fig>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>DCA plot. <bold>(A)</bold> The plot shows the decision curve of the training set. <bold>(B)</bold> The plot shows the decision curve of the validation set.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>The SHAP plot of the XGBoost model. The ordinate represents the name of the variable. The variables from top to bottom are of decreasing importance to the predicted results and the number beside the variables is the mean of the SHAP values for all samples. Each point in the graph represents the SHAP value for each sample, with colors closer to purple indicating a larger value and closer to yellow indicating a smaller value. The more dispersed the points in the graph, the greater the influence of the variable on the model. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> shows that SBP has the greatest impact on the model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g006.tif"/>
</fig>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Individualized prediction of IFG</title>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> illustrates the SHAP analysis, showcasing the role of important variables on individual predictions in 2 randomly selected samples from the validation set. The local SHAP plots illustrated the contributions of variables to the outcomes for each sample. In <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>, the purple portion of the local SHAP force plot represents support for a positive prediction, while the yellow portion indicates support for a negative prediction. The length of the feature lines corresponds to the size of their contribution. For example, case 1 exhibited a high predicted probability of 0.82 for progressing to IFG, as predicted by the XGBoost model. SBP was the most significant feature contributing to the prediction, followed by Scr, Fatty liver and WC. This was enough to confirm the usefulness of the XGBoost model and contribute to increasing doctors&#x2019; trust in the predictive model to help them make the right auxiliary decisions.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Predicted outcome of individual occurrence of IFG. <bold>(A)</bold> The local SHAP plot of case #1. Case #1: male, 72 years old, with a positive outcome at the end of follow-up; the model predicted an 81.9% probability of IFG for this individual. <bold>(B)</bold> The local SHAP plot of case #2. Case #2: female, 54 years old, with a negative outcome at the end of follow-up; the model predicted a 16.3% probability of IFG for this individual.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-15-1368225-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The strength of this study lies in the establishment of a retrospective cohort using clinical data from longitudinal physical examinations of middle-aged and elderly people from 2018 to 2022. The analysis of the original data can accurately reflect the real-world problems, and the results of the study have important reference value for further in-depth discussions. Furthermore, a prediction model for development of IFG was constructed by integrating the XGBoost algorithm. SHAP plots were used to visualize the model, which solved the limitations imposed by the &#x201c;black box&#x201d; nature of traditional machine learning. Boruta&#x2019;s algorithm screened a total of twelve features deemed important in relation to IFG. To further clarify the risk or protective factors related to IFG, multivariate regression analysis was conducted between the IFG and non-IFG groups in the training cohort. Four features were ultimately selected for inclusion in the model and it was concluded that all of these variables are readily available in routine medical practice and therefore have clinical value. In clinical practice, the accurate identification or screening of IFG presents a crucial opportunity to control diabetes progression and reduce its associated harm.</p>
<p>Research has indicated that insulin sensitivity gradually decreases over time, a phenomenon that manifests a decade before the diagnosis of T2DM (<xref ref-type="bibr" rid="B20">20</xref>). This finding provides evidence that insulin resistance (IR) is already present and the function of pancreatic &#x3b2;-cells is diminished when the body is in the stage of IFG (<xref ref-type="bibr" rid="B21">21</xref>). Hypertension is a common diabetes complication, with a study by Emdin et&#xa0;al. revealing that for every 20 mmHg increase in SBP, the risk of new-onset T2DM rises by approximately 58% (<xref ref-type="bibr" rid="B22">22</xref>). Nonlinear associations between blood pressure and diabetes risk have been observed, with SBP demonstrating a J-shaped curve, while DBP shows a U-shaped curve (<xref ref-type="bibr" rid="B23">23</xref>). Additionally, this study underscores the interaction effect of high SBP (&gt;200 mmHg), low DBP (&lt;69 mmHg), and age (&gt;50 years) in increasing diabetes risk. SBP emerges as a more significant contributor to dysglycemia compared to DBP, particularly in individuals over 50 years old (<xref ref-type="bibr" rid="B24">24</xref>). These findings align with the present study, which suggests that elevated SBP has a greater influence on IFG occurrence than elevated DBP. WC reflects abdominal obesity, which, when present, leads to IR, characterized by impaired insulin response in peripheral tissues, and altered glucose uptake and utilization (<xref ref-type="bibr" rid="B25">25</xref>). Several studies, including this one, have found strong correlations between IFG and obesity-related indicators such as WC and BMI, underscoring their ability to shift glucose metabolism from normal to impaired in middle-aged and elderly people.</p>
<p>The SHAP algorithm, rooted in game theory, allows for the analysis of feature contributions to model predictions, offering both local and global interpretations. Each feature is considered a contributor, and its marginal contribution is calculated when added to the model (<xref ref-type="bibr" rid="B26">26</xref>). The SHAP model&#x2019;s key advantage is its capacity to reflect the influence of features in each sample, including both positive and negative effects on predictions. SHAP analysis in this study demonstrates that lower Scr levels are associated with a higher likelihood of IFG. This aligns with previous research suggesting that low creatinine levels may correlate with IFG onset, even after adjusting for variables such as age, BMI, and SBP (<xref ref-type="bibr" rid="B27">27</xref>). Creatinine levels serve as a proxy for skeletal muscle mass, with lower levels indicating reduced muscle volume and fewer insulin targets (<xref ref-type="bibr" rid="B28">28</xref>). Thus, lower Scr levels may contribute to IFG (<xref ref-type="bibr" rid="B29">29</xref>). The results obtained by the model helped us to better understand the importance of each feature to the model&#x2019;s prediction. Among the indicators detected by the model, the four most closely related to IFG were SBP, fatty liver, WC and Scr. The high correlation between the above variables and IFG further emphasized the importance of early intervention in preventing the development of IFG and T2DM.</p>
<p>There are some theoretical and practical limitations in this study. First, deep learning algorithms can now be used for both structured and unstructured data. In the future, unstructured data need to be collected and combined with deep learning algorithms in order to build risk prediction models for the development of IFG. Second, with regard to data collection, the number of participants in this study is small, so it is necessary to further expand the sample population and to conduct joint multi-center studies in the future. Third, potential factors contributing to IFG, such as dietary habits and lifestyle, were not considered in this study, and future studies should incorporate these variables with genetic information and nutritional intake for a more comprehensive understanding of IFG.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>In conclusion, this cohort study developed a predictive model for IFG development using the XGBoost algorithm, demonstrating promising performance. This effective computer-assisted approach can aid frontline clinicians in recognizing and intervening in IFG development. Consequently, it advances the frontiers of T2DM prevention through more effective early identification and mitigation of the disease&#x2019;s negative impact on middle-aged and elderly people.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>This study was approved by the Ethics Committee of the First Affiliated Hospital of Soochow University (Number: 2023-285). The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>QC: Conceptualization, Data curation, Investigation, Software, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. JP: Data curation, Methodology, Writing &#x2013; review &amp; editing. WL: Data curation, Formal analysis, Methodology, Writing &#x2013; original draft. YZ: Conceptualization, Data curation, Software, Writing &#x2013; original draft. JL: Data curation, Methodology, Software, Visualization, Writing &#x2013; original draft. LL: Conceptualization, Data curation, Methodology, Software, Writing &#x2013; original draft. PX: Data curation, Software, Supervision, Writing &#x2013; review &amp; editing. JZ: Conceptualization, Methodology, Supervision, Validation, Visualization, Writing &#x2013; review &amp; editing. MH: Conceptualization, Data curation, Funding acquisition, Investigation, Resources, Visualization, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research received financial support from the National Natural Science Foundation of China (Grant No. 81901262), the Natural Science Foundation of Jiangsu Province (Grant No. BK20190171), the Elderly Health Research Project of Jiangsu Province (Grant No. LR2021010), the Science and Technology Plan Projects of Suzhou (Grant No. SKY2023164), and Jiangsu Provincial Government Scholarship for Overseas Studies.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We would like to thank the First Affiliated Hospital of Soochow University for help with data management and collection.</p>
</ack>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fendo.2024.1368225/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fendo.2024.1368225/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hinault</surname> <given-names>C</given-names>
</name>
<name>
<surname>Caroli-Bosc</surname> <given-names>P</given-names>
</name>
<name>
<surname>Bost</surname> <given-names>F</given-names>
</name>
<name>
<surname>Chevalier</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>Critical overview on endocrine disruptors in diabetes mellitus</article-title>. <source>Int J Mol Sci</source>. (<year>2023</year>) <volume>24</volume>:<elocation-id>4537</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms24054537</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Teng</surname> <given-names>D</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>X</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>G</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Quan</surname> <given-names>H</given-names>
</name>
<etal/>
</person-group>. <article-title>Prevalence of diabetes recorded in mainland China using 2018 diagnostic criteria from the American Diabetes Association: national cross sectional study</article-title>. <source>BMJ</source>. (<year>2020</year>) <volume>369</volume>:<elocation-id>m997</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmj.m997</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vijayakumar</surname> <given-names>G</given-names>
</name>
<name>
<surname>Manghat</surname> <given-names>S</given-names>
</name>
<name>
<surname>Vijayakumar</surname> <given-names>R</given-names>
</name>
<name>
<surname>Simon</surname> <given-names>L</given-names>
</name>
<name>
<surname>Scaria</surname> <given-names>LM</given-names>
</name>
<name>
<surname>Vijayakumar</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Incidence of type 2 diabetes mellitus and prediabetes in Kerala, India: results from a 10-year prospective cohort</article-title>. <source>BMC Public Health</source>. (<year>2019</year>) <volume>19</volume>:<fpage>140</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12889-019-6445-6</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Glechner</surname> <given-names>A</given-names>
</name>
<name>
<surname>Keuchel</surname> <given-names>L</given-names>
</name>
<name>
<surname>Affengruber</surname> <given-names>L</given-names>
</name>
<name>
<surname>Titscher</surname> <given-names>V</given-names>
</name>
<name>
<surname>Sommer</surname> <given-names>I</given-names>
</name>
<name>
<surname>Matyas</surname> <given-names>N</given-names>
</name>
<etal/>
</person-group>. <article-title>Effects of lifestyle changes on adults with prediabetes: A systematic review and meta-analysis</article-title>. <source>Prim Care Diabetes</source>. (<year>2018</year>) <volume>12</volume>:<fpage>393</fpage>&#x2013;<lpage>408</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.pcd.2018.07.003</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Byeon</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Exploring the risk factors of impaired fasting glucose in middle-aged population living in South Korean communities by using categorical boosting machine</article-title>. <source>Front Endocrinol (Lausanne)</source>. (<year>2022</year>) <volume>13</volume>:<elocation-id>1013162</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fendo.2022.1013162</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abdullah</surname> <given-names>K</given-names>
</name>
<name>
<surname>Jacob</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hussain</surname> <given-names>HY</given-names>
</name>
<name>
<surname>Salim</surname> <given-names>NA</given-names>
</name>
</person-group>. <article-title>Prediabetes Risk Assessment among Nurses Recruited in a Dubai Local Government Hospital: A Cross-Sectional Study</article-title>. <source>Int J Diabetes Metab</source>. (<year>2019</year>) <volume>25</volume>:<page-range>39&#x2013;44</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1159/000500913</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname> <given-names>J</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>M</given-names>
</name>
<name>
<surname>Mo</surname> <given-names>K</given-names>
</name>
<name>
<surname>Mao</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Emerging role of artificial intelligence in diagnosis, classification and clinical management of glioma</article-title>. <source>Semin Cancer Biol</source>. (<year>2023</year>) <volume>S1044-579X</volume>(<issue>23</issue>)<elocation-id>00045&#x2013;7</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.semcancer.2023.03.006</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>T</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>W</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L</given-names>
</name>
<name>
<surname>He</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>You</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>A machine learning-based framework to identify type 2 diabetes through electronic health records</article-title>. <source>Int J Med Inform</source>. (<year>2017</year>) <volume>97</volume>:<page-range>120&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.09.014</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Verma</surname> <given-names>AA</given-names>
</name>
<name>
<surname>Murray</surname> <given-names>J</given-names>
</name>
<name>
<surname>Greiner</surname> <given-names>R</given-names>
</name>
<name>
<surname>Cohen</surname> <given-names>JP</given-names>
</name>
<name>
<surname>Shojania</surname> <given-names>KG</given-names>
</name>
<name>
<surname>Ghassemi</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Implementing machine learning in medicine</article-title>. <source>CMAJ</source>. (<year>2021</year>) <volume>193</volume>:<page-range>E1351&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1503/cmaj.202434</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zivkovic</surname> <given-names>M</given-names>
</name>
<name>
<surname>Bacanin</surname> <given-names>N</given-names>
</name>
<name>
<surname>Antonijevic</surname> <given-names>M</given-names>
</name>
<name>
<surname>Nikolic</surname> <given-names>B</given-names>
</name>
<name>
<surname>Kvascev</surname> <given-names>G</given-names>
</name>
<name>
<surname>Marjanovic</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Hybrid CNN and XGBoost model tuned by modified arithmetic optimization algorithm for COVID-19 early diagnostics from X-ray images</article-title>. <source>Electronics</source>. (<year>2022</year>) <volume>11</volume>:<elocation-id>3798</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics11223798</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Malakar</surname> <given-names>S</given-names>
</name>
<name>
<surname>Ghosh</surname> <given-names>M</given-names>
</name>
<name>
<surname>Bhowmik</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sarkar</surname> <given-names>R</given-names>
</name>
<name>
<surname>Nasipuri</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>A GA based hierarchical feature selection approach for handwritten word recognition</article-title>. <source>Neural Comput Applic</source>. (<year>2020</year>) <volume>32</volume>:<page-range>2533&#x2013;52</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00521-018-3937-8</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bacanin</surname> <given-names>N</given-names>
</name>
<name>
<surname>Stoean</surname> <given-names>R</given-names>
</name>
<name>
<surname>Zivkovic</surname> <given-names>M</given-names>
</name>
<name>
<surname>Petrovic</surname> <given-names>A</given-names>
</name>
<name>
<surname>Rashid</surname> <given-names>TA</given-names>
</name>
<name>
<surname>Bezdan</surname> <given-names>T</given-names>
</name>
</person-group>. <article-title>Performance of a novel chaotic firefly algorithm with enhanced exploration for tackling global optimization problems: application for dropout regularization</article-title>. <source>Mathematics</source>. (<year>2021</year>) <volume>9</volume>:<elocation-id>2705</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/math9212705</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bacanin</surname> <given-names>N</given-names>
</name>
<name>
<surname>Zivkovic</surname> <given-names>M</given-names>
</name>
<name>
<surname>Al-Turjman</surname> <given-names>F</given-names>
</name>
<name>
<surname>Venkatachalam</surname> <given-names>K</given-names>
</name>
<name>
<surname>Trojovsk&#xfd;</surname> <given-names>P</given-names>
</name>
<name>
<surname>Strumberger</surname> <given-names>I</given-names>
</name>
<etal/>
</person-group>. <article-title>Hybridized sine cosine algorithm with convolutional neural networks dropout regularization application</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>6302</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-09744-2</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alberti</surname> <given-names>KG</given-names>
</name>
<name>
<surname>Zimmet</surname> <given-names>PZ</given-names>
</name>
</person-group>. <article-title>Definition, diagnosis and classification of diabetes mellitus and its complications. Part 1: diagnosis and classification of diabetes mellitus provisional report of a WHO consultation</article-title>. <source>Diabet Med</source>. (<year>1998</year>) <volume>15</volume>:<page-range>539&#x2013;53</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/(SICI)1096-9136(199807)15:7&lt;539::AID-DIA668&gt;3.0.CO;2-S</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>National Workshop on Fatty Liver and Alcoholic Liver Disease, Chinese Society of Hepatology, Chinese Medical Association, Fatty Liver Expert Committee, Chinese Medical Doctor Association</collab>
</person-group>. <article-title>[Guidelines of prevention and treatment for nonalcoholic fatty liver disease: a 2018 update]</article-title>. <source>Zhonghua Gan Zang Bing Za Zhi</source>. (<year>2018</year>) <volume>26</volume>:<fpage>195</fpage>&#x2013;<lpage>203</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3760/cma.j.issn.1007-3418.2018.03.008</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walcher</surname> <given-names>T</given-names>
</name>
<name>
<surname>Haenle</surname> <given-names>MM</given-names>
</name>
<name>
<surname>Kron</surname> <given-names>M</given-names>
</name>
<name>
<surname>Hay</surname> <given-names>B</given-names>
</name>
<name>
<surname>Mason</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Walcher</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Vitamin C supplement use may protect against gallstones: an observational study on a randomly selected population</article-title>. <source>BMC Gastroenterol</source>. (<year>2009</year>) <volume>9</volume>:<elocation-id>74</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-230X-9-74</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>The association between gallstones and metabolic syndrome in urban Han Chinese: a longitudinal cohort study</article-title>. <source>Sci Rep</source>. (<year>2016</year>) <volume>6</volume>:<elocation-id>29937</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep29937</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C-M</given-names>
</name>
<name>
<surname>Tung</surname> <given-names>T-H</given-names>
</name>
<name>
<surname>Chou</surname> <given-names>P</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>VT-K</given-names>
</name>
<name>
<surname>Hsu</surname> <given-names>C-T</given-names>
</name>
<name>
<surname>Chien</surname> <given-names>W-S</given-names>
</name>
<etal/>
</person-group>. <article-title>Clinical correlation of gallstone disease in a Chinese population in Taiwan: experience at Cheng Hsin General Hospital</article-title>. <source>World J Gastroenterol</source>. (<year>2006</year>) <volume>12</volume>:<page-range>1281&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3748/wjg.v12.i8.1281</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kursa</surname> <given-names>MB</given-names>
</name>
<name>
<surname>Rudnicki</surname> <given-names>WR</given-names>
</name>
</person-group>. <article-title>Feature selection with boruta package</article-title>. <source>J Stat Software</source>. (<year>2010</year>) <volume>36</volume>:<fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v036.i11</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hulman</surname> <given-names>A</given-names>
</name>
<name>
<surname>Simmons</surname> <given-names>RK</given-names>
</name>
<name>
<surname>Brunner</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Witte</surname> <given-names>DR</given-names>
</name>
<name>
<surname>F&#xe6;rch</surname> <given-names>K</given-names>
</name>
<name>
<surname>Vistisen</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Trajectories of glycaemia, insulin sensitivity and insulin secretion in South Asian and white individuals before diagnosis of type 2 diabetes: a longitudinal analysis from the Whitehall II cohort study</article-title>. <source>Diabetologia</source>. (<year>2017</year>) <volume>60</volume>:<page-range>1252&#x2013;60</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00125-017-4275-6</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abdul-Ghani</surname> <given-names>MA</given-names>
</name>
<name>
<surname>Tripathy</surname> <given-names>D</given-names>
</name>
<name>
<surname>DeFronzo</surname> <given-names>RA</given-names>
</name>
</person-group>. <article-title>Contributions of beta-cell dysfunction and insulin resistance to the pathogenesis of impaired glucose tolerance and impaired fasting glucose</article-title>. <source>Diabetes Care</source>. (<year>2006</year>) <volume>29</volume>:<page-range>1130&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2337/diacare.2951130</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Emdin</surname> <given-names>CA</given-names>
</name>
<name>
<surname>Anderson</surname> <given-names>SG</given-names>
</name>
<name>
<surname>Woodward</surname> <given-names>M</given-names>
</name>
<name>
<surname>Rahimi</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Usual blood pressure and risk of new-onset diabetes: evidence from 4.1 million adults and a meta-analysis of prospective studies</article-title>. <source>J Am Coll Cardiol</source>. (<year>2015</year>) <volume>66</volume>:<page-range>1552&#x2013;62</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jacc.2015.07.059</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Turi</surname> <given-names>KN</given-names>
</name>
<name>
<surname>Buchner</surname> <given-names>DM</given-names>
</name>
<name>
<surname>Grigsby-Toussaint</surname> <given-names>DS</given-names>
</name>
</person-group>. <article-title>Predicting risk of type 2 diabetes by using data on easy-to-measure risk factors</article-title>. <source>Prev Chronic Dis</source>. (<year>2017</year>) <volume>14</volume>:<elocation-id>E23</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.5888/pcd14.160244</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bowling</surname> <given-names>CB</given-names>
</name>
<name>
<surname>Sloane</surname> <given-names>R</given-names>
</name>
<name>
<surname>Pieper</surname> <given-names>C</given-names>
</name>
<name>
<surname>Luciano</surname> <given-names>A</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>BR</given-names>
</name>
<name>
<surname>Simpson</surname> <given-names>LM</given-names>
</name>
<etal/>
</person-group>. <article-title>Association of sustained blood pressure control with lower risk for high-cost multimorbidities among medicare beneficiaries in ALLHAT</article-title>. <source>J Gen Intern Med</source>. (<year>2021</year>) <volume>36</volume>:<page-range>2221&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11606-021-06623-w</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hayashi</surname> <given-names>T</given-names>
</name>
<name>
<surname>Boyko</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Leonetti</surname> <given-names>DL</given-names>
</name>
<name>
<surname>McNeely</surname> <given-names>MJ</given-names>
</name>
<name>
<surname>Newell-Morris</surname> <given-names>L</given-names>
</name>
<name>
<surname>Kahn</surname> <given-names>SE</given-names>
</name>
<etal/>
</person-group>. <article-title>Visceral adiposity and the risk of impaired glucose tolerance: a prospective study among Japanese Americans</article-title>. <source>Diabetes Care</source>. (<year>2003</year>) <volume>26</volume>:<page-range>650&#x2013;5</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2337/diacare.26.3.650</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Xiang</surname> <given-names>D</given-names>
</name>
<name>
<surname>Ge</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>C</given-names>
</name>
<name>
<surname>Song</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>An interpretable prediction model for identifying N7-methylguanosine sites based on XGBoost and SHAP</article-title>. <source>Mol Ther Nucleic Acids</source>. (<year>2020</year>) <volume>22</volume>:<page-range>362&#x2013;72</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.omtn.2020.08.022</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoshida</surname> <given-names>N</given-names>
</name>
<name>
<surname>Miyake</surname> <given-names>T</given-names>
</name>
<name>
<surname>Yamamoto</surname> <given-names>S</given-names>
</name>
<name>
<surname>Furukawa</surname> <given-names>S</given-names>
</name>
<name>
<surname>Senba</surname> <given-names>H</given-names>
</name>
<name>
<surname>Kanzaki</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>The serum creatinine level might be associated with the onset of impaired fasting glucose: A community-based longitudinal cohort health checkup study</article-title>. <source>Intern Med</source>. (<year>2019</year>) <volume>58</volume>:<page-range>505&#x2013;10</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2169/internalmedicine.0760-18</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andrews</surname> <given-names>R</given-names>
</name>
<name>
<surname>Greenhaff</surname> <given-names>P</given-names>
</name>
<name>
<surname>Curtis</surname> <given-names>S</given-names>
</name>
<name>
<surname>Perry</surname> <given-names>A</given-names>
</name>
<name>
<surname>Cowley</surname> <given-names>AJ</given-names>
</name>
</person-group>. <article-title>The effect of dietary creatine supplementation on skeletal muscle metabolism in congestive heart failure</article-title>. <source>Eur Heart J</source>. (<year>1998</year>) <volume>19</volume>:<page-range>617&#x2013;22</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1053/euhj.1997.0767</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harita</surname> <given-names>N</given-names>
</name>
<name>
<surname>Hayashi</surname> <given-names>T</given-names>
</name>
<name>
<surname>Sato</surname> <given-names>KK</given-names>
</name>
<name>
<surname>Nakamura</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yoneda</surname> <given-names>T</given-names>
</name>
<name>
<surname>Endo</surname> <given-names>G</given-names>
</name>
<etal/>
</person-group>. <article-title>Lower serum creatinine is a new risk factor of type 2 diabetes: the Kansai healthcare study</article-title>. <source>Diabetes Care</source>. (<year>2009</year>) <volume>32</volume>:<page-range>424&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2337/dc08-1265</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>