<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cardiovasc. Med.</journal-id>
<journal-title>Frontiers in Cardiovascular Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cardiovasc. Med.</abbrev-journal-title>
<issn pub-type="epub">2297-055X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcvm.2022.854287</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Cardiovascular Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Cardiovascular Disease Prediction Model Based on Routine Physical Examination Indicators Using Machine Learning Methods: A Cohort Study</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Qian</surname> <given-names>Xin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Yu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Xianghui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Heng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>He</surname> <given-names>Jia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1795649/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Xinping</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Yan</surname> <given-names>Yizhong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Ma</surname> <given-names>Jiaolong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ma</surname> <given-names>Rulin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Guo</surname> <given-names>Shuxia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1634736/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Public Health, Shihezi University School of Medicine</institution>, <addr-line>Shihezi</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of NHC Key Laboratory of Prevention and Treatment of Central Asia High Incidence Diseases, The First Affiliated Hospital of Shihezi University Medical College</institution>, <addr-line>Shihezi</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Yun Fang, The University of Chicago, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Yichen Ding, The University of Texas at Dallas, United States; Junguk Hur, University of North Dakota, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Rulin Ma <email>marulin&#x00040;shzu.edu.cn</email></corresp>
<corresp id="c002">Shuxia Guo <email>gsxshzu&#x00040;sina.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Atherosclerosis and Vascular Medicine, a section of the journal Frontiers in Cardiovascular Medicine</p></fn>
<fn fn-type="equal" id="fn002"><p>&#x02020;These authors have contributed equally to this work and share first authorship</p></fn></author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>06</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>854287</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>01</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>05</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2022 Qian, Li, Zhang, Guo, He, Wang, Yan, Ma, Ma and Guo.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Qian, Li, Zhang, Guo, He, Wang, Yan, Ma, Ma and Guo</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license></permissions>
<abstract>
<sec>
<title>Background</title>
<p>Cardiovascular diseases (CVD) are currently the leading cause of premature death worldwide. Model-based early detection of high-risk populations for CVD is the key to CVD prevention. Thus, this research aimed to use machine learning (ML) algorithms to establish a CVD prediction model based on routine physical examination indicators suitable for the Xinjiang rural population.</p>
</sec>
<sec>
<title>Method</title>
<p>The research cohort data collection was divided into two stages. The first stage involved a baseline survey from 2010 to 2012, with follow-up ending in December 2017. The second-phase baseline survey was conducted from September to December 2016, and follow-up ended in August 2021. A total of 12,692 participants (10,407 Uyghur and 2,285 Kazak) were included in the study. Screening predictors and establishing variable subsets were based on least absolute shrinkage and selection operator (Lasso) regression, logistic regression forward partial likelihood estimation (FLR), random forest (RF) built-in feature importance, and RF permutation feature importance. The selected variable subsets were then used with the L1 regularized logistic regression (L1-LR), RF, support vector machine (SVM), and AdaBoost algorithms, and the resulting models were compared to establish a CVD prediction model suitable for this population. The incidence of CVD in this population was then analyzed.</p>
</sec>
<sec>
<title>Result</title>
<p>After 4.94 years of follow-up, a total of 1,176 people were diagnosed with CVD (cumulative incidence: 9.27%). In the comparison of discrimination and calibration, the prediction performance of the subset of variables selected based on FLR was better than that of other models. Combining the results of discrimination, calibration, and clinical validity, the prediction model based on L1-LR had the best prediction performance. Age, systolic blood pressure, low-density lipoprotein-C/high-density lipoprotein-C, triglyceride blood glucose index, body mass index, and body adiposity index were all important predictors of the onset of CVD in the Xinjiang rural population.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>In the Xinjiang rural population, the prediction model based on L1-LR had the best prediction performance.</p>
</sec>
</abstract>
<kwd-group>
<kwd>cardiovascular disease</kwd>
<kwd>machine learning</kwd>
<kwd>predictive models</kwd>
<kwd>routine physical examination indicators</kwd>
<kwd>cohort study</kwd>
</kwd-group>
<contract-sponsor id="cn001">Chinese Academy of Medical Sciences<named-content content-type="fundref-id">10.13039/501100005150</named-content></contract-sponsor>
<contract-sponsor id="cn002">Shihezi University<named-content content-type="fundref-id">10.13039/501100004317</named-content></contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="56"/>
<page-count count="11"/>
<word-count count="7632"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Cardiovascular disease (CVD), a chronic and complex disease caused by heart and vascular diseases, is currently the main cause of premature death and chronic disability globally (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Its treatment usually involves medical and surgical methods. Nevertheless, these treatments cannot cure CVD. Moreover, these treatments have a great impact on the quality of life of individuals with CVD. Therefore, the current management of CVD mainly focuses on preventive measures. Recent studies suggest that &#x0007E;80% of premature CVD mortality could be prevented through early intervention (<xref ref-type="bibr" rid="B3">3</xref>). In addition, CVD has a slow onset and long incubation period; thus, it is generally at a more serious stage at the time of diagnosis. Therefore, early identification of high-risk groups for CVD is particularly important for its prevention and control (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>In recent years, an increasing number of CVD prevention and control guidelines recommended the use of CVD risk prediction models to identify high-risk groups who could receive early intervention to reduce CVD risk (<xref ref-type="bibr" rid="B5">5</xref>). Most current risk prediction models for CVD were established using traditional statistical methods (<xref ref-type="bibr" rid="B6">6</xref>&#x02013;<xref ref-type="bibr" rid="B10">10</xref>). A model is established if it meets the requirements of independence and linearity. Therefore, it cannot reflect the complex relationship between variables, which affects the accuracy of the prediction model and the applicability of external verification (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>). Unlike traditional statistical methods, machine learning (ML) algorithms can effectively solve the problems of non-linearity, variable redundancy, and interaction between variables. Moreover, they can be used to explore the potential risk factors for CVD to improve predictive performance; hence, they are widely used in the field of CVD prevention and control (<xref ref-type="bibr" rid="B13">13</xref>). Despite these advantages, there are still controversies regarding their ability to predict CVD. Related studies reported that the predictive performance of ML algorithms was better than those of traditional statistical methods (<xref ref-type="bibr" rid="B14">14</xref>). Contrastingly, studies showed that the predictive performance of logistic regression (LR) was not weaker than that of machine learning algorithms (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>).</p>
<p>Xinjiang is located in northwest China and is home to multiple ethnic groups. Uyghur and Kazakh are the main ethnic groups in Xinjiang. Studies found that these populations have high prevalence of CVD risk factors, such as metabolic syndrome, hypertension, and obesity, thereby corresponding with high incidence of CVD (<xref ref-type="bibr" rid="B17">17</xref>&#x02013;<xref ref-type="bibr" rid="B20">20</xref>). Most prediction models for CVD are based on European and American populations (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B21">21</xref>). Although in recent years, Chinese researchers have established predictive models based on Cox regression and ML algorithms, most are based on a feature screening method for predictive modeling (<xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B23">23</xref>). Moreover, there are few reports on ethnic minority groups in Xinjiang, and previous studies showed that the Framingham risk score (FRS) and Pooled Cohort Equations (PCEs) were not suitable for identifying groups that had a high risk of CVD among the Uyghur and Kazak populations (<xref ref-type="bibr" rid="B24">24</xref>).</p>
<p>Thus, this study aimed to use machine learning algorithms to establish a CVD prediction model that was suitable for the Xinjiang Uyghur and Kazak populations based on routine physical examination indicators. This study also aimed to identify the main factors that affect the occurrence of CVD, to identify groups that had a high risk of CVD in early-stage disease, to provide a theoretical basis for the effective prevention of CVD, and to have important, practical significance for the comprehensive prevention and control of CVD in the Uyghur and Kazak populations.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec>
<title>Study Population</title>
<p>Baseline data collection was divided into two phases. In the first stage, a baseline survey was conducted from 2010 to 2012. Through stratified cluster random sampling, the Uyghur population in Jiangbazi Township, Jiashi County, Kashi Prefecture, southern Xinjiang, and the Kazakhs in Nalati Township, Xinyuan County, Ili Prefecture, northern Xinjiang, were selected. In the second stage, a baseline survey was conducted from September to December 2016, and the Uyghur population of the 51st Regiment of the Third Division of the Xinjiang Corps was selected as the research cohort through stratified cluster random sampling. A total of 19,549 people who were aged &#x02265;18 years and lived in the local area for &#x0003E;6 months were included in the study. The exclusion criteria included CVD at baseline, those lost to follow-up, and those with incomplete blood information. Follow-up continued until December 2017 for the first stage (median: 6.07 years) and until August 2021 for the second stage (median: 4.94 years). According to the inclusion and exclusion criteria, 5,335 and 7,357 people were included in the first and second stages, respectively, for a total of 12,692 individuals (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 1.1, 1.2</xref>). Data analysis was then performed (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1.3</xref>). All participants provided written informed consent. This study was approved by the Ethics Committee of the First Affiliated Hospital of Shihezi University School of Medicine (NO. SHZ2010LL01).</p>
</sec>
<sec>
<title>Data Collection</title>
<p>Data were collected <italic>via</italic> questionnaire, physical examination, and laboratory examination. Questionnaires were completed face-to-face. Anthropometric measurements such as height, weight, waist circumference (WC), hip circumference (HC), and blood pressure were obtained by trained professionals. Blood pressure was measured three times for each participant using a mercury sphygmomanometer after 5-min seated rest, and the average value was calculated. Hypertension was defined as systolic blood pressure (SBP) of &#x02265;140 mmHg or diastolic blood pressure (DBP) of &#x02265;90 mmHg. Prehypertension was defined as 140 &#x0003E; SBP &#x02265; 120 mmHg or 90 &#x0003E; DBP &#x02265; 80 mmHg (<xref ref-type="bibr" rid="B25">25</xref>). Synthetic indices were calculated based on anthropometric measurements: BMI [weight (kg)/height<sup>2</sup> (m)]; BAI (HC/height<sup>1.5</sup>-18); pulse pressure (SBP&#x02013;DBP); and waist-to-hip ratio [WHR; WC (cm)/HC (cm)]. A family history of diabetes was defined as a history of diabetes in at least one parent or sibling; the same criteria were used for a family history of stroke and coronary heart disease (CHD). Current smokers were defined as participants who had been smoking for &#x0003E;6 months (<xref ref-type="bibr" rid="B26">26</xref>). Drinking was defined as consuming alcoholic beverages (beer, red wine, and white wine) &#x02265;2 times a month (<xref ref-type="bibr" rid="B27">27</xref>). A 5 ml fasting blood sample was collected from each subject and levels of the fasting blood glucose (FBG), triglycerides (TGs), high-density lipoprotein cholesterol (HDL-C), total cholesterol (TC), low-density lipoprotein cholesterol (LDL-C), and other indicators were obtained using an automatic biochemical analyser (Olympus AU 2700; Olympus Diagnostics, Hamburg, Germany) at the First Affiliated Hospital of Shihezi University School of Medicine. 
In this study, individuals with diabetes (<xref ref-type="bibr" rid="B28">28</xref>) were defined as having FBG level of &#x02265;7.0 mmol/L or 2-h postprandial blood glucose level of &#x02265;11.1 mmol/L, a previous diabetes diagnosis, or use of blood sugar control drugs. We also calculated other synthetic indices, including TyG (TG [mg/dl]<sup>&#x0002A;</sup>FBG [mg/dl]); lipid accumulation product (LAP) (men: [WC-65]<sup>&#x0002A;</sup>TG [mmol/L]; women: [WC-58]<sup>&#x0002A;</sup>TG [mmol/L]); lipoprotein combine index (LCI) (TC<sup>&#x0002A;</sup>TG [mmol/L]<sup>&#x0002A;</sup>LDL-C/HDL-C); atherogenic index (AI) ([TC (mmol/L)-HDL-C]/HDL-C); atherogenic index of plasma (AIP) (Log[TG/HDL]); LpH (LDL-C/HDL-C ratio); and bilirubin comprehensive index (THT) (TC [mmol/L]/[HDL-C&#x0002B;TBIL (&#x003BC;mol/mL)]).</p>
</sec>
<sec>
<title>Data Pre-processing</title>
<p>There were some missing values in the database, and direct deletion of missing values would have resulted in the loss of sample information. Since only a few variables had missing values in this study, continuous variables were filled using the mean, while categorical variables were filled using the mode. Continuous variables were standardized, and categorical variables were processed by one-hot encoding, to reduce the influence of different variable units and quantity levels on the analysis. For the description of missing variables in this study, see <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>.</p>
</sec>
<sec>
<title>Diagnostic Criteria</title>
<p>The diagnostic criteria for CVD (<xref ref-type="bibr" rid="B29">29</xref>) pertained to the detection of ischaemic heart disease, cerebrovascular disease, and related diseases [International Classification of Diseases (ICD)-9: code 390&#x02013;459]; hospitalization; or death due to CVD (ICD-10) during the follow-up period. Data regarding patient questionnaire answers, medical records, and the diagnosis of CVD during the follow-up period were obtained and recorded. If the same type of CVD event occurred more than once in a patient, the first occurrence of CVD was the final event. The time of onset was recorded. Self-reported patients needed to provide proof of their clinical diagnosis.</p>
</sec>
<sec>
<title>Introduction to Predictive Models</title>
<p>Logistic regression belongs to probabilistic nonlinear regression and is one of the most widely used classification models. Logistic regression usually uses regularization to optimize the model. The adjustable parameters include inverse regularization parameters and methods (<xref ref-type="bibr" rid="B30">30</xref>). By adding a regularization coefficient to Logistic regression, the parameters of the variable are sparse, so that the weight of most of the feature vectors is 0, thereby reducing the dimension of the variable. SVM is currently one of the most common ML algorithms that can effectively solve the classification problem of small samples and nonlinear and high-dimensional data. It classifies samples by finding a set of hyperplanes in a high-dimensional space, and the samples closest to the hyperplane are called support vectors. When the training data are inseparable, this problem can be solved using the kernel trick (<xref ref-type="bibr" rid="B31">31</xref>). That is, the original features of the samples are mapped to a higher dimensional space that makes the samples linearly separable through the mapping function. The RF algorithm is an ensemble learning algorithm based on the decision tree algorithm. The basic idea is to integrate weak classifiers into a more robust model (<xref ref-type="bibr" rid="B32">32</xref>). AdaBoost (<xref ref-type="bibr" rid="B33">33</xref>) is an ensemble learning algorithm based on boosting. The algorithm first builds a weak learner based on the training data and then according to AdaBoost, increases the weight of the samples that were misclassified by the weak learner in the previous round. Then, it reduces the weight of the correctly classified samples, loops this process until the number of weak learners reaches the specified value, and then linearly combines all weak learners to obtain the final strong classifier by weighted majority voting. 
In this study, both random forest and Adaboost are ensemble learning algorithms based on decision trees. The decision tree algorithm selects variables by evaluating the characteristics and depth of dividing nodes, reducing the dimension of variables. The integrated model has better generalization error and can effectively reduce the overfitting combination phenomenon.</p>
</sec>
<sec>
<title>Model Establishment and Verification</title>
<p>The datasets were randomly divided into training datasets (927CVD/10153) and test datasets (249CVD/2539). The KS test was performed on the training and test datasets, and the <italic>P</italic>-values were both &#x0003E;0.05. The ratio of the training and test datasets was 8:2. We considered four variable selection methods: forward partial likelihood estimation (FLR) with logistic regression (LR), lasso regularization with logistic regression (Lasso-LR), permutation-based selection with random forest (RF), and built-in feature importance with RF. The resulting variable subsets were then used with algorithms such as L1-LR, RF, SVM, and AdaBoost. A prediction model of each algorithm was then established. The optimal prediction model of the same algorithm was then selected by discrimination and calibration, and the most suitable prediction model for the population was obtained by comparing the discrimination, calibration, and clinical effectiveness of the optimal prediction models of different algorithms.</p>
<p>The discrimination of the model was determined by comparing the area under the receiver operating characteristic curve (AUC), Net Reclassification Index (cNRI), and Integrated Discrimination Improvement Index (IDI) (<xref ref-type="bibr" rid="B34">34</xref>) between models, and the calibration degree was compared by calculating the Brier Score (BS) and Hosmer&#x02013;Lemeshow &#x003C7;<sup>2</sup> (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>). This study evaluated the clinical validity of the model using decision curve analysis (DCA) (<xref ref-type="bibr" rid="B37">37</xref>). The horizontal axis of the decision curve represents the threshold probability and the vertical axis represents the net benefit obtained after subtracting the harm from the benefit under the threshold probability. Using DCA to determine the net benefit that can be obtained using the model to screen high-risk groups compared with assuming that all participants are high-risk groups of CVD and implementing undifferentiated interventions, followed by calculating the net benefit without increasing the number of positive results, can reduce unnecessary interventions.</p>
<p>To avoid the problem of model over-fitting in the process of model selection and hyper-parameter tuning, we used a 10-fold cross-validation to optimize the parameters of the training set and subsequently selected the optimal model. This method divided the training data into 10 equal, non-repeated parts, nine of which were used for model training, and the remaining one was used for model verification. This process was repeated 10 times, and a combination of Bayesian optimisation and grid search was used to select the optimal hyperparameters. The AUC was used as the model selection criterion to determine the hyperparameter value that optimized the model predictive performance. Afterwards, we used the optimal hyperparameter value. We built the model on all training data sets. Finally, the independent test data set was used to make a final evaluation of model performance.</p>
</sec>
<sec>
<title>Data Analysis</title>
<p>Machine learning algorithms such as SVM output a predicted CVD occurrence by default and do not directly predict CVD probability. We therefore used the Platt scaling method (<xref ref-type="bibr" rid="B38">38</xref>) to calibrate the predicted probabilities output using the four models for more accurate prediction of CVD risk and identification of high-risk groups. Because the data used in this study were unbalanced, the threshold probability movement method was used. The default 0.5 of the model was not used as the standard for dividing the incidence of CVD. Instead, the optimal threshold probability of each model was determined according to the Youden Index, which was the basis for dividing the high-risk population of CVD. All statistical analyses were performed using Python 3.7 or R version 4.0. A two-sided test with a <italic>P</italic>-value of &#x0003C;0.05 was considered statistically significant.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec>
<title>Study Population Characteristics</title>
<p>A total of 12,692 people (6,264 men, 6,398 women; average age 41.24 years) were included in this study. A total of 1,176 CVD events were observed during a median follow-up of 4.94 years. The cumulative incidence was 9.27%. Compared with people without CVD events, those with CVD showed a higher trend in study indicators, such as age, BMI, TC, alkaline phosphatase (ALP), WC, and HC. Moreover, subjects with high blood pressure and type 2 diabetes were also at a higher risk of CVD development. The comparison of characteristics between participants with and without CVD in the training and test datasets is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 2.1</xref>,<xref ref-type="supplementary-material" rid="SM1">2.2</xref>.</p>
</sec>
<sec>
<title>Independent Variable Selection and Optimal Model Construction</title>
<p>The research database included demographic characteristics, physical examination findings, and serology results. There were 62 variables in total. After removing the missing ratio of &#x02265;50% and 11 variables unrelated to the research, a total of 51 variables were included. The following methods were used to filter and establish a subset of variables: FLR-LR (22 variables) and Lasso-LR (34 variables). The top 35 variables were selected according to the built-in random forest importance. The top 30 variables were subsequently selected as the screening subset according to permutation feature importance of RF. The variable subsets formed by the selected variables using the four methods are shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 3</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">6</xref>.</p>
<p>To further explore the predictive performance of different variable subsets on different algorithms, we used the above variable subsets and the full variable set to build predictive models using different algorithms to find the algorithm based on the optimal model. Through Bayesian optimization and grid search, the hyperparameter values with the best prediction performance of each model were selected (<xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 7.1</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">7.4</xref>). The AUC values of different algorithms in the training and test datasets are shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 8</xref>. There was no risk of overfitting and, to comprehensively consider the results of discrimination and calibration, this study concluded that the optimal models based on the four algorithms were Lasso-AdaBoost, FLR-L1-LR, FLR-RF, and FLR-SVM (<xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 9.1</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">9.4</xref>).</p>
</sec>
<sec>
<title>Comparison of Optimal Model Prediction Performance</title>
<p>The predictive performance indicators of the optimal models for each algorithm are listed in <xref ref-type="table" rid="T1">Table 1</xref>. All models have a moderate or higher (AUC value between 0.798 and 0.817) distinguishing ability. The AUC of FLR-L1-LR, FLR-SVM, FLR-RF, and Lasso-AdaBoost was 0.817 (95% CI, 0.801&#x02013;0.832), 0.814 (95% CI, 0.798&#x02013;0.829), 0.804 (95% CI, 0.788&#x02013;0.820), and 0.798 (95% CI, 0.782&#x02013;0.813), respectively. The receiver operating characteristic (ROC) curve of the prediction model is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Comparison of the prediction performance of the optimal model of each algorithm.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>Youden</bold><break/> <bold>Index</bold></th>
<th valign="top" align="center"><bold>Optimal</bold><break/> <bold>threshold</bold></th>
<th valign="top" align="center"><bold>Sensitivity</bold><break/> <bold>(%)</bold></th>
<th valign="top" align="center"><bold>Specificity</bold><break/> <bold>(%)</bold></th>
<th valign="top" align="center"><bold>PPV (%)</bold></th>
<th valign="top" align="center"><bold>NPV (%)</bold></th>
<th valign="top" align="center"><bold>Proportion of</bold><break/> <bold>high-risk</bold><break/> <bold>population (%)</bold></th>
<th valign="top" align="center"><bold>Brier score</bold></th>
<th valign="top" align="center"><bold>Hosmer-</bold><break/><bold>Lemeshow</bold><break/> <bold>&#x003C7;<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold><italic>P-</italic>Value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Lasso-AdaBoost</td>
<td valign="top" align="center">0.798 (0.782,<break/> 0.813)</td>
<td valign="top" align="center">0.472</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">73.09</td>
<td valign="top" align="center">74.10</td>
<td valign="top" align="center">23.5</td>
<td valign="top" align="center">96.2</td>
<td valign="top" align="center">30.4</td>
<td valign="top" align="center">0.078 (0.070, 0.086)</td>
<td valign="top" align="center">13.81</td>
<td valign="top" align="center">0.09</td>
</tr>
<tr>
<td valign="top" align="left">FLR-L1-LR</td>
<td valign="top" align="center">0.817 (0.801,<break/> 0.832)</td>
<td valign="top" align="center">0.524</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">73.49</td>
<td valign="top" align="center">78.86</td>
<td valign="top" align="center">27.4</td>
<td valign="top" align="center">96.5</td>
<td valign="top" align="center">26.7</td>
<td valign="top" align="center">0.076 (0.069, 0.084)</td>
<td valign="top" align="center">11.51</td>
<td valign="top" align="center">0.17</td>
</tr>
<tr>
<td valign="top" align="left">FLR-RF</td>
<td valign="top" align="center">0.804 (0.788,<break/> 0.820)</td>
<td valign="top" align="center">0.506</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">79.52</td>
<td valign="top" align="center">71.09</td>
<td valign="top" align="center">23.0</td>
<td valign="top" align="center">97.0</td>
<td valign="top" align="center">33.1</td>
<td valign="top" align="center">0.077 (0.070, 0.086)</td>
<td valign="top" align="center">11.59</td>
<td valign="top" align="center">0.17</td>
</tr>
<tr>
<td valign="top" align="left">FLR-SVM</td>
<td valign="top" align="center">0.814 (0.798,<break/> 0.829)</td>
<td valign="top" align="center">0.511</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">73.90</td>
<td valign="top" align="center">77.16</td>
<td valign="top" align="center">26.0</td>
<td valign="top" align="center">96.5</td>
<td valign="top" align="center">38.4</td>
<td valign="top" align="center">0.076 (0.069, 0.084)</td>
<td valign="top" align="center">16.10</td>
<td valign="top" align="center">0.04</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>AUC, area under the receiver operating characteristic curve; PPV, positive predictive value; NPV, negative predictive value; Lasso-AdaBoost, AdaBoost with Lasso regression; FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, random forest with forward Partial Likelihood Estimation; FLR-SVM, support vector machine with forward Partial Likelihood Estimation</italic>.</p>
</table-wrap-foot>
</table-wrap>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Receiver operator characteristic curves of the optimal prediction model in Xinjiang rural population. FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, Random forest with forward Partial Likelihood Estimation; FLR-SVM, Support vector machine with forward Partial Likelihood Estimation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcvm-09-854287-g0001.tif"/>
</fig>
<p>Compared with other optimal models, the FLR-L1-LR model performed better in terms of Youden index, specificity, and PPV when the optimal threshold was 0.11. BS and Hosmer&#x02013;Lemeshow &#x003C7;<sup>2</sup> also demonstrated that the FLR-L1-LR model was better than others. In the FLR-L1-LR model, 26.7% of the participants were identified as high risk for CVD development (<xref ref-type="table" rid="T1">Table 1</xref>). The results of the calibration curve showed that FLR-L1-LR, FLR-SVM, Lasso-AdaBoost, and FLR-RF predicted the number of patients with CVD to be 234.12, 234.05, 230.55, and 223.93, respectively. The corresponding predicted CVD events/observed CVD events (P/O) values were 94.02, 94.00, 92.59, and 89.93, respectively (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Calibration plots of four ML models in predicting CVD outcomes in Xinjiang rural population. CVD, cardiovascular disease; ML, machine learning; FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, Random forest with forward Partial Likelihood Estimation; FLR-SVM, Support vector machine with forward Partial Likelihood Estimation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcvm-09-854287-g0002.tif"/>
</fig>
<p>To further select a prediction model suitable for this population, we compared the differences between the AUC value, IDI, and cNRI of the optimal models. We found that the AUC values of FLR-L1-LR and FLR-SVM were similar (<italic>P</italic> &#x0003E; 0.05), and both were higher than the AUC values of Lasso-AdaBoost and FLR-RF (<italic>P</italic> &#x0003C; 0.05). The reclassification capabilities of each model were compared with that of the FLR-L1-LR model. The cNRI values of FLR-SVM and Lasso-AdaBoost were 0.278 and 0.208, respectively. Compared with the FLR-L1-LR model, the Lasso-AdaBoost and the FLR-SVM models had a correct classification rate of 21 and 28%, respectively. Similarly, FLR-SVM was compared with Lasso-AdaBoost in terms of the proportion of correct classification. The FLR-SVM had a 17% increased proportion of correct classification compared with that of the Lasso-AdaBoost. The difference between the reclassification capabilities of the remaining models was not statistically significant. The results of the comprehensive discrimination ability of each model, from best to worst, were FLR-L1-LR &#x0003E; FLR-SVM &#x0003E; FLR-RF &#x0003E; Lasso-AdaBoost. This is described in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Comparison of discrimination performance of optimal prediction models.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Predictive model</bold></th>
<th valign="top" align="center"><bold>AUC difference</bold></th>
<th valign="top" align="center"><bold><italic>P</italic>-Value</bold></th>
<th valign="top" align="center"><bold>cNRI</bold></th>
<th valign="top" align="center"><bold><italic>P</italic>-Value</bold></th>
<th valign="top" align="center"><bold>IDI</bold></th>
<th valign="top" align="center"><bold><italic>P</italic>-Value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Lasso-AdaBoost vs. FLR-L1-LR</td>
<td valign="top" align="center">0.019</td>
<td valign="top" align="center">0.002</td>
<td valign="top" align="center">0.208 (0.078, 0.337)</td>
<td valign="top" align="center">&#x0003C;0.001</td>
<td valign="top" align="center">0.032 (0.019, 0.045)</td>
<td valign="top" align="center">&#x0003C;0.010</td>
</tr>
<tr>
<td valign="top" align="left">Lasso-AdaBoost vs. FLR-RF</td>
<td valign="top" align="center">0.007</td>
<td valign="top" align="center">0.334</td>
<td valign="top" align="center">0.097 (&#x02212;0.033, 0.228)</td>
<td valign="top" align="center">0.143</td>
<td valign="top" align="center">0.016 (0.007, 0.025)</td>
<td valign="top" align="center">&#x0003C;0.010</td>
</tr>
<tr>
<td valign="top" align="left">Lasso-AdaBoost vs. FLR-SVM</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">0.047</td>
<td valign="top" align="center">0.167 (0.037, 0.296)</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">0.029 (0.016, 0.042)</td>
<td valign="top" align="center">&#x0003C;0.010</td>
</tr>
<tr>
<td valign="top" align="left">FLR-RF vs. FLR-L1-LR</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">0.045</td>
<td valign="top" align="center">0.108 (&#x02212;0.022, 0.238)</td>
<td valign="top" align="center">0.105</td>
<td valign="top" align="center">0.016 (0.003, 0.028)</td>
<td valign="top" align="center">0.010</td>
</tr>
<tr>
<td valign="top" align="left">FLR-RF vs. FLR-SVM</td>
<td valign="top" align="center">0.003</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">0.072 (&#x02212;0.058, 0.203)</td>
<td valign="top" align="center">0.278</td>
<td valign="top" align="center">0.013 (0.001, 0.026)</td>
<td valign="top" align="center">0.040</td>
</tr>
<tr>
<td valign="top" align="left">FLR-SVM vs. FLR-L1-LR</td>
<td valign="top" align="center">0.010</td>
<td valign="top" align="center">0.118</td>
<td valign="top" align="center">0.278 (0.149, 0.408)</td>
<td valign="top" align="center">&#x0003C;0.001</td>
<td valign="top" align="center">0.003 (0.001, 0.004)</td>
<td valign="top" align="center">&#x0003C;0.010</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>AUC, area under the receiver operating characteristic curve; cNRI, continuous Net Reclassification Index; IDI, Integrated Discrimination Improvement Index; Lasso-AdaBoost, AdaBoost with Lasso regression; FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, random forest with forward Partial Likelihood Estimation; FLR-SVM, support vector machine with forward Partial Likelihood Estimation</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>The clinical effectiveness of FLR-L1-LR, FLR-SVM, FLR-RF, and Lasso-AdaBoost based on the results of the decision curve is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. It is evident that the clinical application value of the FLR-L1-LR model is higher than that of FLR-SVM, Lasso-AdaBoost, and FLR-RF (<xref ref-type="fig" rid="F3">Figure 3</xref>, <xref ref-type="table" rid="T3">Table 3</xref>). Under the optimal threshold, we assumed that all participants were in a high-risk group for CVD. We then administered undifferentiated interventions for primary and secondary prevention. The net benefit of using the FLR-L1-LR model was 0.061. This showed that without increasing the positive results, 49 out of every 1,000 people could avoid unnecessary interventions.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Decision curves for predicting CVD outcomes in Xinjiang rural population using four ML models. CVD, cardiovascular disease; ML, machine learning; FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, Random forest with forward Partial Likelihood Estimation; FLR-SVM, Support vector machine with forward Partial Likelihood Estimation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcvm-09-854287-g0003.tif"/>
</fig>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Comparison of clinical effectiveness of models.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="center"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Pt (%)</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Net income</bold></th>
<th valign="top" align="center"><bold>Model</bold><break/> <bold>net</bold><break/> <bold>income</bold></th>
<th valign="top" align="center"><bold>Advantages</bold><break/> <bold>of the</bold><break/> <bold>model</bold><xref ref-type="table-fn" rid="TN3a"><sup><bold>&#x00023;</bold></sup></xref></th>
</tr>
<tr>
<th/>
<th/>
<th valign="top" align="center"><bold>Treat all</bold></th>
<th valign="top" align="center"><bold>Prediction</bold><break/> <bold>model</bold></th>
<th/>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">FLR-L1-LR</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">0.066</td>
<td valign="top" align="center">0.015</td>
<td valign="top" align="center">29</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">10</td>
<td valign="top" align="center">&#x02212;0.002</td>
<td valign="top" align="center">0.049</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">46</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">11<xref ref-type="table-fn" rid="TN3b"><sup>a</sup></xref></td>
<td valign="top" align="center">&#x02212;0.013</td>
<td valign="top" align="center">0.048</td>
<td valign="top" align="center">0.061</td>
<td valign="top" align="center">49</td>
</tr>
<tr>
<td valign="top" align="center">FLR-SVM</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">0.065</td>
<td valign="top" align="center">0.014</td>
<td valign="top" align="center">27</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">10</td>
<td valign="top" align="center">&#x02212;0.002</td>
<td valign="top" align="center">0.048</td>
<td valign="top" align="center">0.050</td>
<td valign="top" align="center">45</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">11<xref ref-type="table-fn" rid="TN3b"><sup>a</sup></xref></td>
<td valign="top" align="center">&#x02212;0.013</td>
<td valign="top" align="center">0.045</td>
<td valign="top" align="center">0.058</td>
<td valign="top" align="center">47</td>
</tr>
<tr>
<td valign="top" align="center">Lasso-<break/>AdaBoost</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">0.063</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">23</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">10</td>
<td valign="top" align="center">&#x02212;0.002</td>
<td valign="top" align="center">0.045</td>
<td valign="top" align="center">0.047</td>
<td valign="top" align="center">43</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">11<xref ref-type="table-fn" rid="TN3b"><sup>a</sup></xref></td>
<td valign="top" align="center">&#x02212;0.013</td>
<td valign="top" align="center">0.043</td>
<td valign="top" align="center">0.056</td>
<td valign="top" align="center">46</td>
</tr>
<tr>
<td valign="top" align="center">FLR-RF</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.013</td>
<td valign="top" align="center">25</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">10</td>
<td valign="top" align="center">&#x02212;0.002</td>
<td valign="top" align="center">0.046</td>
<td valign="top" align="center">0.048</td>
<td valign="top" align="center">43</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">8<xref ref-type="table-fn" rid="TN3b"><sup>a</sup></xref></td>
<td valign="top" align="center">0.02</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.033</td>
<td valign="top" align="center">38</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN3a"><label>&#x00023;</label><p><italic>The value was calculated as: (net benefit of the model &#x02013; net benefit of treat all)/[pt/(1 &#x02013; pt)] &#x000D7; 100.</italic></p></fn>
<fn id="TN3b"><label>a</label><p><italic>Select the optimal threshold probability of each model according to AUC.</italic></p></fn>
<p><italic>Pt, Threshold probability; Lasso-AdaBoost, AdaBoost with Lasso regression; FLR-L1-LR, L1 regularized Logistic regression with forward Partial Likelihood Estimation; FLR-RF, random forest with forward Partial Likelihood Estimation; FLR-SVM, support vector machine with forward Partial Likelihood Estimation</italic>.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>Variable Importance Ranking of the Optimal Model Output</title>
<p>Previous studies indicated that compared with FRS and PCE, the ML algorithm could better determine the nonlinear and complex relationships between variables and outcomes. Furthermore, the ML algorithm identified potential risk factors more effectively (<xref ref-type="bibr" rid="B39">39</xref>&#x02013;<xref ref-type="bibr" rid="B41">41</xref>). We further analyzed the relative relationship among the importance rankings of the algorithm variables using the coefficients of variables that could not be obtained based on the Gaussian kernel function. Therefore, this study only highlights the importance of the optimal model variables established by the AdaBoost, RF, and L1-LR algorithms to compare the ability of each variable to predict the incidence of CVD (<xref ref-type="fig" rid="F4">Figure 4</xref>). This study found that the risk factors for CVD included age, sex, ethnicity, occupation type, DBP, HDL-C level, and TC level, as well as factors that reflected the degree and type of body obesity, such as BAI and BMI. Risk factors also included those that reflected glucose and lipid metabolism, such as TyG, LpH level, and AI. These indicators were risk factors for CVD and could predict CVD risk.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Feature importance of included variables obtained from the random forest with forwarding Partial Likelihood Estimation (FLR-RF), L1 regularized Logistic regression with FLR (FLR-L1-LR), Lasso-AdaBoost model. SD, pulse pressure difference; DBP, diastolic blood pressure; BAI, body obesity index; BMI, body mass index; TyG, triglyceride blood glucose index; LpH, low-high-density lipoprotein ratio; AI, arteriosclerosis index; aUA, uric acid; TB, total bilirubin; APOB, apolipoprotein B; HDL-C, high-density lipoprotein cholesterol; TP, total protein; HBDH, &#x003B1;-hydroxybutyrate dehydrogenase; LDH, lactate dehydrogenase; SBP, systolic blood pressure; LCI, blood lipid index; AIP, Plasma arteriosclerosis index; TC, total cholesterol; ALP, alkaline phosphatase; aFBG, fasting blood glucose; AST, aspartate aminotransferase; WHR, waist-to-height ratio; APOAB, apolipoprotein AB; GGT, &#x003B3;-glutaminase; DB, Direct Bilirubin; DM, diabetes mellitus; Fhchd, Family history of coronary heart disease.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcvm-09-854287-g0004.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>The results of this study show that the cumulative incidence of CVD in the Xinjiang Uyghur and Kazak populations was 9.26%. The incidence was similar to that in African Americans (<xref ref-type="bibr" rid="B42">42</xref>). However, it was higher than those of Han Chinese ancestry (<xref ref-type="bibr" rid="B43">43</xref>&#x02013;<xref ref-type="bibr" rid="B45">45</xref>), which may relate to the population&#x00027;s unique genetic background and diet. Here, we used ML algorithms to establish a predictive model and discover the main factors for the occurrence of CVD in this population.</p>
<p>To achieve the best predictive performance of the established model, we selected variables through four variable screening methods. We subsequently established different variable subsets, unlike those in the previous study that only used the feature importance of the RF algorithm to select variables (<xref ref-type="bibr" rid="B46">46</xref>). Our results indicate that the subset of variables established using FLR showed the best performance on the L1-LR, RF, and SVM algorithms, similar to the results reported by De Silva et al. (<xref ref-type="bibr" rid="B47">47</xref>). Unlike other variable screening methods, FLR focused more on the linear relationship between variables. The model built based on the combination of FLR-screened variable subsets and other ML algorithms had better predictive performance. This may be due to the consideration of the linear relationship of variables based on logistic regression and the in-depth analysis of the nonlinear relationship using different machine learning algorithms.</p>
<p>When the optimal prediction models of the LR, SVM, RF, and AdaBoost algorithms were compared, the prediction performance of the LR-based model was better than that of the other ML algorithm models. These findings are similar to those of a 2019 systematic review (<xref ref-type="bibr" rid="B15">15</xref>). There are many possible reasons for this phenomenon. First, the number of variables included in this research was limited, and some ML algorithms were better at dealing with high-dimensional data problems. Moreover, the logistic regression model was established based on the L1 regularization method. This method was better at dealing with small samples and low-dimensional data and was not easily affected by outliers. The established model was more robust.</p>
<p>Second, the performance of the SVM-based prediction model was lower than that of LR but higher than those of RF and AdaBoost. These findings are similar to those reported by Wallert et al. (<xref ref-type="bibr" rid="B48">48</xref>). This might be because, although the SVM model based on the Gaussian kernel function could handle the nonlinear relationship among variables well, when dealing with research with fewer variables, its prediction performance was affected by insufficient variables. Prediction performance was lower in the SVM model compared with that of LR. Due to the poor interpretability of SVM and the difficulty of parameter optimization, the model has fewer clinical applications. Nevertheless, its high predictive potential was not ruled out.</p>
<p>Finally, concerning the RF and AdaBoost algorithms, the prediction performance of RF in this study was better than that of AdaBoost, although both are integrated learning algorithms. Nevertheless, both were lower than those of LR and SVM, which is consistent with the results of Hae et al. (<xref ref-type="bibr" rid="B49">49</xref>). This may be because, compared with a single algorithm, integrated learning algorithms such as RF and AdaBoost require a larger sample size to achieve the optimal model performance (<xref ref-type="bibr" rid="B50">50</xref>). Therefore, they did not show optimal performance with the medium sample size of this study.</p>
<p>A comprehensive analysis of the variable importance rankings of the three algorithms revealed that age and systolic blood pressure were the most important predictors. This was similar to the findings of previous studies (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B51">51</xref>). Furthermore, this study found that compared with a single blood lipid index, composite indicators such as LpH and TyG calculated from multiple blood lipid indicators showed better predictive performance. Similarly, in a study by Huang et al. (<xref ref-type="bibr" rid="B52">52</xref>), compared with HDL-C and LDL-C alone, LpH had a stronger correlation with the severity of coronary heart disease. The results of the Tehran Lipid and Glucose Metabolism Study showed that for every 1-standard-deviation increase in TyG, the individual CVD risk increased by 20% (<xref ref-type="bibr" rid="B53">53</xref>). In addition, similar studies showed that TyG was an important variable of CVD risk prediction. This was similar to the results of this study (<xref ref-type="bibr" rid="B54">54</xref>). BMI and BAI were indicators that reflected the degree and type of body obesity. Moreover, related research showed that they had value in predicting CVD incidence (<xref ref-type="bibr" rid="B55">55</xref>, <xref ref-type="bibr" rid="B56">56</xref>). The results of this study also showed that BMI and BAI had strong capabilities of CVD prediction. This may be due to the high-salt and high-fat diets of the Uyghur and Kazak populations, resulting in high body weight and large hip circumference.</p>
<p>Although we believe that the included population represents the general Uyghur and Kazak populations, this study has certain limitations. First, the variable information included was relatively small. ML algorithms are good at dealing with data relationships between high-dimensional data. The reduced sample information in this study may be the main reason for the limited prediction performance of ML algorithms. Second, this study lacked an independent external verification population, and the prediction accuracy and robustness of extrapolating the established model to other ethnic populations need to be explored further. Moreover, only the baseline measurement data were used for modeling. Time effect and censored data were not considered during model construction. Finally, although this study uses Platt scaling to deal with this imbalanced dataset, the positive predictive value of different models in this population is low, which may lead to unnecessary intervention in the population.</p>
</sec>
<sec sec-type="conclusions" id="s5">
<title>Conclusion</title>
<p>In this study, the performance of the CVD prediction model based on the L1-LR algorithm was higher than those of other ML algorithms. In addition to the traditional single risk factors for cardiovascular disease, complex lipid metabolism indicators, such as LpH and TyG, and obesity indicators, such as BMI and BAI, were found to be important factors for predicting the incidence of CVD in this population.</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data Availability Statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7">
<title>Ethics Statement</title>
<p>The studies involving human participants were reviewed and approved by Ethics Committee of the First Affiliated Hospital of Shihezi University School of Medicine (No. SHZ2010LL01). The patients/participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8">
<title>Author Contributions</title>
<p>XQ and YL designed the study, analyzed the data, and wrote the manuscript. XHZ, HG, and JH collected and sorted the data. XPW, YZY, and JLM sorted and checked the data. SXG and RLM designed the study, guided the article writing, and modified the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>This research was funded by the Non-profit Central Research Institute Fund of Chinese Academy of Medical Sciences (2020-PT330-003), the Shihezi University Innovation Outstanding Young Talents Program (Natural Science) (No. CXPY202004), and Shihezi University independently funded and supported school-level scientific research projects (No. ZZZC202018A).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ack><p>We sincerely thank these rural multi-ethnic residents for their willingness to participate in the study. We are grateful to Nalati Township Hospital, Jiangbazi Township Hospital, and 51st Regiment Hospital for providing study facilities and other assistance.</p>
</ack><sec sec-type="supplementary-material" id="s11">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcvm.2022.854287/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcvm.2022.854287/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roth</surname> <given-names>GA</given-names></name> <name><surname>Johnson</surname> <given-names>C</given-names></name> <name><surname>Abajobir</surname> <given-names>A</given-names></name> <name><surname>Abd-Allah</surname> <given-names>F</given-names></name> <name><surname>Abera</surname> <given-names>SF</given-names></name> <name><surname>Abyu</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Global, regional, and National Burden of Cardiovascular Diseases for 10 Causes, 1990 to 2015</article-title>. <source>J Am Coll Cardiol</source>. (<year>2017</year>) <volume>70</volume>:<fpage>1</fpage>&#x02013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/j.jacc.2017.04.052</pub-id><pub-id pub-id-type="pmid">28527533</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>GBD 2013 Mortality and Causes of Death Collaborators</collab></person-group>. <article-title>Global, regional, and national age-sex specific all-cause and cause-specific mortality for 240 causes of death, 1990-2013: a systematic analysis for the Global Burden of Disease Study 2013</article-title>. <source>Lancet</source>. (<year>2014</year>) <volume>385</volume>:<fpage>117</fpage>&#x02013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(14)61682-2</pub-id><pub-id pub-id-type="pmid">25530442</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Piepoli</surname> <given-names>MF</given-names></name> <name><surname>Hoes</surname> <given-names>AW</given-names></name> <name><surname>Agewall</surname> <given-names>S</given-names></name> <name><surname>Albus</surname> <given-names>C</given-names></name> <name><surname>Brotons</surname> <given-names>C</given-names></name> <name><surname>Catapano</surname> <given-names>AL</given-names></name> <etal/></person-group>. <article-title>2016 European Guidelines on cardiovascular disease prevention in clinical practice: The Sixth Joint Task Force of the European Society of Cardiology and Other Societies on Cardiovascular Disease Prevention in Clinical Practice (constituted by representatives of 10 societies and by invited experts) Developed with the special contribution of the European Association for Cardiovascular Prevention and Rehabilitation (EACPR)</article-title>. <source>Eur Heart J</source>. (<year>2016</year>) <volume>37</volume>:<fpage>2315</fpage>&#x02013;<lpage>2381</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehw106</pub-id><pub-id pub-id-type="pmid">27664503</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>S</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Zeng</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>H</given-names></name> <name><surname>Yin</surname> <given-names>P</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name> <etal/></person-group>. <article-title>Burden of cardiovascular diseases in China, 1990-2016: findings from the 2016 Global Burden of Disease Study</article-title>. <source>JAMA Cardiol.</source> (<year>2019</year>) <volume>4</volume>:<fpage>342</fpage>&#x02013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1001/jamacardio.2019.0295</pub-id><pub-id pub-id-type="pmid">30865215</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>Joint Task Force for Guideline on the Assessment and Management of Cardiovascular Risk in China</collab></person-group>. <article-title>Guideline on the assessment and management of cardiovascular risk in China</article-title>. <source>Chin Circ J</source>. (<year>2019</year>) <volume>34</volume>:<fpage>4</fpage>&#x02013;<lpage>28</lpage>. (in Chinese). <pub-id pub-id-type="doi">10.3760/cma.j.issn.0253-9624.2019.01.004</pub-id><pub-id pub-id-type="pmid">30605960</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goff</surname> <given-names>DC</given-names> <suffix>Jr.</suffix></name> <name><surname>Lloyd-Jones</surname> <given-names>DM</given-names></name> <name><surname>Bennett</surname> <given-names>G</given-names></name> <name><surname>Coady</surname> <given-names>S</given-names></name> <name><surname>D&#x00027;Agostino</surname> <given-names>RB</given-names></name> <name><surname>Gibbons</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>2013 ACC/AHA guideline on the assessment of cardiovascular risk: a report of the American College of Cardiology/American Heart Association Task Force on Practice Guidelines</article-title>. <source>Circulation</source>. (<year>2014</year>) <volume>129</volume>:<fpage>S49</fpage>&#x02013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1161/01.cir.0000437741.48606.98</pub-id><pub-id pub-id-type="pmid">24239921</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kannel</surname> <given-names>WB</given-names></name> <name><surname>McGee</surname> <given-names>D</given-names></name> <name><surname>Gordon</surname> <given-names>T</given-names></name></person-group>. <article-title>A general cardiovascular risk profile: the Framingham Study</article-title>. <source>Am J Cardiol.</source> (<year>1976</year>) <volume>38</volume>:<fpage>46</fpage>&#x02013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/0002-9149(76)90061-8</pub-id><pub-id pub-id-type="pmid">132862</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wilson</surname> <given-names>PW</given-names></name> <name><surname>D&#x00027;Agostino</surname> <given-names>RB</given-names></name> <name><surname>Levy</surname> <given-names>D</given-names></name> <name><surname>Belanger</surname> <given-names>AM</given-names></name> <name><surname>Silbershatz</surname> <given-names>H</given-names></name> <name><surname>Kannel</surname> <given-names>WB</given-names></name></person-group>. <article-title>Prediction of coronary heart disease using risk factor categories</article-title>. <source>Circulation.</source> (<year>1998</year>) <volume>97</volume>:<fpage>1837</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1161/01.CIR.97.18.1837</pub-id><pub-id pub-id-type="pmid">9603539</pub-id></citation></ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x00027;Agostino</surname> <given-names>RB</given-names> <suffix>Sr</suffix></name></person-group>. <article-title>General cardiovascular risk profile for use in primary care: the Framingham Heart Study</article-title>. <source>Circulation.</source> (<year>2008</year>) <volume>117</volume>:<fpage>743</fpage>&#x02013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.107.699579</pub-id><pub-id pub-id-type="pmid">18212285</pub-id></citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hippisley-Cox</surname> <given-names>J</given-names></name> <name><surname>Coupland</surname> <given-names>C</given-names></name> <name><surname>Brindle</surname> <given-names>P</given-names></name></person-group>. <article-title>Development and validation of QRISK3 risk prediction algorithms to estimate future risk of cardiovascular disease: prospective cohort study</article-title>. <source>BMJ.</source> (<year>2017</year>) <volume>357</volume>:<fpage>j2099</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.j2099</pub-id><pub-id pub-id-type="pmid">28536104</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muntner</surname> <given-names>P</given-names></name> <name><surname>Colantonio</surname> <given-names>LD</given-names></name> <name><surname>Cushman</surname> <given-names>M</given-names></name> <name><surname>Goff</surname> <given-names>DC</given-names> <suffix>Jr</suffix></name></person-group>. <article-title>Validation of the atherosclerotic cardiovascular disease Pooled Cohort risk equations</article-title>. <source>JAMA.</source> (<year>2014</year>) <volume>311</volume>:<fpage>1406</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2014.2630</pub-id><pub-id pub-id-type="pmid">24682252</pub-id></citation></ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>D</given-names></name> <name><surname>He</surname> <given-names>L</given-names></name> <name><surname>Wu</surname> <given-names>N</given-names></name> <name><surname>Si</surname> <given-names>Y</given-names></name> <name><surname>Cao</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Performance of atherosclerotic cardiovascular risk prediction models in a rural Northern Chinese population: results from the Fangshan Cohort Study</article-title>. <source>Am Heart J.</source> (<year>2019</year>) <volume>211</volume>:<fpage>34</fpage>&#x02013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1016/j.ahj.2019.01.009</pub-id><pub-id pub-id-type="pmid">30831332</pub-id></citation></ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldstein</surname> <given-names>BA</given-names></name> <name><surname>Navar</surname> <given-names>AM</given-names></name> <name><surname>Carter</surname> <given-names>RE</given-names></name></person-group>. <article-title>Moving beyond regression techniques in cardiovascular risk prediction: applying machine learning to address analytic challenges</article-title>. <source>Eur Heart J.</source> (<year>2017</year>) <volume>38</volume>:<fpage>1805</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehw302</pub-id><pub-id pub-id-type="pmid">27436868</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jamthikar</surname> <given-names>A</given-names></name> <name><surname>Gupta</surname> <given-names>D</given-names></name> <name><surname>Khanna</surname> <given-names>NN</given-names></name> <name><surname>Araki</surname> <given-names>T</given-names></name> <name><surname>Saba</surname> <given-names>L</given-names></name> <name><surname>Nicolaides</surname> <given-names>A</given-names></name></person-group>. <article-title>A special report on changing trends in preventive stroke/cardiovascular risk assessment <italic>via</italic> B-mode ultrasonography</article-title>. <source>Curr Atheroscler Rep</source>. (<year>2019</year>) <volume>21</volume>:<fpage>25</fpage>. <pub-id pub-id-type="doi">10.1007/s11883-019-0788-4</pub-id><pub-id pub-id-type="pmid">31041615</pub-id></citation></ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Christodoulou</surname> <given-names>E</given-names></name> <name><surname>Ma</surname> <given-names>J</given-names></name> <name><surname>Collins</surname> <given-names>GS</given-names></name> <name><surname>Steyerberg</surname> <given-names>EW</given-names></name> <name><surname>Verbakel</surname> <given-names>JY</given-names></name> <name><surname>Van Calster</surname> <given-names>B</given-names></name></person-group>. <article-title>A systematic review shows no performance benefit of machine learning over logistic regression for clinical prediction models</article-title>. <source>J Clin Epidemiol.</source> (<year>2019</year>) <volume>110</volume>:<fpage>12</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2019.02.004</pub-id><pub-id pub-id-type="pmid">30763612</pub-id></citation></ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nusinovici</surname> <given-names>S</given-names></name> <name><surname>Tham</surname> <given-names>YC</given-names></name> <name><surname>Chak Yan</surname> <given-names>MY</given-names></name> <name><surname>Wei Ting</surname> <given-names>DS</given-names></name> <name><surname>Li</surname> <given-names>J</given-names></name> <name><surname>Sabanayagam</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Logistic regression was as good as machine learning for predicting major chronic diseases</article-title>. <source>J Clin Epidemiol.</source> (<year>2020</year>) <volume>122</volume>:<fpage>56</fpage>&#x02013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.03.002</pub-id><pub-id pub-id-type="pmid">32169597</pub-id></citation></ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xianghui</surname> <given-names>Z</given-names></name> <name><surname>Yizhong</surname> <given-names>Y</given-names></name> <name><surname>Jia</surname> <given-names>H</given-names></name> <name><surname>Jiaolong</surname> <given-names>M</given-names></name> <name><surname>Hongrui</surname> <given-names>P</given-names></name> <name><surname>Rulin</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Evaluation of screening indices for metabolic syndrome in adult Kazakh in Xinjiang</article-title>. <source>Chin J Hypertens.</source> (<year>2017</year>) <volume>25</volume>:<fpage>55</fpage>&#x02013;<lpage>60</lpage>.</citation>
</ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shuxia</surname> <given-names>G</given-names></name> <name><surname>Jingyu</surname> <given-names>Z</given-names></name> <name><surname>Yihua</surname> <given-names>Z</given-names></name> <name><surname>Shangzhi</surname> <given-names>X</given-names></name> <name><surname>Heng</surname> <given-names>G</given-names></name> <name><surname>Qiang</surname> <given-names>N</given-names></name> <etal/></person-group>. <article-title>Epidemiological characteristics and analysis on hypertension of Kazakstans and Hans in Xinjiang</article-title>. <source>J Shihezi Univ</source>. (<year>2008</year>) <volume>26</volume>:<fpage>538</fpage>&#x02013;<lpage>40</lpage>. (in Chinese).</citation>
</ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jia</surname> <given-names>H</given-names></name> <name><surname>Heng</surname> <given-names>G</given-names></name> <name><surname>Yusong</surname> <given-names>D</given-names></name> <name><surname>Jiaming</surname> <given-names>L</given-names></name> <name><surname>Mei</surname> <given-names>Z</given-names></name> <name><surname>Rulin</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Epidemiological study on overweight and obesity among rural adult residents in Hazakh, Uygur and Han populations in Xinjiang</article-title>. <source>Chin J Epidemiol</source>. (<year>2013</year>) <volume>34</volume>:<fpage>1164</fpage>&#x02013;<lpage>8</lpage>. (in Chinese). <pub-id pub-id-type="pmid">24518011</pub-id></citation></ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tao</surname> <given-names>J</given-names></name> <name><surname>Ma</surname> <given-names>YT</given-names></name> <name><surname>Xiang</surname> <given-names>Y</given-names></name> <name><surname>Xie</surname> <given-names>X</given-names></name> <name><surname>Yang</surname> <given-names>YN</given-names></name> <name><surname>Li</surname> <given-names>XM</given-names></name> <etal/></person-group>. <article-title>Prevalence of major cardiovascular risk factors and adverse risk profiles among three ethnic groups in the Xinjiang Uygur Autonomous Region, China</article-title>. <source>Lipids Health Dis.</source> (<year>2013</year>) <volume>12</volume>:<fpage>185</fpage>. <pub-id pub-id-type="doi">10.1186/1476-511X-12-185</pub-id><pub-id pub-id-type="pmid">24341701</pub-id></citation></ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hippisley-Cox</surname> <given-names>J</given-names></name> <name><surname>Coupland</surname> <given-names>C</given-names></name> <name><surname>Robson</surname> <given-names>J</given-names></name> <name><surname>Brindle</surname> <given-names>P</given-names></name></person-group>. <article-title>Derivation, validation, and evaluation of a new QRISK model to estimate lifetime risk of cardiovascular disease: cohort study using QResearch database</article-title>. <source>BMJ.</source> (<year>2010</year>) <volume>341</volume>:<fpage>c6624</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.c6624</pub-id><pub-id pub-id-type="pmid">21148212</pub-id></citation></ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>L</given-names></name> <name><surname>Wu</surname> <given-names>H</given-names></name> <name><surname>Jin</surname> <given-names>X</given-names></name> <name><surname>Zheng</surname> <given-names>P</given-names></name> <name><surname>Hu</surname> <given-names>S</given-names></name> <name><surname>Xu</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>Study of cardiovascular disease prediction model based on random forest in eastern China</article-title>. <source>Sci Rep.</source> (<year>2020</year>) <volume>10</volume>:<fpage>5245</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-62133-5</pub-id><pub-id pub-id-type="pmid">32251324</pub-id></citation></ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>YY</given-names></name> <name><surname>Tian</surname> <given-names>WB</given-names></name> <name><surname>Jiang</surname> <given-names>CQ</given-names></name> <name><surname>Zhang</surname> <given-names>WS</given-names></name> <name><surname>Zhu</surname> <given-names>F</given-names></name> <name><surname>Jin</surname> <given-names>YL</given-names></name> <etal/></person-group>. <article-title>A simple model for predicting 10-year cardiovascular risk in middle-aged to older Chinese: Guangzhou Biobank Cohort Study</article-title>. <source>J Cardiovasc Transl Res.</source> (<year>2022</year>) <volume>15</volume>:<fpage>416</fpage>&#x02013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1007/s12265-021-10163-3</pub-id><pub-id pub-id-type="pmid">34402029</pub-id></citation></ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>Y</given-names></name> <name><surname>Ma</surname> <given-names>R</given-names></name> <name><surname>Guo</surname> <given-names>H</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>K</given-names></name> <etal/></person-group>. <article-title>External validation of three atherosclerotic cardiovascular disease risk equations in rural areas of Xinjiang, China</article-title>. <source>BMC Public Health.</source> (<year>2020</year>) <volume>20</volume>:<fpage>1471</fpage>. <pub-id pub-id-type="doi">10.1186/s12889-020-09579-4</pub-id><pub-id pub-id-type="pmid">32993590</pub-id></citation></ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>Writing Group of 2018</collab></person-group>. <article-title>2018 Chinese guidelines for the management of hypertension</article-title>. <source>Chin J Cardiovasc Med</source>. (<year>2019</year>) <volume>24</volume>:<fpage>24</fpage>&#x02013;<lpage>56</lpage>. (in Chinese).</citation>
</ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>Cigarette smoking among adults&#x02013;United States, 1992, and changes in the definition of current cigarette smoking</collab></person-group>. <source>MMWR Morb Mortal Wkly Rep</source>. (<year>1994</year>) <volume>43</volume>:<fpage>342</fpage>&#x02013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>K</given-names></name> <name><surname>Ren</surname> <given-names>M</given-names></name> <name><surname>Liu</surname> <given-names>D</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Yang</surname> <given-names>C</given-names></name> <name><surname>Yan</surname> <given-names>L</given-names></name></person-group>. <article-title>Alcohol consumption and risk of metabolic syndrome: a meta-analysis of prospective studies</article-title>. <source>Clin Nutr.</source> (<year>2014</year>) <volume>33</volume>:<fpage>596</fpage>&#x02013;<lpage>602</lpage>. <pub-id pub-id-type="doi">10.1016/j.clnu.2013.10.003</pub-id><pub-id pub-id-type="pmid">24315622</pub-id></citation></ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xinjun</surname> <given-names>W</given-names></name> <name><surname>Wen</surname> <given-names>Y</given-names></name></person-group>. <article-title>2012 Diabetes diagnosis and treatment guidelines-American Diabetes Association</article-title>. <source>Int J Endocrinol Metab.</source> (<year>2012</year>) <volume>32</volume>:<fpage>211</fpage>&#x02013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.3760/cma.j.issn.1673-4157.2012.03.020</pub-id></citation>
</ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>WHO MONICA Project Principal Investigators</collab></person-group>. <article-title>The World Health Organization MONICA Project (monitoring trends and determinants in cardiovascular disease): a major international collaboration</article-title>. <source>J Clin Epidemiol.</source> (<year>1988</year>) <volume>41</volume>:<fpage>105</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1016/0895-4356(88)90084-4</pub-id><pub-id pub-id-type="pmid">3335877</pub-id></citation></ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>ZP</given-names></name></person-group>. <article-title>Biomarker discovery for predicting spontaneous preterm birth from gene expression data by regularized logistic regression</article-title>. <source>Comput Struct Biotechnol J.</source> (<year>2020</year>) <volume>18</volume>:<fpage>3434</fpage>&#x02013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2020.10.028</pub-id><pub-id pub-id-type="pmid">33294138</pub-id></citation></ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cortes</surname> <given-names>C</given-names></name> <name><surname>Vapnik</surname> <given-names>V</given-names></name></person-group>. <article-title>Support-vector networks</article-title>. <source>Mach Learn</source>. (<year>1995</year>) <volume>20</volume>:<fpage>273</fpage>&#x02013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id></citation>
</ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname> <given-names>L</given-names></name></person-group>. <article-title>Random forests</article-title>. <source>Mach Learn</source>. (<year>2001</year>) <volume>45</volume>:<fpage>5</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></citation>
</ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hastie</surname> <given-names>T</given-names></name> <name><surname>Rosset</surname> <given-names>S</given-names></name> <name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Zou</surname> <given-names>H</given-names></name></person-group>. <article-title>Multi-class adaboost</article-title>. <source>Statist Interf</source>. (<year>2009</year>) <volume>2</volume>:<fpage>349</fpage>&#x02013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.4310/SII.2009.v2.n3.a8</pub-id></citation>
</ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pencina</surname> <given-names>MJ</given-names></name> <name><surname>D&#x00027;Agostino</surname> <given-names>RB</given-names> <suffix>Sr</suffix></name></person-group>. <article-title>Evaluating the added predictive ability of a new marker: from area under the ROC curve to reclassification and beyond</article-title>. <source>Stat Med.</source> (<year>2008</year>) <volume>27</volume>:<fpage>157</fpage>&#x02013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1002/sim.2929</pub-id><pub-id pub-id-type="pmid">17569110</pub-id></citation></ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rufibach</surname> <given-names>K</given-names></name></person-group>. <article-title>Use of Brier score to assess binary predictions</article-title>. <source>J Clin Epidemiol.</source> (<year>2010</year>) <volume>63</volume>:<fpage>938</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2009.11.009</pub-id><pub-id pub-id-type="pmid">20189763</pub-id></citation></ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lemeshow</surname> <given-names>S</given-names></name> <name><surname>Hosmer</surname> <given-names>DW</given-names> <suffix>Jr</suffix></name></person-group>. <article-title>A review of goodness of fit statistics for use in the development of logistic regression models</article-title>. <source>Am J Epidemiol.</source> (<year>1982</year>) <volume>115</volume>:<fpage>92</fpage>&#x02013;<lpage>106</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.aje.a113284</pub-id><pub-id pub-id-type="pmid">7137160</pub-id></citation></ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tsalatsanis</surname> <given-names>A</given-names></name> <name><surname>Hozo</surname> <given-names>I</given-names></name> <name><surname>Vickers</surname> <given-names>A</given-names></name> <name><surname>Djulbegovic</surname> <given-names>B</given-names></name></person-group>. <article-title>A regret theory approach to decision curve analysis: a novel method for eliciting decision makers&#x00027; preferences and decision-making</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2010</year>) <volume>10</volume>:<fpage>51</fpage>. <pub-id pub-id-type="doi">10.1186/1472-6947-10-51</pub-id><pub-id pub-id-type="pmid">20846413</pub-id></citation></ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Platt</surname> <given-names>JC</given-names></name></person-group>. <article-title>Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods</article-title>. In: <source>Advances in Large Margin Classifiers</source>. <publisher-name>MIT Press</publisher-name> (<year>2000</year>). p. <fpage>61</fpage>&#x02013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/j.enpol.2006.07.010</pub-id></citation>
</ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mortazavi</surname> <given-names>BJ</given-names></name> <name><surname>Downing</surname> <given-names>NS</given-names></name> <name><surname>Bucholz</surname> <given-names>EM</given-names></name> <name><surname>Dharmarajan</surname> <given-names>K</given-names></name> <name><surname>Manhapra</surname> <given-names>A</given-names></name> <name><surname>Li</surname> <given-names>S-X</given-names></name> <etal/></person-group>. <article-title>Analysis of machine learning techniques for heart failure readmissions</article-title>. <source>Circ Cardiovasc Qual Outcomes</source>. (<year>2016</year>) <volume>9</volume>:<fpage>629</fpage>&#x02013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCOUTCOMES.116.003039</pub-id><pub-id pub-id-type="pmid">28263938</pub-id></citation></ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ambale-Venkatesh</surname> <given-names>B</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Wu</surname> <given-names>CO</given-names></name> <name><surname>Liu</surname> <given-names>K</given-names></name> <name><surname>Hundley</surname> <given-names>WG</given-names></name> <name><surname>McClelland</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Cardiovascular event prediction by machine learning: the multi-ethnic study of atherosclerosis</article-title>. <source>Circ Res.</source> (<year>2017</year>) <volume>121</volume>:<fpage>1092</fpage>&#x02013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCRESAHA.117.311312</pub-id><pub-id pub-id-type="pmid">28794054</pub-id></citation></ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>F</given-names></name> <name><surname>Jiang</surname> <given-names>Y</given-names></name> <name><surname>Zhi</surname> <given-names>H</given-names></name> <name><surname>Dong</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>H</given-names></name> <name><surname>Ma</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Artificial intelligence in healthcare: past, present and future</article-title>. <source>Stroke Vasc Neurol.</source> (<year>2017</year>) <volume>2</volume>:<fpage>230</fpage>&#x02013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1136/svn-2017-000101</pub-id><pub-id pub-id-type="pmid">31670713</pub-id></citation></ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>DeFilippis</surname> <given-names>AP</given-names></name> <name><surname>Young</surname> <given-names>R</given-names></name> <name><surname>Carrubba</surname> <given-names>CJ</given-names></name> <name><surname>McEvoy</surname> <given-names>JW</given-names></name> <name><surname>Budoff</surname> <given-names>MJ</given-names></name> <name><surname>Blumenthal</surname> <given-names>RS</given-names></name> <etal/></person-group>. <article-title>An analysis of calibration and discrimination among multiple cardiovascular risk scores in a modern multiethnic cohort</article-title>. <source>Ann Intern Med</source>. (<year>2015</year>) <volume>162</volume>:<fpage>266</fpage>&#x02013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.7326/M14-1281</pub-id><pub-id pub-id-type="pmid">25686167</pub-id></citation></ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Lee</surname> <given-names>Y</given-names></name> <name><surname>Wu</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <name><surname>Jin</surname> <given-names>C</given-names></name> <name><surname>Huang</surname> <given-names>Z</given-names></name> <etal/></person-group>. <article-title>A prospective study of waist circumference trajectories and incident cardiovascular disease in China: the Kailuan Cohort Study</article-title>. <source>Am J Clin Nutr.</source> (<year>2021</year>) <volume>113</volume>:<fpage>338</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1093/ajcn/nqaa331</pub-id><pub-id pub-id-type="pmid">33330917</pub-id></citation></ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhong</surname> <given-names>VW</given-names></name> <name><surname>Van Horn</surname> <given-names>L</given-names></name> <name><surname>Cornelis</surname> <given-names>MC</given-names></name> <name><surname>Wilkins</surname> <given-names>JT</given-names></name> <name><surname>Ning</surname> <given-names>H</given-names></name> <name><surname>Carnethon</surname> <given-names>MR</given-names></name> <etal/></person-group>. <article-title>Associations of dietary cholesterol or egg consumption with incident cardiovascular disease and mortality</article-title>. <source>JAMA.</source> (<year>2019</year>) <volume>321</volume>:<fpage>1081</fpage>&#x02013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2019.1572</pub-id><pub-id pub-id-type="pmid">30874756</pub-id></citation></ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>W</given-names></name> <name><surname>Hu</surname> <given-names>B</given-names></name> <name><surname>Dehghan</surname> <given-names>M</given-names></name> <name><surname>Mente</surname> <given-names>A</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Yan</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Fruit, vegetable, and legume intake and the risk of all-cause, cardiovascular, and cancer mortality: a prospective study</article-title>. <source>Clin Nutr.</source> (<year>2021</year>) <volume>40</volume>:<fpage>4316</fpage>&#x02013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1016/j.clnu.2021.01.016</pub-id><pub-id pub-id-type="pmid">33581953</pub-id></citation></ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <name><surname>Ma</surname> <given-names>R</given-names></name> <name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Liu</surname> <given-names>J</given-names></name> <name><surname>Keerman</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Cardiovascular disease prediction by machine learning algorithms based on cytokines in Kazakhs of China</article-title>. <source>Clin Epidemiol.</source> (<year>2021</year>) <volume>13</volume>:<fpage>417</fpage>&#x02013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.2147/CLEP.S313343</pub-id><pub-id pub-id-type="pmid">34135637</pub-id></citation></ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Silva</surname> <given-names>K</given-names></name> <name><surname>Jonsson</surname> <given-names>D</given-names></name> <name><surname>Demmer</surname> <given-names>RT</given-names></name></person-group>. <article-title>A combined strategy of feature selection and machine learning to identify predictors of prediabetes</article-title>. <source>J Am Med Inform Assoc.</source> (<year>2020</year>) <volume>27</volume>:<fpage>396</fpage>&#x02013;<lpage>406</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocz204</pub-id><pub-id pub-id-type="pmid">31889178</pub-id></citation></ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wallert</surname> <given-names>J</given-names></name> <name><surname>Tomasoni</surname> <given-names>M</given-names></name> <name><surname>Madison</surname> <given-names>G</given-names></name> <name><surname>Held</surname> <given-names>C</given-names></name></person-group>. <article-title>Predicting two-year survival versus non-survival after first myocardial infarction using machine learning and Swedish national register data</article-title>. <source>BMC Med Inform Decis Mak.</source> (<year>2017</year>) <volume>17</volume>:<fpage>99</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-017-0500-y</pub-id><pub-id pub-id-type="pmid">28679442</pub-id></citation></ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hae</surname> <given-names>H</given-names></name> <name><surname>Kang</surname> <given-names>SJ</given-names></name> <name><surname>Kim</surname> <given-names>WJ</given-names></name> <name><surname>Choi</surname> <given-names>SY</given-names></name> <name><surname>Lee</surname> <given-names>JG</given-names></name> <name><surname>Bae</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Machine learning assessment of myocardial ischemia using angiography: development and retrospective validation</article-title>. <source>PLoS Med.</source> (<year>2018</year>) <volume>15</volume>:<fpage>e1002693</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pmed.1002693</pub-id><pub-id pub-id-type="pmid">30422987</pub-id></citation></ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>van der Ploeg</surname> <given-names>T</given-names></name> <name><surname>Austin</surname> <given-names>PC</given-names></name> <name><surname>Steyerberg</surname> <given-names>EW</given-names></name></person-group>. <article-title>Modern modelling techniques are data hungry: a simulation study for predicting dichotomous endpoints</article-title>. <source>BMC Med Res Methodol.</source> (<year>2014</year>) <volume>14</volume>:<fpage>137</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2288-14-137</pub-id><pub-id pub-id-type="pmid">25532820</pub-id></citation></ref>
<ref id="B51">
<label>51.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tokgozoglu</surname> <given-names>L</given-names></name> <name><surname>Torp-Pedersen</surname> <given-names>C</given-names></name></person-group>. <article-title>Redefining cardiovascular risk prediction: is the crystal ball clearer now?</article-title> <source>Eur Heart J.</source> (<year>2021</year>) <volume>42</volume>:<fpage>2468</fpage>&#x02013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehab310</pub-id><pub-id pub-id-type="pmid">34120165</pub-id></citation></ref>
<ref id="B52">
<label>52.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Z-S</given-names></name> <name><surname>Zhong</surname> <given-names>J-L</given-names></name> <name><surname>Luo</surname> <given-names>Y-T</given-names></name> <name><surname>Peng</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>S-H</given-names></name> <name><surname>Liu</surname> <given-names>J-L</given-names></name></person-group>. <article-title>Correlation between LDL-C/HDL-C ratio and the severity of coronary artery lesion in patients with coronary heart disease</article-title>. <source>J Sun Yat-sen Univ.</source> (<year>2018</year>) <volume>39</volume>:<fpage>303</fpage>&#x02013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B53">
<label>53.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barzegar</surname> <given-names>N</given-names></name> <name><surname>Tohidi</surname> <given-names>M</given-names></name> <name><surname>Hasheminia</surname> <given-names>M</given-names></name> <name><surname>Azizi</surname> <given-names>F</given-names></name> <name><surname>Hadaegh</surname> <given-names>F</given-names></name></person-group>. <article-title>The impact of triglyceride-glucose index on incident cardiovascular events during 16 years of follow-up: Tehran Lipid and Glucose Study</article-title>. <source>Cardiovasc Diabetol.</source> (<year>2020</year>) <volume>19</volume>:<fpage>155</fpage>. <pub-id pub-id-type="doi">10.1186/s12933-020-01121-5</pub-id><pub-id pub-id-type="pmid">32993633</pub-id></citation></ref>
<ref id="B54">
<label>54.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sanchez-Inigo</surname> <given-names>L</given-names></name> <name><surname>Navarro-Gonzalez</surname> <given-names>D</given-names></name> <name><surname>Fernandez-Montero</surname> <given-names>A</given-names></name> <name><surname>Pastrana-Delgado</surname> <given-names>J</given-names></name> <name><surname>Martinez</surname> <given-names>JA</given-names></name></person-group>. <article-title>The TyG index may predict the development of cardiovascular events</article-title>. <source>Eur J Clin Invest.</source> (<year>2016</year>) <volume>46</volume>:<fpage>189</fpage>&#x02013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1111/eci.12583</pub-id><pub-id pub-id-type="pmid">26683265</pub-id></citation></ref>
<ref id="B55">
<label>55.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lam</surname> <given-names>BC</given-names></name> <name><surname>Koh</surname> <given-names>GC</given-names></name> <name><surname>Chen</surname> <given-names>C</given-names></name> <name><surname>Wong</surname> <given-names>MT</given-names></name> <name><surname>Fallows</surname> <given-names>SJ</given-names></name></person-group>. <article-title>Comparison of Body Mass Index (BMI), Body Adiposity Index (BAI), Waist Circumference (WC), Waist-To-Hip Ratio (WHR) and Waist-To-Height Ratio (WHtR) as predictors of cardiovascular disease risk factors in an adult population in Singapore</article-title>. <source>PLoS ONE.</source> (<year>2015</year>) <volume>10</volume>:<fpage>e0122985</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0122985</pub-id><pub-id pub-id-type="pmid">25880905</pub-id></citation></ref>
<ref id="B56">
<label>56.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moliner-Urdiales</surname> <given-names>D</given-names></name> <name><surname>Artero</surname> <given-names>EG</given-names></name> <name><surname>Lee</surname> <given-names>DC</given-names></name> <name><surname>Espana-Romero</surname> <given-names>V</given-names></name> <name><surname>Sui</surname> <given-names>X</given-names></name> <name><surname>Blair</surname> <given-names>SN</given-names></name></person-group>. <article-title>Body adiposity index and all-cause and cardiovascular disease mortality in men</article-title>. <source>Obesity.</source> (<year>2013</year>) <volume>21</volume>:<fpage>1870</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1002/oby.20399</pub-id><pub-id pub-id-type="pmid">23512375</pub-id></citation></ref>
</ref-list>
<glossary>
<def-list>
<title>Abbreviations</title>
<def-item><term>CVD</term>
<def><p>cardiovascular disease</p></def></def-item>
<def-item><term>ML</term>
<def><p>machine learning</p></def></def-item>
<def-item><term>L1-LR</term>
<def><p>L1 regularized logistic regression</p></def></def-item>
<def-item><term>RF</term>
<def><p>random forest</p></def></def-item>
<def-item><term>SVM</term>
<def><p>support vector machine</p></def></def-item>
<def-item><term>SBP</term>
<def><p>systolic blood pressure</p></def></def-item>
<def-item><term>TyG</term>
<def><p>triglyceride-glucose index</p></def></def-item>
<def-item><term>BMI</term>
<def><p>body mass index</p></def></def-item>
<def-item><term>BAI</term>
<def><p>body adiposity index</p></def></def-item>
<def-item><term>TG</term>
<def><p>triglycerides</p></def></def-item>
<def-item><term>HDL-C</term>
<def><p>high-density lipoprotein cholesterol</p></def></def-item>
<def-item><term>DBP</term>
<def><p>diastolic blood pressure</p></def></def-item>
<def-item><term>WHR</term>
<def><p>waist-to-hip ratio</p></def></def-item>
<def-item><term>LCI</term>
<def><p>lipoprotein combine index</p></def></def-item>
<def-item><term>AI</term>
<def><p>atherogenic index</p></def></def-item>
<def-item><term>LpH</term>
<def><p>low-high-density lipoprotein ratio</p></def></def-item>
<def-item><term>THT</term>
<def><p>bilirubin comprehensive index</p></def></def-item>
<def-item><term>FLR</term>
<def><p>forward partial likelihood estimation</p></def></def-item>
<def-item><term>LR</term>
<def><p>logistic regression</p></def></def-item>
<def-item><term>RF</term>
<def><p>random forest</p></def></def-item>
<def-item><term>AUC</term>
<def><p>area under the receiver operating characteristic curve</p></def></def-item>
<def-item><term>cNRI</term>
<def><p>the Net Reclassification Index</p></def></def-item>
<def-item><term>IDI</term>
<def><p>Integrated Discrimination Improvement Index</p></def></def-item>
<def-item><term>BS</term>
<def><p>Brier Score.</p></def></def-item>
</def-list>
</glossary>
</back>
</article>