<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title-group>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2025.1657551</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Development and interpretation of a machine learning model for predicting body mass index in Chinese adolescents: a prospective cohort study</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Zikang</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3113712"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Peng</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sun</surname>
<given-names>Shaoming</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Fangwen</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Yining</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Hefei Institutes of Physical Science, Chinese Academy of Sciences</institution>, <city>Hefei</city>, <state>Anhui</state>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>University of Science and Technology of China</institution>, <city>Hefei</city>, <state>Anhui</state>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>CAS Hefei Institute of Technology Innovation</institution>, <city>Hefei</city>, <state>Anhui</state>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Shaoming Sun, <email xlink:href="mailto:ssmjkcjzx@outlook.com">ssmjkcjzx@outlook.com</email>; Wei Peng, <email xlink:href="mailto:wpeng@iim.ac.cn">wpeng@iim.ac.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-20">
<day>20</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1657551</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>26</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Zhang, Peng, Sun, Zhang, Sun and Huang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zhang, Peng, Sun, Zhang, Sun and Huang</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Purposes</title>
<p>This study aimed to develop a machine learning model to predict body mass index (BMI) in adolescents based on readily accessible daily information and to investigate the influence of modifiable factors on BMI changes through model interpretation techniques.</p>
</sec>
<sec>
<title>Methods</title>
<p>This was a one-year prospective cohort study. Baseline data were collected through anthropometric measurements and questionnaires, and BMI was reassessed after 1 year. Six machine learning models were developed to predict BMI. Nested cross-validation (CV) was used for hyperparameter tuning and performance estimation. Predictors were prescreened on the inner-training folds of the nested CV using univariable analyses. Model performance was evaluated using Root Mean Squared Error (RMSE), Mean Squared Error (MSE), Mean Absolute Error (MAE), and coefficient of determination (R<sup>2</sup>). SHapley Additive exPlanations (SHAP) was used for global and local interpretations of the models.</p>
</sec>
<sec>
<title>Results</title>
<p>The mean BMI of the 1,827 students included in the final analysis increased from 21.18&#x202F;&#x00B1;&#x202F;3.63&#x202F;kg/m<sup>2</sup> at baseline to 21.54&#x202F;&#x00B1;&#x202F;3.59&#x202F;kg/m<sup>2</sup> after 1 year, with an average change of 0.36&#x202F;&#x00B1;&#x202F;1.40&#x202F;kg/m<sup>2</sup>. The CatBoost (CB) model demonstrated the best predictive performance. After calibration, it achieved an RMSE of 1.200 [95% confidence interval (CI): 1.101&#x2013;1.303], MSE of 1.440 (95% CI: 1.211&#x2013;1.697), MAE of 0.895 (95% CI: 0.818&#x2013;0.981) and R<sup>2</sup> of 0.902 (95% CI: 0.882&#x2013;0.918). In the SHAP analysis, the top 5 modifiable features at the population level were level of health literacy, correct recognition of self-weight status, sedentariness duration on weekends, participation in professional sports training, and frequency of staying up late.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study developed a BMI prediction model for adolescents using readily accessible daily information. The model accurately predicts BMI values 1 year later and provides both population-level and individual-level interpretability. Compared to existing studies, it offers key advantages, including independence from complex clinical data, the ability to predict continuous BMI values, and strong model interpretability. Our findings provide a promising research tool for screening high-risk adolescents, informing public health prevention and intervention strategies, and supporting personalized clinical interventions.</p>
</sec>
</abstract>
<kwd-group>
<kwd>BMI</kwd>
<kwd>prediction model</kwd>
<kwd>machine learning</kwd>
<kwd>daily information</kwd>
<kwd>model interpretation</kwd>
<kwd>modifiable factors</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. This work was funded by the Anhui Postdoctoral Scientific Research Program Foundation (No. 2024B815).</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="49"/>
<page-count count="16"/>
<word-count count="10256"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Public Health and Nutrition</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Body mass index (BMI) is a commonly used indicator for assessing the ratio of an individual&#x2019;s weight to height, widely applied in public health to help identify early health risks (<xref ref-type="bibr" rid="ref1">1</xref>). Abnormal BMI values are closely associated with the onset of various chronic diseases, such as cardiovascular diseases, diabetes, and certain types of cancer (<xref ref-type="bibr" rid="ref2 ref3 ref4 ref5">2&#x2013;5</xref>). According to the 2024 Global Burden of Disease Study, high BMI (&#x2265;25&#x202F;kg/m<sup>2</sup>) is a major risk factor for non-communicable diseases, contributing to 5&#x2013;42% of related deaths and 5&#x2013;52% of disability-adjusted life years (<xref ref-type="bibr" rid="ref6">6</xref>). By 2020, the prevalence of high BMI among Chinese children and adolescents aged 5&#x2013;19 had reached 37% and was projected to rise to 72% by 2035 at an average annual growth rate of 2.0%, with more than 31.5 million expected to develop health problems related to non-communicable diseases (<xref ref-type="bibr" rid="ref6">6</xref>). Given the alarming rise in abnormal BMI rates, Chinese children and adolescents represent a large and rapidly growing at-risk population, making BMI-related research in this group highly valuable for public health. Early BMI trajectory identification offers key opportunities for timely interventions to prevent weight-related health risks.</p>
<p>BMI in children and adolescents is influenced by a wide array of factors, including genetic, behavioral, psychological, dietary, familial, school-related, and sociodemographic factors, as supported by previous research (<xref ref-type="bibr" rid="ref7 ref8 ref9">7&#x2013;9</xref>). Given this multifactorial nature, accurately predicting BMI requires methods that can accommodate diverse and potentially nonlinear influences. Traditional statistical approaches are often constrained by strong parametric assumptions, limiting their ability to model such complex interactions. In contrast, machine learning techniques offer greater flexibility and are particularly adept at uncovering deeper connections between features in health-related data, making them well-suited for BMI prediction. In current research, machine learning-based BMI prediction models have been developed using a diverse array of predictive indicators, including medication data, biological markers, body images, smartphone motion sensor data, and lifestyle-related information (<xref ref-type="bibr" rid="ref10 ref11 ref12 ref13 ref14 ref15">10&#x2013;15</xref>). However, current models often depend on complex and hard-to-collect predictors, which limits their scalability and real-world applicability. Meanwhile, some approaches that incorporate daily information still primarily classify obesity status rather than predict continuous BMI values, reducing their ability to capture subtle changes.</p>
<p>Interpretability has become an increasingly important focus in machine learning prediction models, and interpretation techniques are increasingly applied across diverse clinical contexts (<xref ref-type="bibr" rid="ref16">16</xref>). By enhancing model transparency, these techniques support understanding of prediction logic and assist clinical decision-making, thereby narrowing the gap between model development and practical application. Among available interpretation techniques, SHapley Additive exPlanations (SHAP) is theoretically grounded in cooperative game theory (<xref ref-type="bibr" rid="ref17">17</xref>), providing consistent and locally accurate feature attributions, which makes it particularly suitable for individualized interpretation in health-related prediction tasks. SHAP has been successfully applied to the interpretation of clinical prediction models, including models for frailty, myelosuppression risk, and acute kidney injury in pediatric cardiac surgery patients (<xref ref-type="bibr" rid="ref18 ref19 ref20">18&#x2013;20</xref>). However, the application of model interpretability techniques in BMI research remains limited, hindering a deeper understanding of how various predictors influence BMI predictions and limiting the practical utility of these models in real-world scenarios.</p>
<p>The main contributions of this paper can be summarized as follows:</p>
<list list-type="order">
<list-item>
<p>We developed a machine learning model to predict the BMI of Chinese adolescents using only readily accessible daily information. This approach overcomes the limitations of previous studies that relied heavily on clinical biomarkers or complex datasets. It offers an efficient and cost-effective solution for early identification of high-risk adolescents, with potential for use in community, school, and clinical settings.</p>
</list-item>
<list-item>
<p>We focused on predicting continuous BMI values rather than merely classifying weight status, enabling the detection of subtle changes in body weight. These minor fluctuations can indicate early-stage health risks, which are clinically important for timely interventions, dynamic health monitoring, and chronic disease prevention.</p>
</list-item>
<list-item>
<p>We utilized model interpretability techniques to uncover the impact of modifiable factors on BMI variations at both group and individual levels. This interpretability not only enhances policymakers&#x2019; decision-making efficiency in weight management and health promotion, but also provides clinicians with targeted, personalized intervention strategies, offering substantial practical value.</p>
</list-item>
</list>
</sec>
<sec id="sec2">
<label>2</label>
<title>Literature review</title>
<p>Extensive empirical studies have clarified the key determinants of BMI. Silventoinen et al. examined the genetic and environmental contributions to BMI variation from infancy to early adulthood, revealing that genetic factors played a major role in BMI variation during adolescence, while environmental factors influenced childhood BMI (<xref ref-type="bibr" rid="ref7">7</xref>). Zink et al. found significant longitudinal associations between screen time, physical activity, sleep duration, and BMI in U.S. youth (<xref ref-type="bibr" rid="ref8">8</xref>). Sandri et al. conducted a study to explore the impact of sociodemographic, nutritional, and lifestyle factors on BMI in Spain, highlighting the role of poor dietary habits and sociodemographic characteristics in influencing obesity risk (<xref ref-type="bibr" rid="ref9">9</xref>).</p>
<p>Recent research has applied various machine learning techniques to predict BMI, using a wide range of predictive factors. Park et al. identified specific brain regions&#x2019; functional connectivity as significant biomarkers for predicting BMI changes in adolescents, with high accuracy achieved through machine learning-based neuroimaging analysis (<xref ref-type="bibr" rid="ref10">10</xref>). Yao et al. proposed a deep learning model that uses smartphone motion sensors to predict BMI, demonstrating that motion entropy-based filtering significantly improved the model&#x2019;s prediction accuracy, particularly with jogging as the activity of choice (<xref ref-type="bibr" rid="ref12">12</xref>). Kim et al. presented an approach for predicting BMI and various body part sizes using multi-view body images. Their method demonstrated high accuracy and highlighted the potential of leveraging large-scale open datasets for applications in health monitoring, fitness tracking, and apparel sizing (<xref ref-type="bibr" rid="ref13">13</xref>). Arum&#x00E4;e et al. found that while the five personality domains could predict current BMI, 29 specific personality traits were able to predict both current and future BMI (<xref ref-type="bibr" rid="ref21">21</xref>). Singh and Tawfik reported that early BMI data, along with demographic factors such as age and gender, serve as key predictors for forecasting BMI changes during adolescence (<xref ref-type="bibr" rid="ref22">22</xref>).</p>
<p>Recent studies have increasingly incorporated interpretability techniques to enhance transparency in machine learning-based health predictions. Li et al. developed an individualized prediction model for myelosuppression risk in lung cancer patients using machine learning, employing SHAP to evaluate feature importance, with the analysis indicating white blood cell count, platelet count, neutrophil count, BMI, and age as the most influential predictors (<xref ref-type="bibr" rid="ref18">18</xref>). Luo et al. trained machine learning models to predict cardiac surgery-associated acute kidney injury (CSA-AKI) in pediatric patients, utilizing SHAP to identify key predictors such as baseline serum creatinine level, perfusion time, and operation time (<xref ref-type="bibr" rid="ref19">19</xref>). Yu et al. applied SHAP to interpret the LightGBM model for predicting frailty risk, emphasizing the importance of cognitive function, grip strength, sleep duration, and BMI as key predictors, and demonstrated SHAP&#x2019;s effectiveness in revealing the model&#x2019;s decision-making process (<xref ref-type="bibr" rid="ref20">20</xref>).</p>
</sec>
<sec sec-type="methods" id="sec3">
<label>3</label>
<title>Methods</title>
<sec id="sec4">
<label>3.1</label>
<title>Selection of participants</title>
<p>This study, conducted in September 2023 in Anhui, China, involved students aged 14&#x2013;17 from nine pilot high schools. These pilot schools included both general senior high schools and vocational schools, and were located in central and non-central cities, ensuring the inclusion of students with diverse socioeconomic backgrounds. The inclusion criteria required participants to meet the following conditions: (1) no history of major illnesses; (2) no plans to transfer schools or relocate during the upcoming year; (3) the ability to participate in follow-up surveys for 1 year.</p>
</sec>
<sec id="sec5">
<label>3.2</label>
<title>Data collection</title>
<p>At baseline, participants underwent anthropometric assessments, including body mass and height measurements with digital scales and wall-mounted stadiometers, conducted by trained researchers. BMI was calculated as weight in kilograms divided by height in meters squared <inline-formula>
<mml:math id="M1">
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">BMI</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext mathvariant="italic">weight</mml:mtext>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">kg</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mtext mathvariant="italic">height</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>.</mml:mo>
</mml:math>
</inline-formula> Basal metabolic rate (BMR) was estimated using the FAO/WHO/UNU adolescent (10&#x2013;18 y) predictive equations: for male, BMR (kcal/day)&#x202F;=&#x202F;16.6&#x202F;&#x00D7;&#x202F;Weight (kg)&#x202F;+&#x202F;77&#x202F;&#x00D7;&#x202F;Height (m)&#x202F;+&#x202F;572; for female, BMR (kcal/day)&#x202F;=&#x202F;7.4&#x202F;&#x00D7;&#x202F;Weight (kg)&#x202F;+&#x202F;482&#x202F;&#x00D7;&#x202F;Height (m)&#x202F;+&#x202F;217 (<xref ref-type="bibr" rid="ref23">23</xref>, <xref ref-type="bibr" rid="ref24">24</xref>). Overweight and obesity status was classified based on age- and sex-specific BMI reference standards established by the Working Group on Obesity in China for school-aged children and adolescents (<xref ref-type="bibr" rid="ref25">25</xref>). In addition, students and their parents completed a structured electronic questionnaire through a publicly accessible online system available on both mobile phones and computers. Developed based on prior studies (<xref ref-type="bibr" rid="ref7 ref8 ref9">7&#x2013;9</xref>, <xref ref-type="bibr" rid="ref26">26</xref>, <xref ref-type="bibr" rid="ref27">27</xref>), the questionnaire covered a range of factors, including genetic predispositions, socio-demographic characteristics, daily habits, physical activity patterns, self-perception of body status, and health literacy. The system required participants to complete all items before submission, ensuring no missing data at the individual question level. After 1 year, a follow-up anthropometric assessment was conducted to evaluate BMI changes in the cohort, with assessors blinded to baseline measurements.</p>
</sec>
<sec id="sec6">
<label>3.3</label>
<title>Statistical analysis</title>
<p>This study conducted a power analysis based on a medium effect size (Cohen&#x2019;s <italic>f</italic><sup>2</sup>&#x202F;=&#x202F;0.15) and a significance level of 0.05 to assess whether the sample size was adequate for detecting meaningful effects in the statistical analyses (<xref ref-type="bibr" rid="ref28">28</xref>). To assess potential clustering by school, we fit a two-level random-intercept linear mixed-effects model and computed the intraclass correlation coefficient (ICC) as the ratio of between-school variance to total variance (<xref ref-type="bibr" rid="ref29">29</xref>). The impact of loss to follow-up was examined by comparing baseline characteristics of included vs. excluded participants using the standardized mean difference (SMD), reported as absolute values (|SMD|) (<xref ref-type="bibr" rid="ref30">30</xref>). Differences were considered negligible when |SMD|&#x202F;&#x003C;&#x202F;0.10, and larger values were regarded as imbalanced (<xref ref-type="bibr" rid="ref31">31</xref>). In addition, we quantified differential attrition in key subgroups (age, gender group, and baseline BMI category) by reporting attrition rates and risk differences (RDs) with 95% CIs relative to a prespecified reference level. Descriptive statistics were reported as means &#x00B1; standard deviation for continuous variables and as counts for categorical variables. To assess regression to the mean (RTM), the change score was computed (&#x0394;&#x202F;=&#x202F;Y<sub>2</sub>&#x202F;&#x2212;&#x202F;Y<sub>1</sub>, with Y<sub>1</sub>&#x202F;=&#x202F;baseline BMI and Y<sub>2</sub>&#x202F;=&#x202F;follow-up BMI), and a simple linear model was fitted: (&#x0394;&#x202F;=&#x202F;<italic>&#x03B1;</italic>&#x202F;+&#x202F;<italic>&#x03B2;</italic>Y<sub>1</sub>&#x202F;+&#x202F;<italic>&#x03B5;</italic>). A negative <italic>&#x03B2;</italic> indicates that higher baseline values are associated with greater negative change (shrinkage toward the mean). 
The point estimate of <italic>&#x03B2;</italic>, its 95% confidence interval (CI), and the <italic>p</italic>-value were reported. R<sup>2</sup> quantified the proportion of variance in &#x0394; attributable to RTM, with a 95% CI obtained via nonparametric bootstrap.</p>
</sec>
<sec id="sec7">
<label>3.4</label>
<title>Data preprocessing</title>
<p>This study applied the same data preprocessing to all algorithms to keep an identical feature space. Z-score standardization was applied to numerical features, ordered categorical variables were encoded with prespecified ordinal levels, and unordered categorical variables were represented using one-hot encoding. All preprocessing components were fit on training data (or training folds) only and applied to validation/test sets to prevent leakage.</p>
</sec>
<sec id="sec8">
<label>3.5</label>
<title>Model construction</title>
<p>In the development of our prediction model, we adhered to the TRIPOD+AI checklist, and the completed checklist is provided as <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>.</p>
<p>To establish a BMI prediction model, six regression algorithms were utilized: CatBoost (CB), LightGBM (LGBM), Neural Network (MLP), Decision Tree (DT), Support Vector Regressor (SVR), and K-Nearest Neighbors (KNN). These models represent a range of learning paradigms, allowing for a comprehensive comparison across different modeling strategies. Among them, tree-based models such as CB and LGBM are especially well-suited for handling high-dimensional, noisy, and heterogeneous health data. First, the dataset was split into training (80%) and independent test (20%) sets, with the test set held out throughout model development and tuning. On the 80% training set, all algorithms underwent 5&#x202F;&#x00D7;&#x202F;5 nested cross-validation (CV), with the inner loop performing feature selection and hyperparameter tuning and the outer loop providing unbiased performance estimates (<xref ref-type="bibr" rid="ref32">32</xref>). Within each inner loop, predictors were prescreened on the inner-training folds using univariable analyses [Pearson correlation for continuous variables and analysis of covariance (ANCOVA) for categorical variables; <italic>p</italic>&#x202F;&#x003C;&#x202F;0.05], followed by five-fold CV with grid search to select the hyperparameter set with the best mean validation score. Features selected across the five inner folds were aggregated by selection frequency to form that outer fold&#x2019;s consensus feature set (<xref ref-type="bibr" rid="ref33">33</xref>). In the outer loop, models were retrained on the outer-training folds using the consensus features and inner-optimal hyperparameters, and then evaluated on the outer test folds for unbiased assessment. Model performance was assessed based on four widely used indicators: root mean squared error (RMSE), mean squared error (MSE), mean absolute error (MAE) and coefficient of determination (R<sup>2</sup>). 
MSE reflects the average squared difference between predicted and actual values, capturing overall model fit. RMSE, the square root of MSE, reports the typical prediction error in the outcome&#x2019;s original units. MAE provides a direct measure of the average prediction error. R<sup>2</sup> quantifies the proportion of variance in the outcome that is explained by the model, indicating its explanatory power. After completing all outer folds, performance on the outer test folds was summarized as mean &#x00B1; SD. The model with the lowest RMSE/MSE/MAE and highest R<sup>2</sup> was deemed optimal. All outer-fold feature sets were then combined using the same frequency rule to obtain the final feature set. Final hyperparameters were selected via five-fold CV with grid search, then the model was retrained on the entire 80% training set. Complete model-tuning details (hyperparameter search grids, CV folds, seeds, early-stopping settings) are provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>.</p>
</sec>
<sec id="sec9">
<label>3.6</label>
<title>Heteroscedasticity investigation and model calibration</title>
<p>Heteroscedasticity was assessed on the 80% training set using out-of-fold residuals via a Breusch&#x2013;Pagan test (<italic>&#x03B1;</italic>&#x202F;=&#x202F;0.05) to examine whether prediction errors varied with baseline BMI (<xref ref-type="bibr" rid="ref34">34</xref>). Results were reported without calibration when the test was not significant, and weighted least-squares (WLS) calibration was applied otherwise (<xref ref-type="bibr" rid="ref35">35</xref>, <xref ref-type="bibr" rid="ref36">36</xref>). Residuals were computed from training out-of-fold predictions and used to fit an empirical variance model to derive sample weights. Using these weights, a linear recalibration of observed versus predicted values was fit on the training data to obtain a fixed intercept and slope. The learned weighting function and coefficients were then applied once to the test set without any refitting, avoiding information leakage.</p>
</sec>
<sec id="sec10">
<label>3.7</label>
<title>Integrated evaluation of model performance</title>
<p>Overall model performance was compared on the 20% independent test set. First, generalization was assessed with 1,000 bootstrap resamples, reporting RMSE, MSE, MAE, and R<sup>2</sup> with 95% confidence intervals (CIs) for each model. Paired tests were conducted by bootstrapping the paired differences in RMSE using identical resamples across models (defined as comparator minus best model). This quantified the incremental benefit of the best-performing model, reporting &#x0394;RMSE with its 95% CIs. Second, for the best-performing model, incremental benefit over a trivial baseline (predicting follow-up BMI&#x202F;=&#x202F;baseline BMI) was quantified on the same independent test set using paired bootstrap with 1,000 resamples. Paired differences were defined as &#x0394;RMSE/&#x0394;MSE/&#x0394;MAE&#x202F;=&#x202F;baseline &#x2212; best and &#x0394;R<sup>2</sup>&#x202F;=&#x202F;best &#x2212; baseline, and &#x0394; values were reported with 95% CIs. Third, to assess robustness to a dominant predictor, a sensitivity analysis excluding baseline BMI was performed. Under an identical modeling pipeline to the primary analysis, we retrained and evaluated the model without baseline BMI using the best-performing algorithmic framework, and reported RMSE, MSE, MAE, and R<sup>2</sup> on the same independent test set.</p>
<p>Stratified performance was evaluated across clinically relevant subgroups, including gender (male vs. female), age groups (14&#x2013;15 vs. 16&#x2013;17&#x202F;years), and baseline BMI category (normal vs. overweight/obesity). For each subgroup, we reported RMSE, MSE, MAE, and R<sup>2</sup> with 95% CIs. We also computed and reported between-group differences (&#x0394;) in each metric with 95% CIs. Subgroup heterogeneity was assessed using permutation tests, and the significance level was set at 0.05.</p>
<p>Using the same independent test set, overall and stratified error analyses were performed for the calibrated best-performing model. For overall error visualization, we generated predicted-versus-observed scatterplots with a smoothing line and a Bland&#x2013;Altman plot. To characterize the error distribution, we calculated mean error &#x00B1; SD, mean absolute error (MAD), the interquartile range of |error| (IQR|e|), and the 90th/95th percentiles of |error| (P90|e|/P95|e|), and reported these metrics both overall and stratified by gender, age group, and baseline BMI category.</p>
</sec>
<sec id="sec11">
<label>3.8</label>
<title>Model interpretation</title>
<p>This study applied SHAP to provide both global and local interpretation of the best-performing model. We quantified the contribution of each feature to model predictions by examining feature interactions, and decomposed individual predictions into additive feature contributions, using visualizations to convey overall patterns and individual differences. Non-modifiable features were excluded from SHAP visualizations. For global interpretation, we generated a SHAP summary plot based on mean absolute SHAP values across all samples, ranking modifiable predictors for BMI. In addition, we computed SHAP interaction values and displayed a heatmap of their mean strength across samples. SHAP dependence plots were used to examine the overall effect shapes of specific features across the cohort and to highlight potential interactions. For local interpretation, SHAP waterfall plots were generated to break each individual&#x2019;s predicted BMI into the model&#x2019;s base value (the average prediction) plus the additive contributions of features, thereby visualizing how specific factors influence the prediction at the individual level.</p>
</sec>
<sec id="sec12">
<label>3.9</label>
<title>Reproducibility statement</title>
<p>Analyses were run in Python 3.11.3 (scikit-learn 1.5.2, CatBoost 1.2.7, LightGBM 4.5.0, SHAP 0.46.0). Randomness was controlled by fixing seeds for dataset splitting, outer/inner nested CV splitters, the final 5-fold CV, and algorithm random seeds. All seed values are listed in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>. Development was performed in PyCharm 2023.1.3.</p>
</sec>
</sec>
<sec sec-type="results" id="sec13">
<label>4</label>
<title>Results</title>
<sec id="sec14">
<label>4.1</label>
<title>Study characteristics</title>
<p>At baseline, a total of 2,006 students aged 14&#x2013;17 were enrolled in the study. During the one-year follow-up period, 98 students were excluded due to incomplete baseline anthropometric data, 21 students withdrew due to lack of interest or parental refusal, 45 students were excluded due to incomplete follow-up measurements, and 15 students experienced health complications that hindered their continued participation. Consequently, 1,827 students (1,009 males and 818 females) with complete data at both time points were included in the final analysis. Notably, the electronic questionnaire system ensured all items were completed, guaranteeing no missing values in baseline data. Anthropometric measurements were 100% complete in the final cohort. The participant eligibility, follow-up, and analysis process is illustrated in <xref ref-type="fig" rid="fig1">Figure 1</xref>. At baseline, the mean BMI was 21.18&#x202F;&#x00B1;&#x202F;3.63&#x202F;kg/m<sup>2</sup>, and after 1 year, it increased to 21.54&#x202F;&#x00B1;&#x202F;3.59&#x202F;kg/m<sup>2</sup>. The mean &#x0394;BMI over the one-year period, calculated as the individual-level difference between follow-up and baseline BMI, was 0.36&#x202F;&#x00B1;&#x202F;1.40&#x202F;kg/m<sup>2</sup>, reflecting a slight upward trend within the cohort. The power analysis confirmed that the final sample size was sufficient to detect medium-sized effects with adequate statistical power, supporting the validity of the univariate analyses. In addition, the school-level ICC for baseline BMI was approximately zero (95% CI: 0.000&#x2013;0.0037; <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05), indicating negligible clustering. Baseline characteristics are presented in <xref ref-type="table" rid="tab1">Table 1</xref>, comparing included and excluded participants. All |SMD| values were &#x003C;0.10, indicating good balance of baseline characteristics with negligible differences. The overall loss to follow-up was 4.2%. 
In prespecified subgroup analyses of attrition rates and RDs versus the reference level, differences were small and imprecise. Age: 14&#x2013;15&#x202F;years 4.2% (reference) vs. 16&#x2013;17&#x202F;years 4.4%, RD&#x202F;=&#x202F;+0.3 percentage points (95% CI: &#x2212;1.7&#x2013;2.2). Gender: female 4.0% (reference) vs. male 4.5%, RD&#x202F;=&#x202F;+0.5 percentage points (95% CI: &#x2212;1.3&#x2013;2.3). Baseline BMI category: non-overweight/obese 4.1% (reference) vs. overweight/obese 4.7%, RD&#x202F;=&#x202F;+0.6 percentage points (95% CI: &#x2212;1.5&#x2013;2.7). Collectively, subgroup RDs were close to zero and all 95% CIs included zero, providing no evidence of differential attrition. Furthermore, the RTM analysis yielded <italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;0.084 (95% CI: &#x2212;0.122 to &#x2212;0.047; <italic>p</italic>&#x202F;&#x003C;&#x202F;0.01), indicating evidence of regression to the mean. The coefficient of determination was R<sup>2</sup>&#x202F;=&#x202F;0.051 (95% CI: 0.008&#x2013;0.119), implying that baseline BMI accounts for about 5% (95% CI: 0.8&#x2013;11.9%) of the variance in the observed change.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>CONSORT flow diagram.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart depicting a study's participant selection process. Initially, 2,006 were assessed for eligibility; 98 were excluded due to incomplete baseline data. Of the 1,908 eligible, 36 were lost to follow-up due to lack of interest or health issues. Finally, 1,827 were analyzed after excluding 45 for incomplete follow-up data.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Baseline characteristics of included and excluded participants.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Baseline characteristic</th>
<th align="center" valign="top">Included participants (<italic>n</italic>&#x202F;=&#x202F;1,827)</th>
<th align="center" valign="top">Excluded participants (<italic>n</italic>&#x202F;=&#x202F;81)</th>
<th align="center" valign="top">SMD</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Baseline BMI</td>
<td align="center" valign="middle">21.18&#x202F;&#x00B1;&#x202F;3.63</td>
<td align="center" valign="middle">21.14&#x202F;&#x00B1;&#x202F;2.85</td>
<td align="char" valign="middle" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">Baseline BMR</td>
<td align="center" valign="middle">1,601.24&#x202F;&#x00B1;&#x202F;223.39</td>
<td align="center" valign="middle">1,621.06&#x202F;&#x00B1;&#x202F;250.58</td>
<td align="char" valign="middle" char=".">0.08</td>
</tr>
<tr>
<td align="left" valign="middle">Age</td>
<td align="center" valign="middle">15.19&#x202F;&#x00B1;&#x202F;1.12</td>
<td align="center" valign="middle">15.21&#x202F;&#x00B1;&#x202F;1.10</td>
<td align="char" valign="middle" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Gender</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Male</td>
<td align="center" valign="middle">1,009 (55.2%)</td>
<td align="center" valign="bottom">47 (58.0%)</td>
<td align="char" valign="bottom" char=".">0.06</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Female</td>
<td align="center" valign="middle">818 (44.8%)</td>
<td align="center" valign="bottom">34 (42.0%)</td>
<td align="char" valign="bottom" char=".">0.06</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Paternal overweight/obesity status</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Yes</td>
<td align="center" valign="middle">713 (39.0%)</td>
<td align="center" valign="bottom">32 (39.5%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;No</td>
<td align="center" valign="middle">1,114 (61.0%)</td>
<td align="center" valign="bottom">49 (60.5%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Paternal educational level</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Lower</td>
<td align="center" valign="middle">872 (47.7%)</td>
<td align="center" valign="bottom">38 (46.9%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Middle</td>
<td align="center" valign="middle">630 (34.5%)</td>
<td align="center" valign="bottom">29 (35.8%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Higher</td>
<td align="center" valign="middle">325 (17.8%)</td>
<td align="center" valign="bottom">14 (17.3%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Paternal occupation</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Civil servants and public institutions</td>
<td align="center" valign="middle">210 (11.5%)</td>
<td align="center" valign="bottom">10 (12.3%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Professional technicians</td>
<td align="center" valign="top">195 (10.7%)</td>
<td align="center" valign="bottom">11 (13.6%)</td>
<td align="char" valign="bottom" char=".">0.09</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Business and service industries</td>
<td align="center" valign="top">496 (27.1%)</td>
<td align="center" valign="bottom">25 (30.9%)</td>
<td align="char" valign="bottom" char=".">0.08</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Workers or farmers</td>
<td align="center" valign="top">337 (18.4%)</td>
<td align="center" valign="bottom">12 (14.8%)</td>
<td align="char" valign="bottom" char=".">0.09</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Homemakers or unemployed</td>
<td align="center" valign="top">16 (0.9%)</td>
<td align="center" valign="bottom">1 (1.2%)</td>
<td align="char" valign="bottom" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Others</td>
<td align="center" valign="top">573 (31.4%)</td>
<td align="center" valign="bottom">22 (27.2%)</td>
<td align="char" valign="bottom" char=".">0.09</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Maternal overweight/obesity status</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Yes</td>
<td align="center" valign="top">358 (19.6%)</td>
<td align="center" valign="bottom">15 (18.5%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;No</td>
<td align="center" valign="top">1,469 (80.4%)</td>
<td align="center" valign="bottom">66 (81.5%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Maternal educational level</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Lower</td>
<td align="center" valign="top">966 (52.9%)</td>
<td align="center" valign="bottom">42 (51.9%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Middle</td>
<td align="center" valign="top">637 (34.9%)</td>
<td align="center" valign="bottom">27 (33.3%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Higher</td>
<td align="center" valign="top">224 (12.3%)</td>
<td align="center" valign="bottom">12 (14.8%)</td>
<td align="char" valign="bottom" char=".">0.08</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Maternal occupation</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Civil servants and public institutions</td>
<td align="center" valign="middle">167 (9.1%)</td>
<td align="center" valign="bottom">7 (8.6%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Professional technicians</td>
<td align="center" valign="middle">96 (5.3%)</td>
<td align="center" valign="bottom">5 (6.2%)</td>
<td align="char" valign="bottom" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Business and service industries</td>
<td align="center" valign="middle">449 (24.6%)</td>
<td align="center" valign="bottom">20 (24.7%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Workers or farmers</td>
<td align="center" valign="middle">248 (13.6%)</td>
<td align="center" valign="bottom">11 (13.6%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Homemakers or unemployed</td>
<td align="center" valign="middle">311 (17.0%)</td>
<td align="center" valign="bottom">14 (17.3%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Others</td>
<td align="center" valign="middle">556 (30.4%)</td>
<td align="center" valign="bottom">24 (29.6%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Family income</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Lowest</td>
<td align="center" valign="top">514 (28.1%)</td>
<td align="center" valign="bottom">23 (28.4%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Lower middle</td>
<td align="center" valign="top">655 (35.9%)</td>
<td align="center" valign="bottom">28 (34.6%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Upper middle</td>
<td align="center" valign="top">469 (25.7%)</td>
<td align="center" valign="bottom">22 (27.2%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Highest</td>
<td align="center" valign="top">189 (10.3%)</td>
<td align="center" valign="bottom">8 (9.9%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Family residence location</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Center city</td>
<td align="center" valign="top">937 (51.3%)</td>
<td align="center" valign="bottom">42 (51.9%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Non-center city</td>
<td align="center" valign="top">890 (48.7%)</td>
<td align="center" valign="bottom">39 (48.1%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">On-campus residence</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Yes</td>
<td align="center" valign="middle">1,315 (72.0%)</td>
<td align="center" valign="bottom">58 (71.6%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;No</td>
<td align="center" valign="middle">512 (28.0%)</td>
<td align="center" valign="bottom">23 (28.4%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Daily sleep duration</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;&#x003C;6&#x202F;h/day</td>
<td align="center" valign="middle">305 (16.7%)</td>
<td align="center" valign="bottom">15 (18.5%)</td>
<td align="char" valign="bottom" char=".">0.05</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;6&#x2013;8&#x202F;h/day</td>
<td align="center" valign="middle">1,299 (71.1%)</td>
<td align="center" valign="bottom">56 (69.1%)</td>
<td align="char" valign="bottom" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;&#x003E;8&#x202F;h/day</td>
<td align="center" valign="middle">223 (12.2%)</td>
<td align="center" valign="bottom">10 (12.3%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Frequency of staying up late</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Never</td>
<td align="center" valign="middle">370 (20.3%)</td>
<td align="center" valign="bottom">16 (19.8%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Sometimes</td>
<td align="center" valign="middle">839 (45.9%)</td>
<td align="center" valign="bottom">37 (45.7%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Often</td>
<td align="center" valign="middle">300 (16.4%)</td>
<td align="center" valign="bottom">14 (17.3%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Always</td>
<td align="center" valign="middle">318 (17.4%)</td>
<td align="center" valign="bottom">14 (17.3%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Sedentariness duration on weekends</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;3&#x2013;5&#x202F;h/day</td>
<td align="center" valign="middle">355 (19.4%)</td>
<td align="center" valign="bottom">15 (18.5%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;5&#x2013;7&#x202F;h/day</td>
<td align="center" valign="middle">420 (23.0%)</td>
<td align="center" valign="bottom">20 (24.7%)</td>
<td align="char" valign="bottom" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;7&#x2013;9&#x202F;h/day</td>
<td align="center" valign="middle">748 (40.9%)</td>
<td align="center" valign="bottom">33 (40.7%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;&#x003E;9&#x202F;h/day</td>
<td align="center" valign="middle">304 (16.6%)</td>
<td align="center" valign="bottom">13 (16.0%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Schoolwork burden</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Minimal</td>
<td align="center" valign="middle">51 (2.8%)</td>
<td align="center" valign="bottom">2 (2.5%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Manageable</td>
<td align="center" valign="middle">954 (52.2%)</td>
<td align="center" valign="bottom">43 (53.1%)</td>
<td align="char" valign="bottom" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;High</td>
<td align="center" valign="middle">689 (37.7%)</td>
<td align="center" valign="bottom">31 (38.3%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Overwhelming</td>
<td align="center" valign="middle">133 (7.3%)</td>
<td align="center" valign="bottom">5 (6.2%)</td>
<td align="char" valign="bottom" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Frequency of high-protein food intake</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Never</td>
<td align="center" valign="middle">210 (11.5%)</td>
<td align="center" valign="bottom">10 (12.3%)</td>
<td align="char" valign="bottom" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Sometimes</td>
<td align="center" valign="middle">808 (44.2%)</td>
<td align="center" valign="bottom">36 (44.4%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Often</td>
<td align="center" valign="middle">463 (25.3%)</td>
<td align="center" valign="bottom">20 (24.7%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Always</td>
<td align="center" valign="middle">346 (18.9%)</td>
<td align="center" valign="bottom">15 (18.5%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Frequency of midnight snack intake</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Never</td>
<td align="center" valign="middle">503 (27.5%)</td>
<td align="center" valign="bottom">22 (27.2%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Sometimes</td>
<td align="center" valign="middle">1,056 (57.8%)</td>
<td align="center" valign="bottom">47 (58.0%)</td>
<td align="char" valign="bottom" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Often</td>
<td align="center" valign="middle">195 (10.7%)</td>
<td align="center" valign="bottom">9 (11.1%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Always</td>
<td align="center" valign="middle">73 (4.0%)</td>
<td align="center" valign="bottom">3 (3.7%)</td>
<td align="char" valign="bottom" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="4">Frequency of high-calorie foods intake</td>
</tr>
<tr>
<td align="left" valign="middle">&#x2003;Never</td>
<td align="center" valign="middle">309 (16.9%)</td>
<td align="center" valign="bottom">13 (16.0%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Sometimes</td>
<td align="center" valign="top">916 (50.1%)</td>
<td align="center" valign="top">41 (50.6%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Often</td>
<td align="center" valign="top">458 (25.1%)</td>
<td align="center" valign="top">20 (24.7%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Always</td>
<td align="center" valign="top">144 (7.9%)</td>
<td align="center" valign="top">7 (8.6%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Frequency of participation in physical activities</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;0 times/week</td>
<td align="center" valign="top">253 (13.8%)</td>
<td align="center" valign="top">13 (16.0%)</td>
<td align="char" valign="top" char=".">0.06</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;1&#x2013;2 times/week</td>
<td align="center" valign="top">543 (29.7%)</td>
<td align="center" valign="top">21 (25.9%)</td>
<td align="char" valign="top" char=".">0.08</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;2&#x2013;3 times/week</td>
<td align="center" valign="top">784 (42.9%)</td>
<td align="center" valign="top">35 (43.2%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;&#x003E;3 times/week</td>
<td align="center" valign="top">247 (13.5%)</td>
<td align="center" valign="top">12 (14.8%)</td>
<td align="char" valign="top" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Post-exercise sensations</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Relaxed</td>
<td align="center" valign="top">147 (8.0%)</td>
<td align="center" valign="top">8 (9.9%)</td>
<td align="char" valign="top" char=".">0.07</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Slightly tired</td>
<td align="center" valign="top">1,103 (60.4%)</td>
<td align="center" valign="top">49 (60.5%)</td>
<td align="char" valign="top" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Fairly tired</td>
<td align="center" valign="top">452 (24.7%)</td>
<td align="center" valign="top">19 (23.5%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Extremely tired</td>
<td align="center" valign="top">125 (6.8%)</td>
<td align="center" valign="top">5 (6.2%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Physical activities duration on weekends</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;&#x003C;1&#x202F;h/day</td>
<td align="center" valign="top">1,130 (61.9%)</td>
<td align="center" valign="top">49 (60.5%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;1&#x2013;2&#x202F;h/day</td>
<td align="center" valign="top">517 (28.3%)</td>
<td align="center" valign="top">24 (29.6%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;2&#x2013;3&#x202F;h/day</td>
<td align="center" valign="top">101 (5.5%)</td>
<td align="center" valign="top">4 (4.9%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;&#x003E;3&#x202F;h/day</td>
<td align="center" valign="top">79 (4.3%)</td>
<td align="center" valign="top">4 (4.9%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Participation in professional sports training</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Yes</td>
<td align="center" valign="top">207 (11.3%)</td>
<td align="center" valign="top">11 (13.6%)</td>
<td align="char" valign="top" char=".">0.07</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;No</td>
<td align="center" valign="top">1,620 (88.7%)</td>
<td align="center" valign="top">70 (86.4%)</td>
<td align="char" valign="top" char=".">0.07</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Parental support for sports involvement</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Low support</td>
<td align="center" valign="top">100 (5.5%)</td>
<td align="center" valign="top">5 (6.2%)</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Moderate support</td>
<td align="center" valign="top">1,088 (59.6%)</td>
<td align="center" valign="top">48 (59.3%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;High support</td>
<td align="center" valign="top">639 (35.0%)</td>
<td align="center" valign="top">28 (34.6%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Recognize self-weight status correctly</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Yes</td>
<td align="center" valign="top">1,454 (79.6%)</td>
<td align="center" valign="top">65 (80.2%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;No</td>
<td align="center" valign="top">373 (20.4%)</td>
<td align="center" valign="top">16 (19.8%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Satisfaction with body size</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Yes</td>
<td align="center" valign="top">770 (42.1%)</td>
<td align="center" valign="top">35 (43.2%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;No</td>
<td align="center" valign="top">1,057 (57.9%)</td>
<td align="center" valign="top">46 (56.8%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Considered changing body size</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Yes</td>
<td align="center" valign="top">1,184 (64.8%)</td>
<td align="center" valign="top">54 (66.7%)</td>
<td align="char" valign="top" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;No</td>
<td align="center" valign="top">643 (35.2%)</td>
<td align="center" valign="top">27 (33.3%)</td>
<td align="char" valign="top" char=".">0.04</td>
</tr>
<tr>
<td align="left" valign="top" colspan="4">Level of health literacy</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Lowest</td>
<td align="center" valign="top">345 (18.9%)</td>
<td align="center" valign="top">15 (18.5%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Lower middle</td>
<td align="center" valign="top">408 (22.3%)</td>
<td align="center" valign="top">18 (22.2%)</td>
<td align="char" valign="top" char=".">0.00</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Upper middle</td>
<td align="center" valign="top">480 (26.3%)</td>
<td align="center" valign="top">21 (25.9%)</td>
<td align="char" valign="top" char=".">0.01</td>
</tr>
<tr>
<td align="left" valign="top">&#x2003;Highest</td>
<td align="center" valign="top">594 (32.5%)</td>
<td align="center" valign="top">27 (33.3%)</td>
<td align="char" valign="top" char=".">0.02</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Values are mean &#x00B1; SD for continuous variables and n (%) for categorical variables; SMD are reported as absolute value (|SMD|).</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec15">
<label>4.2</label>
<title>Integrated overall model performance</title>
<p>Under nested cross-validation, the CB model performed the best, achieving the lowest RMSE, MSE, and MAE, and the highest R<sup>2</sup>, as shown in <xref ref-type="table" rid="tab2">Table 2</xref>. The final hyperparameters selected for all models are reported in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>. The final CB model included the following predictors: baseline BMI, baseline BMR, level of health literacy, recognize self-weight status correctly, sedentariness duration on weekends, participation in professional sports training, frequency of staying up late, daily sleep duration, frequency of high-calorie food intake, physical activities duration on weekends, post-exercise sensations, satisfaction with body size, family residence location, and on-campus residence. <xref ref-type="table" rid="tab3">Table 3</xref> summarizes the performance of all final models on the independent test set, reporting RMSE, MSE, MAE, and R<sup>2</sup> with 95% CIs. The CB model demonstrated the best generalization. In addition, the paired bootstrap RMSE differences (comparator&#x202F;&#x2212;&#x202F;CB) were 0.080 (95% CI: 0.012&#x2013;0.151) for MLP, 0.124 (95% CI: 0.065&#x2013;0.185) for LGBM, 0.076 (95% CI: 0.011&#x2013;0.142) for SVR, 0.453 (95% CI: 0.340&#x2013;0.578) for KNN, and 0.171 (95% CI: 0.074&#x2013;0.268) for DT; all intervals were strictly positive, confirming a significantly lower RMSE for CB than for each comparator model.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>The performance of each algorithm in terms of RMSE, MSE, MAE, and R<sup>2</sup> on nested cross-validation.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">RMSE (mean &#x00B1; SD)</th>
<th align="center" valign="top">MSE (mean &#x00B1; SD)</th>
<th align="center" valign="top">MAE (mean &#x00B1; SD)</th>
<th align="center" valign="top">R<sup>2</sup> (mean &#x00B1; SD)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">CatBoost</td>
<td align="char" valign="middle" char="&#x00B1;">1.204 <bold>&#x00B1;</bold> 0.063</td>
<td align="char" valign="middle" char="&#x00B1;">1.453 <bold>&#x00B1;</bold> 0.152</td>
<td align="char" valign="middle" char="&#x00B1;">0.900 <bold>&#x00B1;</bold> 0.043</td>
<td align="char" valign="middle" char="&#x00B1;">0.882 <bold>&#x00B1;</bold> 0.013</td>
</tr>
<tr>
<td align="left" valign="middle">LightGBM</td>
<td align="char" valign="top" char="&#x00B1;">1.288 <bold>&#x00B1;</bold> 0.034</td>
<td align="char" valign="top" char="&#x00B1;">1.659 <bold>&#x00B1;</bold> 0.087</td>
<td align="char" valign="top" char="&#x00B1;">0.969 <bold>&#x00B1;</bold> 0.029</td>
<td align="char" valign="top" char="&#x00B1;">0.864 <bold>&#x00B1;</bold> 0.018</td>
</tr>
<tr>
<td align="left" valign="middle">Neural Network</td>
<td align="char" valign="top" char="&#x00B1;">1.233 <bold>&#x00B1;</bold> 0.080</td>
<td align="char" valign="top" char="&#x00B1;">1.526 <bold>&#x00B1;</bold> 0.192</td>
<td align="char" valign="top" char="&#x00B1;">0.935 <bold>&#x00B1;</bold> 0.059</td>
<td align="char" valign="top" char="&#x00B1;">0.875 <bold>&#x00B1;</bold> 0.020</td>
</tr>
<tr>
<td align="left" valign="middle">Decision Tree</td>
<td align="char" valign="top" char="&#x00B1;">1.423 <bold>&#x00B1;</bold> 0.091</td>
<td align="char" valign="top" char="&#x00B1;">2.032 <bold>&#x00B1;</bold> 0.258</td>
<td align="char" valign="top" char="&#x00B1;">1.034 <bold>&#x00B1;</bold> 0.047</td>
<td align="char" valign="top" char="&#x00B1;">0.833 <bold>&#x00B1;</bold> 0.035</td>
</tr>
<tr>
<td align="left" valign="middle">Support Vector Regressor</td>
<td align="char" valign="top" char="&#x00B1;">1.247 <bold>&#x00B1;</bold> 0.059</td>
<td align="char" valign="top" char="&#x00B1;">1.558 <bold>&#x00B1;</bold> 0.145</td>
<td align="char" valign="top" char="&#x00B1;">0.918 <bold>&#x00B1;</bold> 0.041</td>
<td align="char" valign="top" char="&#x00B1;">0.873 <bold>&#x00B1;</bold> 0.014</td>
</tr>
<tr>
<td align="left" valign="middle">K-Nearest Neighbors</td>
<td align="char" valign="top" char="&#x00B1;">1.617 <bold>&#x00B1;</bold> 0.052</td>
<td align="char" valign="top" char="&#x00B1;">2.618 <bold>&#x00B1;</bold> 0.167</td>
<td align="char" valign="top" char="&#x00B1;">1.251 <bold>&#x00B1;</bold> 0.044</td>
<td align="char" valign="top" char="&#x00B1;">0.787 <bold>&#x00B1;</bold> 0.022</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>RMSE, root mean squared error; MSE, mean squared error; MAE, mean absolute error; R<sup>2</sup>, coefficient of determination; SD, standard deviation; CI, confidence interval.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>The performance of each algorithm in terms of MSE, RMSE, MAE, and R<sup>2</sup> on the independent test set.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">RMSE (95% CI)</th>
<th align="center" valign="top">MSE (95% CI)</th>
<th align="center" valign="top">MAE (95% CI)</th>
<th align="center" valign="top">R<sup>2</sup> (95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">CatBoost</td>
<td align="char" valign="middle" char="(">1.212 (1.113&#x2013;1.322)</td>
<td align="char" valign="middle" char="(">1.468 (1.238&#x2013;1.748)</td>
<td align="char" valign="middle" char="(">0.897 (0.820&#x2013;0.987)</td>
<td align="char" valign="middle" char="(">0.900 (0.878&#x2013;0.917)</td>
</tr>
<tr>
<td align="left" valign="middle">LightGBM</td>
<td align="char" valign="top" char="(">1.336 (1.225&#x2013;1.451)</td>
<td align="char" valign="top" char="(">1.784 (1.501&#x2013;2.107)</td>
<td align="char" valign="top" char="(">0.990 (0.902&#x2013;1.085)</td>
<td align="char" valign="top" char="(">0.879 (0.852&#x2013;0.899)</td>
</tr>
<tr>
<td align="left" valign="middle">Neural Network</td>
<td align="char" valign="top" char="(">1.293 (1.174&#x2013;1.413)</td>
<td align="char" valign="top" char="(">1.671 (1.377&#x2013;1.995)</td>
<td align="char" valign="top" char="(">0.965 (0.879&#x2013;1.051)</td>
<td align="char" valign="top" char="(">0.886 (0.862&#x2013;0.907)</td>
</tr>
<tr>
<td align="left" valign="middle">Decision Tree</td>
<td align="char" valign="top" char="(">1.381 (1.256&#x2013;1.516)</td>
<td align="char" valign="top" char="(">1.908 (1.576&#x2013;2.299)</td>
<td align="char" valign="top" char="(">0.981 (0.890&#x2013;1.083)</td>
<td align="char" valign="top" char="(">0.870 (0.842&#x2013;0.893)</td>
</tr>
<tr>
<td align="left" valign="middle">Support Vector Regressor</td>
<td align="char" valign="top" char="(">1.288 (1.165&#x2013;1.419)</td>
<td align="char" valign="top" char="(">1.658 (1.357&#x2013;2.014)</td>
<td align="char" valign="top" char="(">0.935 (0.849&#x2013;1.030)</td>
<td align="char" valign="top" char="(">0.887 (0.861&#x2013;0.909)</td>
</tr>
<tr>
<td align="left" valign="middle">K-Nearest Neighbors</td>
<td align="char" valign="top" char="(">1.667 (1.513&#x2013;1.820)</td>
<td align="char" valign="top" char="(">2.778 (2.288&#x2013;3.311)</td>
<td align="char" valign="top" char="(">1.274 (1.165&#x2013;1.385)</td>
<td align="char" valign="top" char="(">0.811 (0.780&#x2013;0.839)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>RMSE, root mean squared error; MSE, mean squared error; MAE, mean absolute error; R<sup>2</sup>, coefficient of determination; CI, confidence interval.</p>
</table-wrap-foot>
</table-wrap>
<p>The Breusch&#x2013;Pagan test on training out-of-fold residuals indicated heteroscedasticity of errors with baseline BMI (<italic>p</italic>&#x202F;&#x003C;&#x202F;0.05), so we applied WLS calibration learned on the training data and then applied to the test set without refitting. After calibration, test-set performance was: RMSE 1.200 (95% CI: 1.101&#x2013;1.303), MSE 1.440 (95% CI: 1.211&#x2013;1.697), MAE 0.895 (95% CI: 0.818&#x2013;0.981) and R<sup>2</sup> 0.902 (95% CI: 0.882&#x2013;0.918).</p>
<p>As a trivial baseline model (predicting follow-up BMI equals baseline BMI), performance on the independent test set was MSE 2.212 (95% CI: 1.766&#x2013;2.767), RMSE 1.487 (95% CI: 1.329&#x2013;1.663), MAE 1.065 (95% CI: 0.966&#x2013;1.181) and R<sup>2</sup> 0.850 (95% CI: 0.808&#x2013;0.883). Using bootstrap of paired differences, the incremental performance benefit of the CB-based model over the trivial baseline model was: &#x0394;RMSE (baseline &#x2212; CB) 0.276 (95% CI: 0.159&#x2013;0.389), &#x0394;MSE 0.752 (95% CI: 0.415&#x2013;1.115), &#x0394;MAE 0.169 (95% CI: 0.093&#x2013;0.242) and &#x0394;R<sup>2</sup> (CB&#x202F;&#x2212;&#x202F;baseline) 0.051 (95% CI: 0.027&#x2013;0.078). All intervals exclude zero, indicating that the CB model provides a statistically significant improvement over the baseline.</p>
<p>After excluding baseline BMI, the CB model achieved RMSE 2.497 (95% CI: 2.270&#x2013;2.728), MSE 6.233 (95% CI: 5.154&#x2013;7.440), MAE 1.877 (95% CI: 1.709&#x2013;2.045), and R<sup>2</sup> 0.576 (95% CI: 0.502&#x2013;0.644) on the independent test set. Compared with the primary model, performance declined, suggesting that baseline BMI is likely a dominant predictor.</p>
</sec>
<sec id="sec16">
<label>4.3</label>
<title>Stratified performance and error distribution</title>
<p>In subgroup performance evaluation, <xref ref-type="table" rid="tab4">Table 4</xref> presents the stratified metrics (RMSE, MSE, MAE, R<sup>2</sup>) of the calibrated CB model, with paired-bootstrap between-group differences (&#x0394; with 95% CIs). In permutation tests, <italic>p</italic>-values for all performance metrics by gender and by age group were &#x003E;0.05. For baseline BMI categories, <italic>p</italic> &#x003C; 0.05. These results indicate that performance differences across gender and age groups were small, with no statistically detectable heterogeneity. Statistically significant differences were observed across baseline BMI categories.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Stratified model performance and between-group differences.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Feature</th>
<th align="left" valign="top">Group</th>
<th align="center" valign="top">RMSE (95% CI)</th>
<th align="center" valign="top">MSE (95% CI)</th>
<th align="center" valign="top">MAE (95% CI)</th>
<th align="center" valign="top">R<sup>2</sup> (95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="3">Gender</td>
<td align="left" valign="middle">Male</td>
<td align="char" valign="middle" char="(">1.197 (1.065&#x2013;1.332)</td>
<td align="char" valign="middle" char="(">1.433 (1.135&#x2013;1.775)</td>
<td align="char" valign="middle" char="(">0.898 (0.794&#x2013;1.015)</td>
<td align="char" valign="middle" char="(">0.903 (0.876&#x2013;0.925)</td>
</tr>
<tr>
<td align="left" valign="middle">Female</td>
<td align="char" valign="middle" char="(">1.217 (1.050&#x2013;1.380)</td>
<td align="char" valign="middle" char="(">1.480 (1.102&#x2013;1.906)</td>
<td align="char" valign="middle" char="(">0.895 (0.771&#x2013;1.027)</td>
<td align="char" valign="middle" char="(">0.899 (0.866&#x2013;0.923)</td>
</tr>
<tr>
<td align="left" valign="middle">&#x0394; (female&#x202F;&#x2212;&#x202F;male)</td>
<td align="char" valign="middle" char="(">0.088 (0.004&#x2013;0.239)</td>
<td align="char" valign="middle" char="(">0.212 (0.009&#x2013;0.601)</td>
<td align="char" valign="middle" char="(">0.067 (0.002&#x2013;0.189)</td>
<td align="char" valign="middle" char="(">0.015 (0.001&#x2013;0.044)</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">Age</td>
<td align="left" valign="middle">14&#x2013;15 y</td>
<td align="char" valign="middle" char="(">1.226 (1.098&#x2013;1.349)</td>
<td align="char" valign="middle" char="(">1.502 (1.206&#x2013;1.819)</td>
<td align="char" valign="middle" char="(">0.918 (0.818&#x2013;1.018)</td>
<td align="char" valign="middle" char="(">0.899 (0.875&#x2013;0.918)</td>
</tr>
<tr>
<td align="left" valign="middle">16&#x2013;17 y</td>
<td align="char" valign="middle" char="(">1.169 (0.982&#x2013;1.343)</td>
<td align="char" valign="middle" char="(">1.366 (0.965&#x2013;1.805)</td>
<td align="char" valign="middle" char="(">0.860 (0.728&#x2013;1.008)</td>
<td align="char" valign="middle" char="(">0.904 (0.863&#x2013;0.931)</td>
</tr>
<tr>
<td align="left" valign="middle">&#x0394; (16&#x2013;17&#x2212;14&#x2013;15)</td>
<td align="char" valign="middle" char="(">0.102 (0.003&#x2013;0.273)</td>
<td align="char" valign="middle" char="(">0.243 (0.008&#x2013;0.653)</td>
<td align="char" valign="middle" char="(">0.085 (0.004&#x2013;0.226)</td>
<td align="char" valign="middle" char="(">0.016 (0.001&#x2013;0.044)</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">Baseline BMI</td>
<td align="left" valign="middle">Normal</td>
<td align="char" valign="middle" char="(">0.979 (0.878&#x2013;1.079)</td>
<td align="char" valign="middle" char="(">0.959 (0.770&#x2013;1.165)</td>
<td align="char" valign="middle" char="(">0.734 (0.661&#x2013;0.810)</td>
<td align="char" valign="middle" char="(">0.806 (0.748&#x2013;0.847)</td>
</tr>
<tr>
<td align="left" valign="middle">Overweight/obesity</td>
<td align="char" valign="middle" char="(">1.658 (1.460&#x2013;1.862)</td>
<td align="char" valign="middle" char="(">2.748 (2.130&#x2013;3.469)</td>
<td align="char" valign="middle" char="(">1.324 (1.147&#x2013;1.524)</td>
<td align="char" valign="middle" char="(">0.670 (0.550&#x2013;0.739)</td>
</tr>
<tr>
<td align="left" valign="middle">&#x0394; (OW/OB&#x202F;&#x2212;&#x202F;Normal)</td>
<td align="char" valign="middle" char="(">0.682 (0.473&#x2013;0.895)</td>
<td align="char" valign="middle" char="(">1.811 (1.171&#x2013;2.483)</td>
<td align="char" valign="middle" char="(">0.597 (0.407&#x2013;0.790)</td>
<td align="char" valign="middle" char="(">0.142 (0.044&#x2013;0.251)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>RMSE, root mean squared error; MSE, mean squared error; MAE, mean absolute error; R<sup>2</sup>, coefficient of determination; CI, confidence interval; &#x0394;, between-group difference.</p>
</table-wrap-foot>
</table-wrap>
<p>In error distribution analysis, the calibrated CB model showed near-zero overall bias, mean error&#x202F;=&#x202F;0.03&#x202F;&#x00B1;&#x202F;1.21, with central and tail dispersion MAD&#x202F;=&#x202F;0.66, IQR|e|&#x202F;=&#x202F;1.05, P90|e|&#x202F;=&#x202F;1.98, P95|e|&#x202F;=&#x202F;2.71. By gender, males (&#x2212;0.03&#x202F;&#x00B1;&#x202F;1.20) and females (0.10&#x202F;&#x00B1;&#x202F;1.22) were similar, indicating no material heterogeneity by gender. By age, 14&#x2013;15&#x202F;years (&#x2212;0.01&#x202F;&#x00B1;&#x202F;1.23) and 16&#x2013;17&#x202F;years (0.08&#x202F;&#x00B1;&#x202F;1.17) were only mildly different, indicating no material heterogeneity by age. By baseline BMI category, errors in the overweight/obesity group (0.24&#x202F;&#x00B1;&#x202F;1.65) were larger and more right-shifted than in the normal group (&#x2212;0.06&#x202F;&#x00B1;&#x202F;0.98).</p>
<p>For overall error visualization, <xref ref-type="fig" rid="fig2">Figure 2</xref> shows the predicted-versus-observed scatter with a smoothing line, indicating overall fit with only slight departures at the extremes of the prediction range. <xref ref-type="fig" rid="fig3">Figure 3</xref> displays the Bland&#x2013;Altman plot with a near-zero mean bias and approximately symmetric limits of agreement (&#x2212;2.33 to 2.38), and the data points show no systematic drift with the mean, suggesting negligible bias.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Predicted-versus-observed scatter plot.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Scatter plot showing predicted versus observed test values with data points clustered around a diagonal line, signifying strong correlation. The x-axis is labeled "Predicted" and y-axis "Observed".</alt-text>
</graphic>
</fig>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Bland&#x2013;Altman plot.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bland-Altman plot showing differences between observed and predicted values against their averages. The mean difference is marked with a red dashed line at 0.03. Limits of agreement are shown with green dashed lines at -2.33 and 2.38. Blue dots represent data points.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec17">
<label>4.4</label>
<title>Global interpretability</title>
<p>Population-level feature importance was assessed using SHAP global interpretation applied to the CB-based model. To focus on modifiable features for intervention insights, non-modifiable features such as baseline BMI, gender, family residence location, among others, were excluded from SHAP visual analysis. <xref ref-type="fig" rid="fig4">Figure 4</xref> displays the SHAP summary plot, ranking modifiable features by their mean absolute SHAP values, which represent their average contribution to BMI prediction across the entire population. The plot also visualizes the distribution of SHAP values, with feature color indicating feature values (red: high, blue: low). Features with positive SHAP values contribute positively to BMI prediction, while negative values indicate a decreasing effect. The modifiable features ranked in descending order were: level of health literacy, recognize self-weight status correctly, sedentariness duration on weekends, participation in professional sports training, frequency of staying up late, daily sleep duration, frequency of high-calorie foods intake, and physical activities duration on weekends.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>SHAP summary plot for feature importance. Each point represents a sample&#x2019;s SHAP value for a feature. Color indicates the value of the feature (red&#x202F;=&#x202F;high, blue&#x202F;=&#x202F;low). Features are ranked by their mean absolute SHAP values, reflecting their overall contribution to BMI prediction.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">SHAP summary plot illustrating SHAP values indicating the impact of various features on model output. Features include level of health literacy, recognition of self-weight status, sedentariness duration on weekends, participation in professional sports training, frequency of staying up late, daily sleep duration, frequency of high-calorie foods intake, and physical activities duration on weekends. Points are colored by feature value, ranging from low (blue) to high (red). Values on the x-axis represent SHAP values from negative to positive, showing influence on model predictions.</alt-text>
</graphic>
</fig>
<p>The SHAP interaction analyses are presented in <xref ref-type="fig" rid="fig5">Figure 5</xref>. The heatmap shows the mean absolute SHAP interaction value for each feature pair. Color intensity encodes interaction strength, with darker/warmer colors indicating stronger interactions and lighter/cooler colors indicating weaker ones. Diagonal cells approximate main effects, while off-diagonal cells reflect pairwise interactions. As shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>, most feature pairs exhibit near-zero interaction values, indicating predominance of main effects and no strong pairwise interactions.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Heatmap of SHAP interaction values. Color intensity encodes interaction strength, with darker/warmer colors indicating stronger interactions and lighter/cooler colors indicating weaker ones. LHL, Level of health literacy; SDOW, Sedentariness duration on weekends; FSUL, Frequency of staying up late; DSD, Daily sleep duration; RSWSC, Recognize self-weight status correctly; FHCFI, Frequency of high-calorie foods intake; PADOW, Physical activities duration on weekends; PPST, Participation in professional sports training.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Heatmap showing SHAP interaction values between variables PPST, PADOW, FHCFI, RSWSC, FSUL, DSD, SDOW, LHL. Color gradient from blue to red represents increasing interaction values, with red indicating higher interactions.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="fig6">Figure 6</xref> shows SHAP dependence plots for the top four features ranked by global importance (mean |SHAP|). The x-axis shows feature values and the y-axis shows SHAP values. Point color indicates the strongest interacting feature, and vertical dispersion reflects potential interactions. Level of health literacy was positive for &#x201C;lowest/lower-middle&#x201D; and negative for &#x201C;upper-middle/highest&#x201D; (<xref ref-type="fig" rid="fig6">Figure 6A</xref>). Recognize self-weight status correctly was negative for &#x201C;Yes&#x201D; and positive for &#x201C;No&#x201D; (<xref ref-type="fig" rid="fig6">Figure 6B</xref>). Sedentariness duration on weekends &#x003E;9&#x202F;h/day showed the largest positive SHAP values, whereas 3&#x2013;9&#x202F;h/day was negative to mildly negative (<xref ref-type="fig" rid="fig6">Figure 6C</xref>). Participation in professional sports training was negative for &#x201C;Yes&#x201D; and near-zero to mildly positive for &#x201C;No&#x201D; (<xref ref-type="fig" rid="fig6">Figure 6D</xref>). Overall, the plots show smooth directional trends with limited vertical spread, indicating predominance of main effects and modest interactions.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>SHAP dependence plots. <bold>(A)</bold> Level of health literacy. <bold>(B)</bold> Recognize self-weight status correctly. <bold>(C)</bold> Sedentariness duration on weekends. <bold>(D)</bold> Participation in professional sports training. The x-axis shows feature values and the y-axis shows SHAP values. Point color indicates the strongest interacting feature, and vertical dispersion reflects potential interactions.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four SHAP dependence plots labeled A, B, C, and D display how SHAP values vary with feature values. A) Level of health literacy varies with sedentary duration on weekends. B) Recognition of self-weight status is compared with daily sleep duration. C) Sedentariness duration on weekends versus recognition of self-weight status. D) Participation in professional sports training is related to physical activity duration on weekends. The x-axis shows feature values and the y-axis shows SHAP values. Point color indicates the strongest interacting feature, and vertical dispersion reflects potential interactions.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec18">
<label>4.5</label>
<title>Local interpretability</title>
<p>SHAP values quantify each feature&#x2019;s association with the model&#x2019;s prediction, providing a detailed view of how individual modifiable features relate to the model&#x2019;s predicted BMI values for specific samples. SHAP waterfall plots for two specific samples are shown in <xref ref-type="fig" rid="fig7">Figure 7</xref>. The red bars indicate positive contributions (increasing the predicted BMI) and the blue bars indicate negative contributions (decreasing the predicted BMI). The bar length and its numeric label reflect the effect magnitude in kg/m<sup>2</sup>, and longer bars denote larger increases or decreases in the predicted BMI. As shown in <xref ref-type="fig" rid="fig7">Figure 7A</xref>, the predicted BMI for this individual is 21.00&#x202F;kg/m<sup>2</sup>. Negative contributions dominate and reduce the overall prediction. The main negative features and their numerical contributions to the predicted BMI are: PPST (Participation in professional sports training)&#x202F;=&#x202F;Yes (&#x2212;0.45&#x202F;kg/m<sup>2</sup>), SDOW (Sedentariness duration on weekends)&#x202F;=&#x202F;3&#x2013;5&#x202F;h/day (&#x2212;0.15&#x202F;kg/m<sup>2</sup>), and RSWSC (Recognize self-weight status correctly)&#x202F;=&#x202F;Yes (&#x2212;0.13&#x202F;kg/m<sup>2</sup>). Positive contributions are smaller, including DSD (Daily sleep duration)&#x202F;=&#x202F;&#x003E;8&#x202F;h (+0.26&#x202F;kg/m<sup>2</sup>) and FHCFI (Frequency of high-calorie foods intake)&#x202F;=&#x202F;Always (+0.14&#x202F;kg/m<sup>2</sup>). In <xref ref-type="fig" rid="fig7">Figure 7B</xref>, positive contributions predominate, leading to a higher predicted BMI for this individual.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>SHAP waterfall plots. The influence of features on BMI prediction for two samples: <bold>(A)</bold> shows a sample with a decrease, and <bold>(B)</bold> shows a sample with an increase in BMI. The red bars indicate positive contributions (increasing the predicted BMI) and the blue bars indicate negative contributions (decreasing the predicted BMI). The bar length and its numeric label reflect the effect magnitude in kg/m<sup>2</sup>, and longer bars denote larger increases or decreases in the predicted BMI. LHL, Level of health literacy; SDOW, Sedentariness duration on weekends; FSUL, Frequency of staying up late; DSD, Daily sleep duration; RSWSC, Recognize self-weight status correctly; FHCFI, Frequency of high-calorie foods intake; PADOW, Physical activities duration on weekends; PPST, Participation in professional sports training.</p>
</caption>
<graphic xlink:href="fpubh-13-1657551-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">SHAP waterfall plots labeled A and B compare different categories with their respective values on a horizontal axis. Each category has a corresponding color-coded bar indicating either a positive (red) or negative (blue) influence on a central value. Chart A has a central line marked at 21.5, while Chart B is centered around the same value, with some bars extending to higher values. The values beside the bars indicate specific differences from the central value. Both charts represent interactions between multiple variables and their effects.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec19">
<label>5</label>
<title>Discussion</title>
<p>This study investigated the factors influencing BMI changes in adolescents and conducted a one-year longitudinal cohort study to examine the changes in BMI over this period. Subsequently, a BMI prediction model was developed and validated using machine learning algorithms. Finally, the SHAP model interpretation technique was employed to explore the impact of modifiable factors on BMI changes in adolescents.</p>
<p>In this study, predictors were prescreened on the inner-training folds using the univariable analyses. The statistically significant associations observed suggest that these factors may play an important role in BMI changes among adolescents. Specifically, we found that among adolescents, poor sleep quality, sedentary behavior, and unhealthy eating habits were associated with higher model-predicted BMI, whereas regular physical activity was associated with lower model-predicted BMI. In addition, our study shows that adolescents who correctly recognize their weight status, as well as those dissatisfied with their body shape and willing to change, are more likely to drive BMI changes through active intervention. Therefore, self-perception factors play a crucial role in BMI changes, particularly when adolescents recognize their weight issues, which makes them more likely to take proactive steps to manage their weight. Moreover, greater health literacy was also associated with better weight control by enabling adolescents to make informed dietary and lifestyle choices, leading to more stable BMI changes. These findings are consistent with previous research (<xref ref-type="bibr" rid="ref8">8</xref>, <xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref26">26</xref>, <xref ref-type="bibr" rid="ref37">37</xref>, <xref ref-type="bibr" rid="ref38">38</xref>).</p>
<p>The CB regression algorithm demonstrated superior performance among the developed BMI prediction models, exceeding results reported in previous research (<xref ref-type="bibr" rid="ref10">10</xref>, <xref ref-type="bibr" rid="ref12">12</xref>, <xref ref-type="bibr" rid="ref14">14</xref>, <xref ref-type="bibr" rid="ref22">22</xref>, <xref ref-type="bibr" rid="ref39 ref40 ref41">39&#x2013;41</xref>), as summarized in <xref ref-type="table" rid="tab5">Table 5</xref>. Our model&#x2019;s strong performance reflects both algorithmic and methodological choices. Specifically, CB&#x2019;s ordered boosting suppresses target leakage during training and reduces prediction shift. In particular, its symmetric tree structure helps control variance, enhances stability, and mitigates overfitting. Furthermore, CB captures nonlinear relationships effectively under mixed feature types and moderate sample sizes, making it well suited to BMI prediction tasks that involve diverse health data. For model training, this study employed nested CV and achieved strong performance on an independent test set, supporting the model&#x2019;s generalizability. Nested CV separates hyperparameter tuning from performance assessment, reducing optimistic bias and guarding against information leakage. Despite the use of nested cross-validation and evaluation on an independent test set supporting generalizability, real-world deployment still requires external validation across heterogeneous populations. Because sociocultural norms, dietary patterns, and environmental factors may influence participants&#x2019; lifestyle behaviors, body composition, and health awareness, which in turn affect key predictive features of the model, future research will include external validation across more diverse populations to enhance its generalizability. In addition, in the comparison between the CB model and the trivial baseline model, all performance metrics showed significant improvements. 
The results indicate that the CB model clearly outperforms the baseline model, highlighting the added value of machine learning algorithms in predicting BMI. Moreover, in the sensitivity analysis excluding baseline BMI, all performance metrics of the model declined, indicating that baseline BMI is a key predictor in our model. The importance of baseline BMI provides a foundation for future model improvements.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Comparison of BMI prediction models in previous studies.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Author name</th>
<th align="left" valign="top">Features</th>
<th align="left" valign="top">Method</th>
<th align="left" valign="top">Main model performance</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Park et al. (<xref ref-type="bibr" rid="ref10">10</xref>)</td>
<td align="left" valign="middle">Neuroimaging features</td>
<td align="left" valign="middle">Machine learning approaches</td>
<td align="left" valign="middle">RMSE&#x202F;=&#x202F;1.29</td>
</tr>
<tr>
<td align="left" valign="middle">Yao et al. (<xref ref-type="bibr" rid="ref12">12</xref>)</td>
<td align="left" valign="middle">Smartphone motion sensor data</td>
<td align="left" valign="middle">Hybrid deep neural network</td>
<td align="left" valign="middle">MAEs&#x202F;=&#x202F;2.461&#x202F;&#x00B1;&#x202F;1.000 at MobiAct dataset, 3.137&#x202F;&#x00B1;&#x202F;1.300 at Motion-Sense dataset</td>
</tr>
<tr>
<td align="left" valign="middle">Ali et al. (<xref ref-type="bibr" rid="ref14">14</xref>)</td>
<td align="left" valign="middle">Medication data</td>
<td align="left" valign="middle">Gradient-boosted machine (GBM) learning</td>
<td align="left" valign="middle">RMSE&#x202F;=&#x202F;4.97</td>
</tr>
<tr>
<td align="left" valign="middle">Singh and Tawfik (<xref ref-type="bibr" rid="ref22">22</xref>)</td>
<td align="left" valign="middle">Earlier BMI values</td>
<td align="left" valign="middle">Regression methods, artificial neural network</td>
<td align="left" valign="middle">MAE&#x202F;=&#x202F;1.42</td>
</tr>
<tr>
<td align="left" valign="middle">Harrison et al. (<xref ref-type="bibr" rid="ref39">39</xref>)</td>
<td align="left" valign="middle">Clinical, genetic and expression data</td>
<td align="left" valign="middle">Eleven standard regression methods</td>
<td align="left" valign="middle"><italic>R</italic><sup>2</sup>&#x202F;=&#x202F;0.829, RMSE&#x202F;=&#x202F;2.84 (Rank 1)</td>
</tr>
<tr>
<td align="left" valign="middle">Cheng et al. (<xref ref-type="bibr" rid="ref40">40</xref>)</td>
<td align="left" valign="middle">Early-life EHR data</td>
<td align="left" valign="middle">Support vector regression</td>
<td align="left" valign="middle">MAE&#x202F;=&#x202F;0.96 at 30&#x2013;36&#x202F;months, 0.98 at 36&#x2013;42&#x202F;months, and 1.00 at 42&#x2013;48&#x202F;months</td>
</tr>
<tr>
<td align="left" valign="middle">Delnevo et al. (<xref ref-type="bibr" rid="ref41">41</xref>)</td>
<td align="left" valign="middle">Psychological variables</td>
<td align="left" valign="middle">Eight machine learning algorithms</td>
<td align="left" valign="middle">MAE&#x202F;=&#x202F;5.27&#x2013;5.50</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In this study, we employed SHAP techniques to interpret the BMI prediction model, providing both global and local model explanations. Recent studies have widely applied SHAP in various domains, including medicine (<xref ref-type="bibr" rid="ref42">42</xref>, <xref ref-type="bibr" rid="ref43">43</xref>), materials science (<xref ref-type="bibr" rid="ref44">44</xref>), transportation (<xref ref-type="bibr" rid="ref45">45</xref>), and others. Compared to methods such as Local Interpretable Model-agnostic Explanation (LIME) and Partial Dependence Plot (PDP), which are commonly used in other studies (<xref ref-type="bibr" rid="ref46">46</xref>, <xref ref-type="bibr" rid="ref47">47</xref>), SHAP offers a more unified and comprehensive framework. In our study, SHAP analysis focused exclusively on modifiable features, such as physical activity, diet, and lifestyle habits, providing both global and local explanations for BMI predictions, enhancing model transparency and credibility, and offering insights to inform personalized interventions.</p>
<p>In the global explanation, bee plots were used to visualize the importance of modifiable features and their overall contribution to the model&#x2019;s BMI predictions across the dataset. This population-level interpretation provides actionable insights for public health policy development in school and community settings, as it highlights which modifiable behaviors are most strongly associated with the model&#x2019;s BMI predictions among adolescents. For instance, policies aimed at integrating health literacy education into school curricula may empower adolescents to make healthier lifestyle choices. School- and community-based health literacy campaigns can be implemented to educate adolescents about nutrition, physical activity, and the health consequences of obesity. Weekend community sports programs and family-oriented outdoor activities should be promoted to reduce sedentary behavior. Policy measures may also be considered to reduce excessive academic pressure that may contribute to sleep deprivation. Moreover, population-level feature importance rankings can help policymakers prioritize resource allocation, thereby enhancing the efficiency of public health agencies in implementing adolescent weight-related prevention and intervention programs. Developing interventions targeting top-ranked factors is likely to yield more substantial population-level health benefits than focusing on lower-ranked ones. In addition, interaction heatmaps and dependence plots help guide actions based on model results. Strong interactions call for paired joint interventions, whereas weak interactions with directional dependence curves support targeting single behaviors.</p>
<p>In the local explanation, waterfall plots were generated to decompose each individual prediction into the model&#x2019;s base value and the numeric contribution of each feature in kg/m<sup>2</sup>, providing clinically interpretable effect sizes. For example, the sample shown in <xref ref-type="fig" rid="fig7">Figure 7B</xref> demonstrates a significant risk of BMI increase over the next year if current lifestyle habits are maintained. The SHAP waterfall plot highlights several key risk factors, such as very long weekend sedentary time (SDOW =&#x202F;&#x003E;&#x202F;9&#x202F;h/day, +0.43&#x202F;kg/m<sup>2</sup>), lower health literacy (LHL&#x202F;=&#x202F;lower middle, +0.40&#x202F;kg/m<sup>2</sup>), always consuming high-calorie foods (FHCFI&#x202F;=&#x202F;always, +0.31&#x202F;kg/m<sup>2</sup>), and not recognizing self-weight status correctly (RSWSC&#x202F;=&#x202F;no, +0.22&#x202F;kg/m<sup>2</sup>). Additionally, although this sample exhibits some positive lifestyle habits, their contributions to the prediction are relatively small, including sometimes staying up late (&#x2212;0.13&#x202F;kg/m<sup>2</sup>), 1&#x2013;2&#x202F;h/day of weekend physical activity (&#x2212;0.07&#x202F;kg/m<sup>2</sup>), and 6&#x2013;8&#x202F;h/day of sleep (&#x2212;0.02&#x202F;kg/m<sup>2</sup>). This individual-level interpretability analysis helps characterize risk and protective factors associated with variation in the model&#x2019;s predicted risk of abnormal BMI. Based on the model outputs, clinicians and health management professionals can refer to these findings to design more targeted, personalized intervention strategies to help individuals modify or maintain their current lifestyle habits, which may enhance the precision and effectiveness of adolescent weight management.</p>
<p>To address practical challenges in potential clinical implementation, our study highlights several exploratory advantages that may facilitate future use, pending external validation and impact assessment. First, compared to prediction models that rely on complex and hard-to-obtain medical data (<xref ref-type="bibr" rid="ref10">10</xref>, <xref ref-type="bibr" rid="ref12">12</xref>, <xref ref-type="bibr" rid="ref14">14</xref>), the predictors used in this study were derived from easily accessible questionnaire data. This approach reduces the burden of data collection and the cost without compromising model performance, providing an efficient solution for screening high-risk adolescents. Second, predicting continuous BMI values may be useful for quantifying subtle weight changes, potentially aiding dynamic health monitoring of risk trends near commonly used thresholds. In contrast, models that use BMI categories as prediction targets (<xref ref-type="bibr" rid="ref11">11</xref>, <xref ref-type="bibr" rid="ref15">15</xref>, <xref ref-type="bibr" rid="ref48">48</xref>, <xref ref-type="bibr" rid="ref49">49</xref>) are limited to reflecting coarse changes in weight status and may miss critical early warning signs. Third, the integration of SHAP-based interpretability may improve transparency relative to machine-learning black-box models, highlighting how modifiable factors are associated with the model&#x2019;s predictions of BMI.</p>
<p>The study has the following limitations: (1) The sample was limited in scope, which may affect the generalizability of the findings. Although loss-to-follow-up analyses suggested no strong differential attrition, selection processes and exclusions may still introduce bias. Future research should include participants from different cultural backgrounds, regions, and age groups to enhance model applicability, and should incorporate strategies to mitigate selection and attrition bias. (2) The behavioral habits assessed at baseline may change over the follow-up period. Future studies should consider incorporating dynamic assessments of these behaviors over time to better capture their impact on BMI changes. (3) All questionnaire data were self-reported, which may lead to potential reporting bias. (4) The model has not yet undergone external or temporal validation. While evaluation on the independent test set yielded strong performance, future work will test the model across external datasets and time points to ensure generalizability. (5) Periodic model updates will be needed to maintain long-term applicability. (6) Pubertal maturation (e.g., Tanner staging) was not collected.</p>
</sec>
<sec sec-type="conclusions" id="sec20">
<label>6</label>
<title>Conclusion</title>
<p>This study successfully developed a machine learning predictive model for BMI in adolescents based on readily accessible daily information, achieving high predictive performance. The integration of SHAP for model interpretation provided valuable insights into the key factors associated with the model&#x2019;s predictions of BMI variation. The findings can provide valuable data to inform the formulation of public health policies and may support health-status monitoring while informing the design of personalized intervention strategies for weight and health management.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec21">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="sec22">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of the Hefei Institutes of Physical Science (SWYX-Y-2020-01), Chinese Academy of Sciences. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation in this study was provided by the participants&#x2019; legal guardians/next of kin.</p>
</sec>
<sec sec-type="author-contributions" id="sec23">
<title>Author contributions</title>
<p>ZZ: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Software, Conceptualization, Formal analysis, Methodology, Data curation. WP: Writing &#x2013; review &#x0026; editing, Investigation, Funding acquisition, Resources. SS: Methodology, Writing &#x2013; review &#x0026; editing, Investigation, Supervision. FZ: Writing &#x2013; review &#x0026; editing, Supervision, Investigation. YS: Investigation, Supervision, Writing &#x2013; review &#x0026; editing, Methodology. LH: Data curation, Formal analysis, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We sincerely thank all authors for their contributions to this research.</p>
</ack>
<sec sec-type="COI-statement" id="sec24">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec25">
<title>Generative AI statement</title>
<p>The authors declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec26">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec27">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpubh.2025.1657551/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpubh.2025.1657551/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Khanna</surname><given-names>D</given-names></name> <name><surname>Peltzer</surname><given-names>C</given-names></name> <name><surname>Kahar</surname><given-names>P</given-names></name> <name><surname>Parmar</surname><given-names>MS</given-names></name></person-group>. <article-title>Body mass index (BMI): a screening tool analysis</article-title>. <source>Cureus</source>. (<year>2022</year>) <volume>14</volume>:<fpage>e22119</fpage>. doi: <pub-id pub-id-type="doi">10.7759/cureus.22119</pub-id>, PMID: <pub-id pub-id-type="pmid">35308730</pub-id></mixed-citation></ref>
<ref id="ref2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wada</surname><given-names>K</given-names></name> <name><surname>Kuboyama</surname><given-names>K</given-names></name> <name><surname>Abe</surname><given-names>SK</given-names></name> <name><surname>Rahman</surname><given-names>MS</given-names></name> <name><surname>Islam</surname><given-names>MR</given-names></name> <name><surname>Saito</surname><given-names>E</given-names></name> <etal/></person-group>. <article-title>Body mass index and breast cancer risk in premenopausal and postmenopausal east Asian women: a pooled analysis of 13 cohort studies</article-title>. <source>Breast Cancer Res</source>. (<year>2024</year>) <volume>26</volume>:<fpage>158</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13058-024-01907-5</pub-id>, PMID: <pub-id pub-id-type="pmid">39543702</pub-id></mixed-citation></ref>
<ref id="ref3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Samson</surname><given-names>R</given-names></name> <name><surname>Ennezat</surname><given-names>PV</given-names></name> <name><surname>Le Jemtel</surname><given-names>TH</given-names></name> <name><surname>Oparil</surname><given-names>S</given-names></name></person-group>. <article-title>Cardiovascular disease risk reduction and body mass index</article-title>. <source>Curr Hypertens Rep</source>. (<year>2022</year>) <volume>24</volume>:<fpage>535</fpage>&#x2013;<lpage>46</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11906-022-01213-5</pub-id>, PMID: <pub-id pub-id-type="pmid">35788967</pub-id></mixed-citation></ref>
<ref id="ref4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ozawa</surname><given-names>H</given-names></name> <name><surname>Fukui</surname><given-names>K</given-names></name> <name><surname>Fujita</surname><given-names>Y</given-names></name> <name><surname>Ishibashi</surname><given-names>C</given-names></name> <name><surname>Yoneda</surname><given-names>S</given-names></name> <name><surname>Nammo</surname><given-names>T</given-names></name> <etal/></person-group>. <article-title>Expansion of human alpha-cell area is associated with a higher maximum body mass index before the onset of type 2 diabetes</article-title>. <source>J Diabetes</source>. (<year>2023</year>) <volume>15</volume>:<fpage>277</fpage>&#x2013;<lpage>82</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1753-0407.13370</pub-id>, PMID: <pub-id pub-id-type="pmid">36843206</pub-id></mixed-citation></ref>
<ref id="ref5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seo</surname><given-names>JY</given-names></name> <name><surname>Jin</surname><given-names>EH</given-names></name> <name><surname>Chung</surname><given-names>GE</given-names></name> <name><surname>Kim</surname><given-names>YS</given-names></name> <name><surname>Bae</surname><given-names>JH</given-names></name> <name><surname>Yim</surname><given-names>JY</given-names></name> <etal/></person-group>. <article-title>The risk of colorectal cancer according to obesity status at four-year intervals: a nationwide population-based cohort study</article-title>. <source>Sci Rep</source>. (<year>2023</year>) <volume>13</volume>:<fpage>8928</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-023-36111-6</pub-id>, PMID: <pub-id pub-id-type="pmid">37264099</pub-id></mixed-citation></ref>
<ref id="ref6"><label>6.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll1">World Obesity Federation</collab></person-group>. <source>World obesity atlas 2024</source>, (<year>2024</year>). Available online at: <ext-link xlink:href="https://data.worldobesity.org/publications/?cat=22" ext-link-type="uri">https://data.worldobesity.org/publications/?cat=22</ext-link> (Accessed October 24, 2025).</mixed-citation></ref>
<ref id="ref7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Silventoinen</surname><given-names>K</given-names></name> <name><surname>Jelenkovic</surname><given-names>A</given-names></name> <name><surname>Sund</surname><given-names>R</given-names></name> <name><surname>Hur</surname><given-names>YM</given-names></name> <name><surname>Yokoyama</surname><given-names>Y</given-names></name> <name><surname>Honda</surname><given-names>C</given-names></name> <etal/></person-group>. <article-title>Genetic and environmental effects on body mass index from infancy to the onset of adulthood: an individual-based pooled analysis of 45 twin cohorts participating in the COllaborative project of development of anthropometrical measures in twins (CODATwins) study</article-title>. <source>Am J Clin Nutr</source>. (<year>2016</year>) <volume>104</volume>:<fpage>371</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.3945/ajcn.116.130252</pub-id>, PMID: <pub-id pub-id-type="pmid">27413137</pub-id></mixed-citation></ref>
<ref id="ref8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zink</surname><given-names>J</given-names></name> <name><surname>Booker</surname><given-names>R</given-names></name> <name><surname>Wolff-Hughes</surname><given-names>DL</given-names></name> <name><surname>Allen</surname><given-names>NB</given-names></name> <name><surname>Carnethon</surname><given-names>MR</given-names></name> <name><surname>Alexandria</surname><given-names>SJ</given-names></name> <etal/></person-group>. <article-title>Longitudinal associations of screen time, physical activity, and sleep duration with body mass index in US youth</article-title>. <source>Int J Behav Nutr Phys Act</source>. (<year>2024</year>) <volume>21</volume>:<fpage>35</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12966-024-01587-6</pub-id>, PMID: <pub-id pub-id-type="pmid">38566134</pub-id></mixed-citation></ref>
<ref id="ref9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sandri</surname><given-names>E</given-names></name> <name><surname>Piredda</surname><given-names>M</given-names></name> <name><surname>Sguanci</surname><given-names>M</given-names></name> <name><surname>Mancin</surname><given-names>S</given-names></name></person-group>. <article-title>What factors influence obesity in Spain? A multivariate analysis of sociodemographic, nutritional, and lifestyle factors affecting body mass index in the Spanish population</article-title>. <source>Healthcare</source>. (<year>2025</year>) <volume>13</volume>:<fpage>386</fpage>. doi: <pub-id pub-id-type="doi">10.3390/healthcare13040386</pub-id>, PMID: <pub-id pub-id-type="pmid">39997261</pub-id></mixed-citation></ref>
<ref id="ref10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Park</surname><given-names>BY</given-names></name> <name><surname>Chung</surname><given-names>CS</given-names></name> <name><surname>Lee</surname><given-names>MJ</given-names></name> <name><surname>Park</surname><given-names>H</given-names></name></person-group>. <article-title>Accurate neuroimaging biomarkers to predict body mass index in adolescents: a longitudinal study</article-title>. <source>Brain Imaging Behav</source>. (<year>2020</year>) <volume>14</volume>:<fpage>1682</fpage>&#x2013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11682-019-00101-y</pub-id>, PMID: <pub-id pub-id-type="pmid">31065926</pub-id></mixed-citation></ref>
<ref id="ref11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gozukara Bag</surname><given-names>HG</given-names></name> <name><surname>Yagin</surname><given-names>FH</given-names></name> <name><surname>Gormez</surname><given-names>Y</given-names></name> <name><surname>Gonz&#x00E1;lez</surname><given-names>PP</given-names></name> <name><surname>Colak</surname><given-names>C</given-names></name> <name><surname>G&#x00FC;l&#x00FC;</surname><given-names>M</given-names></name> <etal/></person-group>. <article-title>Estimation of obesity levels through the proposed predictive approach based on physical activity and nutritional habits</article-title>. <source>Diagnostics</source>. (<year>2023</year>) <volume>13</volume>:<fpage>2949</fpage>. doi: <pub-id pub-id-type="doi">10.3390/diagnostics13182949</pub-id>, PMID: <pub-id pub-id-type="pmid">37761316</pub-id></mixed-citation></ref>
<ref id="ref12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yao</surname><given-names>Y</given-names></name> <name><surname>Song</surname><given-names>L</given-names></name> <name><surname>Ye</surname><given-names>J</given-names></name></person-group>. <article-title>Motion-to-BMI: using motion sensors to predict the body mass index of smartphone users</article-title>. <source>Sensors</source>. (<year>2020</year>) <volume>20</volume>:<fpage>1134</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s20041134</pub-id>, PMID: <pub-id pub-id-type="pmid">32093013</pub-id></mixed-citation></ref>
<ref id="ref13"><label>13.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>S</given-names></name> <name><surname>Lee</surname><given-names>K</given-names></name> <name><surname>Lee</surname><given-names>EC</given-names></name></person-group>. <article-title>Multi-view body image-based prediction of body mass index and various body part sizes</article-title>. <conf-name>2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)</conf-name>. <publisher-loc>Piscataway, NJ, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>. (<year>2023</year>) <fpage>6034</fpage>&#x2013;<lpage>6041</lpage>.</mixed-citation></ref>
<ref id="ref14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname><given-names>S</given-names></name> <name><surname>Na</surname><given-names>R</given-names></name> <name><surname>Waterhouse</surname><given-names>M</given-names></name> <name><surname>Jordan</surname><given-names>SJ</given-names></name> <name><surname>Olsen</surname><given-names>CM</given-names></name> <name><surname>Whiteman</surname><given-names>DC</given-names></name> <etal/></person-group>. <article-title>Predicting obesity and smoking using medication data: a machine-learning approach</article-title>. <source>Pharmacoepidemiol Drug Saf</source>. (<year>2022</year>) <volume>31</volume>:<fpage>91</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1002/pds.5367</pub-id>, PMID: <pub-id pub-id-type="pmid">34611961</pub-id></mixed-citation></ref>
<ref id="ref15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Helforoush</surname><given-names>Z</given-names></name> <name><surname>Sayyad</surname><given-names>H</given-names></name></person-group>. <article-title>Prediction and classification of obesity risk based on a hybrid metaheuristic machine learning approach</article-title>. <source>Front Big Data</source>. (<year>2024</year>) <volume>7</volume>:<fpage>1469981</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdata.2024.1469981</pub-id>, PMID: <pub-id pub-id-type="pmid">39403430</pub-id></mixed-citation></ref>
<ref id="ref16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alkhanbouli</surname><given-names>R</given-names></name> <name><surname>Matar Abdulla Almadhaani</surname><given-names>H</given-names></name> <name><surname>Alhosani</surname><given-names>F</given-names></name> <name><surname>Simsekler</surname><given-names>MCE</given-names></name></person-group>. <article-title>The role of explainable artificial intelligence in disease prediction: a systematic literature review and future research directions</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2025</year>) <volume>25</volume>:<fpage>110</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12911-025-02944-6</pub-id>, PMID: <pub-id pub-id-type="pmid">40038704</pub-id></mixed-citation></ref>
<ref id="ref17"><label>17.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Lundberg</surname><given-names>SM</given-names></name> <name><surname>Lee</surname><given-names>S-I</given-names></name></person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. <conf-name>Proceedings of the 31st International Conference on Neural Information Processing Systems</conf-name>. <publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates, Inc</publisher-name>. (<year>2017</year>) <fpage>4768</fpage>&#x2013;<lpage>4777</lpage>.</mixed-citation></ref>
<ref id="ref18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>X</given-names></name> <name><surname>Li</surname><given-names>L</given-names></name> <name><surname>Zhang</surname><given-names>L</given-names></name></person-group>. <article-title>Development and validation of a prediction model for myelosuppression in lung cancer patients after platinum-based doublet chemotherapy: a multifactorial analysis approach</article-title>. <source>Am J Cancer Res</source>. (<year>2025</year>) <volume>15</volume>:<fpage>470</fpage>&#x2013;<lpage>86</lpage>. doi: <pub-id pub-id-type="doi">10.62347/TFUC2568</pub-id>, PMID: <pub-id pub-id-type="pmid">40084374</pub-id></mixed-citation></ref>
<ref id="ref19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname><given-names>XQ</given-names></name> <name><surname>Kang</surname><given-names>YX</given-names></name> <name><surname>Duan</surname><given-names>SB</given-names></name> <name><surname>Yan</surname><given-names>P</given-names></name> <name><surname>Song</surname><given-names>GB</given-names></name> <name><surname>Zhang</surname><given-names>NY</given-names></name> <etal/></person-group>. <article-title>Machine learning-based prediction of acute kidney injury following pediatric cardiac surgery: model development and validation study</article-title>. <source>J Med Internet Res</source>. (<year>2023</year>) <volume>25</volume>:<fpage>e41142</fpage>. doi: <pub-id pub-id-type="doi">10.2196/41142</pub-id>, PMID: <pub-id pub-id-type="pmid">36603200</pub-id></mixed-citation></ref>
<ref id="ref20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname><given-names>L</given-names></name> <name><surname>Cao</surname><given-names>S</given-names></name> <name><surname>Song</surname><given-names>B</given-names></name> <name><surname>Hu</surname><given-names>Y</given-names></name></person-group>. <article-title>Predicting grip strength-related frailty in middle-aged and older Chinese adults using interpretable machine learning models: a prospective cohort study</article-title>. <source>Front Public Health</source>. (<year>2024</year>) <volume>12</volume>:<fpage>1489848</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpubh.2024.1489848</pub-id>, PMID: <pub-id pub-id-type="pmid">39741944</pub-id></mixed-citation></ref>
<ref id="ref21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Arum&#x00E4;e</surname><given-names>K</given-names></name> <name><surname>M&#x00F5;ttus</surname><given-names>R</given-names></name> <name><surname>Vainik</surname><given-names>U</given-names></name></person-group>. <article-title>Body mass predicts personality development across 18 years in middle to older adulthood</article-title>. <source>J Pers</source>. (<year>2023</year>) <volume>91</volume>:<fpage>1395</fpage>&#x2013;<lpage>409</lpage>. doi: <pub-id pub-id-type="doi">10.1111/jopy.12816</pub-id>, PMID: <pub-id pub-id-type="pmid">36718127</pub-id></mixed-citation></ref>
<ref id="ref22"><label>22.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Singh</surname><given-names>B</given-names></name> <name><surname>Tawfik</surname><given-names>H</given-names></name></person-group>. <article-title>A machine learning approach for predicting weight gain risks in young adults</article-title>. <conf-name>2019 10th International Conference on Dependable Systems, Services and Technologies (DESSERT)</conf-name>. <publisher-loc>Piscataway, NJ, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>. (<year>2019</year>) <fpage>231</fpage>&#x2013;<lpage>234</lpage>.</mixed-citation></ref>
<ref id="ref23"><label>23.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll2">FAO/WHO/UNU</collab></person-group>. <source>Annex 1: equations for the prediction of basal metabolic rate</source>. (<year>2004</year>). Available online at: <ext-link xlink:href="https://www.fao.org/4/aa040e/AA040E15.htm" ext-link-type="uri">https://www.fao.org/4/aa040e/AA040E15.htm</ext-link> (Accessed October 24, 2025).</mixed-citation></ref>
<ref id="ref24"><label>24.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll3">FAO/WHO/UNU Expert Consultation</collab></person-group>. (<year>2004</year>). <source>Human energy requirements</source>. <publisher-loc>Rome</publisher-loc>: <publisher-name>Food and Agriculture Organization of the United Nations</publisher-name>. Available online at: <ext-link xlink:href="https://openknowledge.fao.org/handle/20.500.14283/y5686e" ext-link-type="uri">https://openknowledge.fao.org/handle/20.500.14283/y5686e</ext-link> (Accessed October 24, 2025).</mixed-citation></ref>
<ref id="ref25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><collab id="coll4">Group of China Obesity Task Force</collab></person-group>. <article-title>Body mass index reference norm for screening overweight and obesity in Chinese children and adolescents</article-title>. <source>Zhonghua Liu Xing Bing Xue Za Zhi</source>. (<year>2004</year>) <volume>25</volume>:<fpage>97</fpage>&#x2013;<lpage>102</lpage>. doi: <pub-id pub-id-type="doi">10.3760/j.issn:0254-6450.2004.02.003</pub-id>, PMID: <pub-id pub-id-type="pmid">15132858</pub-id></mixed-citation></ref>
<ref id="ref26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ramirez Luque</surname><given-names>DB</given-names></name> <name><surname>Rocha Huaman</surname><given-names>NL</given-names></name> <name><surname>Calizaya-Milla</surname><given-names>YE</given-names></name> <name><surname>Calizaya-Milla</surname><given-names>SE</given-names></name> <name><surname>Ramos-Vera</surname><given-names>C</given-names></name> <name><surname>Saintila</surname><given-names>J</given-names></name></person-group>. <article-title>Body self-perception, dietary self-efficacy, and body mass index in young adults: a cross-sectional survey</article-title>. <source>Int J Gen Med</source>. (<year>2023</year>) <volume>16</volume>:<fpage>193</fpage>&#x2013;<lpage>202</lpage>. doi: <pub-id pub-id-type="doi">10.2147/IJGM.S395281</pub-id>, PMID: <pub-id pub-id-type="pmid">36699341</pub-id></mixed-citation></ref>
<ref id="ref27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>C</given-names></name> <name><surname>Zhang</surname><given-names>M</given-names></name> <name><surname>Tarken</surname><given-names>AY</given-names></name> <name><surname>Cao</surname><given-names>Y</given-names></name> <name><surname>Li</surname><given-names>Q</given-names></name> <name><surname>Wang</surname><given-names>H</given-names></name></person-group>. <article-title>Secular trends and sociodemographic determinants of thinness, overweight and obesity among Chinese children and adolescents aged 7-18 years from 2010 to 2018</article-title>. <source>Front Public Health</source>. (<year>2023</year>) <volume>11</volume>:<fpage>1128552</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpubh.2023.1128552</pub-id>, PMID: <pub-id pub-id-type="pmid">37213615</pub-id></mixed-citation></ref>
<ref id="ref28"><label>28.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cohen</surname><given-names>J</given-names></name></person-group>. <source>Statistical power analysis for the behavioral sciences</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Routledge</publisher-name>. (<year>2013</year>) <fpage>410</fpage>&#x2013;<lpage>414</lpage>.</mixed-citation></ref>
<ref id="ref29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nakagawa</surname><given-names>S</given-names></name> <name><surname>Johnson</surname><given-names>PC</given-names></name> <name><surname>Schielzeth</surname><given-names>H</given-names></name></person-group>. <article-title>The coefficient of determination R<sup>2</sup> and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded</article-title>. <source>J R Soc Interface</source>. (<year>2017</year>) <volume>14</volume>:<fpage>20170213</fpage>. doi: <pub-id pub-id-type="doi">10.1098/rsif.2017.0213</pub-id>, PMID: <pub-id pub-id-type="pmid">28904005</pub-id></mixed-citation></ref>
<ref id="ref30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Austin</surname><given-names>PC</given-names></name></person-group>. <article-title>Balance diagnostics for comparing the distribution of baseline covariates between treatment groups in propensity-score matched samples</article-title>. <source>Stat Med</source>. (<year>2009</year>) <volume>28</volume>:<fpage>3083</fpage>&#x2013;<lpage>107</lpage>. doi: <pub-id pub-id-type="doi">10.1002/sim.3697</pub-id>, PMID: <pub-id pub-id-type="pmid">19757444</pub-id></mixed-citation></ref>
<ref id="ref31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>Z</given-names></name> <name><surname>Kim</surname><given-names>HJ</given-names></name> <name><surname>Lonjon</surname><given-names>G</given-names></name> <name><surname>Zhu</surname><given-names>Y</given-names></name></person-group>. <article-title>Balance diagnostics after propensity score matching</article-title>. <source>Ann Transl Med</source>. (<year>2019</year>) <volume>7</volume>:<fpage>16</fpage>. doi: <pub-id pub-id-type="doi">10.21037/atm.2018.12.10</pub-id>, PMID: <pub-id pub-id-type="pmid">30788363</pub-id></mixed-citation></ref>
<ref id="ref32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Varma</surname><given-names>S</given-names></name> <name><surname>Simon</surname><given-names>R</given-names></name></person-group>. <article-title>Bias in error estimation when using cross-validation for model selection</article-title>. <source>BMC Bioinformatics</source>. (<year>2006</year>) <volume>7</volume>:<fpage>91</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-7-91</pub-id>, PMID: <pub-id pub-id-type="pmid">16504092</pub-id></mixed-citation></ref>
<ref id="ref33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Parvandeh</surname><given-names>S</given-names></name> <name><surname>Yeh</surname><given-names>H-W</given-names></name> <name><surname>Paulus</surname><given-names>MP</given-names></name> <name><surname>McKinney</surname><given-names>BA</given-names></name></person-group>. <article-title>Consensus features nested cross-validation</article-title>. <source>Bioinformatics</source>. (<year>2020</year>) <volume>36</volume>:<fpage>3093</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa046</pub-id>, PMID: <pub-id pub-id-type="pmid">31985777</pub-id></mixed-citation></ref>
<ref id="ref34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Breusch</surname><given-names>TS</given-names></name> <name><surname>Pagan</surname><given-names>AR</given-names></name></person-group>. <article-title>A simple test for heteroscedasticity and random coefficient variation</article-title>. <source>Econometrica</source>. (<year>1979</year>) <volume>47</volume>:<fpage>1287</fpage>&#x2013;<lpage>94</lpage>. doi: <pub-id pub-id-type="doi">10.2307/1911963</pub-id></mixed-citation></ref>
<ref id="ref35"><label>35.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Carroll</surname><given-names>RJ</given-names></name> <name><surname>Ruppert</surname><given-names>D</given-names></name></person-group>. <source>Transformation and weighting in regression</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Chapman and Hall</publisher-name> (<year>1988</year>).</mixed-citation></ref>
<ref id="ref36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tellinghuisen</surname><given-names>J</given-names></name></person-group>. <article-title>Weighted least squares in calibration: the problem with using &#x201C;quality coefficients&#x201D; to select weighting formulas</article-title>. <source>J Chromatogr B</source>. (<year>2008</year>) <volume>872</volume>:<fpage>162</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jchromb.2008.07.043</pub-id>, PMID: <pub-id pub-id-type="pmid">18706869</pub-id></mixed-citation></ref>
<ref id="ref37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Holmen</surname><given-names>H</given-names></name> <name><surname>Fl&#x00F8;lo</surname><given-names>TN</given-names></name> <name><surname>T&#x00F8;rris</surname><given-names>C</given-names></name> <name><surname>Torbj&#x00F8;rnsen</surname><given-names>A</given-names></name> <name><surname>Almendingen</surname><given-names>K</given-names></name> <name><surname>Riiser</surname><given-names>K</given-names></name></person-group>. <article-title>The role of health literacy in intervention studies targeting children living with overweight or obesity and their parents&#x2014;a systematic mixed methods review</article-title>. <source>Front Pediatr</source>. (<year>2025</year>) <volume>12</volume>:<fpage>1507379</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fped.2024.1507379</pub-id>, PMID: <pub-id pub-id-type="pmid">39911768</pub-id></mixed-citation></ref>
<ref id="ref38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ballarin</surname><given-names>G</given-names></name> <name><surname>Galle</surname><given-names>F</given-names></name> <name><surname>Dinacci</surname><given-names>L</given-names></name> <name><surname>Liberti</surname><given-names>F</given-names></name> <name><surname>Cunti</surname><given-names>A</given-names></name> <name><surname>Valerio</surname><given-names>G</given-names></name></person-group>. <article-title>Self-perception profile, body image perception and satisfaction in relation to body mass index: an investigation in a sample of adolescents from the Campania region, Italy</article-title>. <source>Children</source>. (<year>2024</year>) <volume>11</volume>:<fpage>805</fpage>. doi: <pub-id pub-id-type="doi">10.3390/children11070805</pub-id>, PMID: <pub-id pub-id-type="pmid">39062254</pub-id></mixed-citation></ref>
<ref id="ref39"><label>39.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harrison</surname><given-names>RN</given-names></name> <name><surname>Gaughran</surname><given-names>F</given-names></name> <name><surname>Murray</surname><given-names>RM</given-names></name> <name><surname>Lee</surname><given-names>SH</given-names></name> <name><surname>Cano</surname><given-names>JP</given-names></name> <name><surname>Dempster</surname><given-names>D</given-names></name> <etal/></person-group>. <article-title>Development of multivariable models to predict change in body mass index within a clinical trial population of psychotic individuals</article-title>. <source>Sci Rep</source>. (<year>2017</year>) <volume>7</volume>:<fpage>14738</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-017-15137-7</pub-id>, PMID: <pub-id pub-id-type="pmid">29116126</pub-id></mixed-citation></ref>
<ref id="ref40"><label>40.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname><given-names>ER</given-names></name> <name><surname>Cengiz</surname><given-names>AY</given-names></name> <name><surname>Miled</surname><given-names>ZB</given-names></name></person-group>. <article-title>Predicting body mass index in early childhood using data from the first 1000 days</article-title>. <source>Sci Rep</source>. (<year>2023</year>) <volume>13</volume>:<fpage>8781</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-023-35935-6</pub-id>, PMID: <pub-id pub-id-type="pmid">37258628</pub-id></mixed-citation></ref>
<ref id="ref41"><label>41.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Delnevo</surname><given-names>G</given-names></name> <name><surname>Mancini</surname><given-names>G</given-names></name> <name><surname>Roccetti</surname><given-names>M</given-names></name> <name><surname>Salomoni</surname><given-names>P</given-names></name> <name><surname>Trombini</surname><given-names>E</given-names></name> <name><surname>Andrei</surname><given-names>F</given-names></name></person-group>. <article-title>The prediction of body mass index from negative affectivity through machine learning: a confirmatory study</article-title>. <source>Sensors</source>. (<year>2021</year>) <volume>21</volume>:<fpage>2361</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s21072361</pub-id>, PMID: <pub-id pub-id-type="pmid">33805257</pub-id></mixed-citation></ref>
<ref id="ref42"><label>42.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y</given-names></name> <name><surname>Zhang</surname><given-names>L</given-names></name> <name><surname>Jiang</surname><given-names>Y</given-names></name> <name><surname>Cheng</surname><given-names>X</given-names></name> <name><surname>He</surname><given-names>W</given-names></name> <name><surname>Yu</surname><given-names>H</given-names></name> <etal/></person-group>. <article-title>Multiparametric magnetic resonance imaging (MRI)-based radiomics model explained by the Shapley additive exPlanations (SHAP) method for predicting complete response to neoadjuvant chemoradiotherapy in locally advanced rectal cancer: a multicenter retrospective study</article-title>. <source>Quant Imaging Med Surg</source>. (<year>2024</year>) <volume>14</volume>:<fpage>4617</fpage>&#x2013;<lpage>34</lpage>. doi: <pub-id pub-id-type="doi">10.21037/qims-24-7</pub-id>, PMID: <pub-id pub-id-type="pmid">39022292</pub-id></mixed-citation></ref>
<ref id="ref43"><label>43.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>J</given-names></name> <name><surname>Chen</surname><given-names>T</given-names></name> <name><surname>Fang</surname><given-names>X</given-names></name> <name><surname>Xia</surname><given-names>L</given-names></name> <name><surname>Pan</surname><given-names>X</given-names></name></person-group>. <article-title>Prediction model of pressure injury occurrence in diabetic patients during ICU hospitalization&#x2014;&#x2014;XGBoost machine learning model can be interpreted based on SHAP</article-title>. <source>Intensive Crit Care Nurs</source>. (<year>2024</year>) <volume>83</volume>:<fpage>103715</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.iccn.2024.103715</pub-id>, PMID: <pub-id pub-id-type="pmid">38701634</pub-id></mixed-citation></ref>
<ref id="ref44"><label>44.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>W</given-names></name> <name><surname>Zhao</surname><given-names>Y</given-names></name> <name><surname>Li</surname><given-names>Y</given-names></name></person-group>. <article-title>Ensemble machine learning for predicting the homogenized elastic properties of unidirectional composites: a SHAP-based interpretability analysis</article-title>. <source>Acta Mech Sinica</source>. (<year>2024</year>) <volume>40</volume>:<fpage>423301</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s10409-023-23301-x</pub-id></mixed-citation></ref>
<ref id="ref45"><label>45.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname><given-names>L</given-names></name> <name><surname>Tang</surname><given-names>C</given-names></name> <name><surname>Fu</surname><given-names>Q</given-names></name> <name><surname>Ma</surname><given-names>C</given-names></name></person-group>. <article-title>Predicting travel mode choice with a robust neural network and Shapley additive explanations analysis</article-title>. <source>IET Intell Transp Syst</source>. (<year>2024</year>) <volume>18</volume>:<fpage>1339</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1049/itr2.12514</pub-id></mixed-citation></ref>
<ref id="ref46"><label>46.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname><given-names>HV</given-names></name> <name><surname>Byeon</surname><given-names>H</given-names></name></person-group>. <article-title>A hybrid self-supervised model predicting life satisfaction in South Korea</article-title>. <source>Front Public Health</source>. (<year>2024</year>) <volume>12</volume>:<fpage>1445864</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpubh.2024.1445864</pub-id>, PMID: <pub-id pub-id-type="pmid">39484355</pub-id></mixed-citation></ref>
<ref id="ref47"><label>47.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>C</given-names></name> <name><surname>Wang</surname><given-names>Q</given-names></name> <name><surname>Ben</surname><given-names>W</given-names></name> <name><surname>Qiao</surname><given-names>M</given-names></name> <name><surname>Ma</surname><given-names>B</given-names></name> <name><surname>Bai</surname><given-names>Y</given-names></name> <etal/></person-group>. <article-title>Machine learning predicts the growth of cyanobacterial genera in river systems and reveals their different environmental responses</article-title>. <source>Sci Total Environ</source>. (<year>2024</year>) <volume>946</volume>:<fpage>174383</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.scitotenv.2024.174383</pub-id>, PMID: <pub-id pub-id-type="pmid">38960197</pub-id></mixed-citation></ref>
<ref id="ref48"><label>48.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ramyaa</surname><given-names>R</given-names></name> <name><surname>Hosseini</surname><given-names>O</given-names></name> <name><surname>Krishnan</surname><given-names>GP</given-names></name> <name><surname>Krishnan</surname><given-names>S</given-names></name></person-group>. <article-title>Phenotyping women based on dietary macronutrients, physical activity, and body weight using machine learning tools</article-title>. <source>Nutrients</source>. (<year>2019</year>) <volume>11</volume>:<fpage>1681</fpage>. doi: <pub-id pub-id-type="doi">10.3390/nu11071681</pub-id>, PMID: <pub-id pub-id-type="pmid">31336626</pub-id></mixed-citation></ref>
<ref id="ref49"><label>49.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>C</given-names></name> <name><surname>Costello</surname><given-names>FJ</given-names></name> <name><surname>Lee</surname><given-names>KC</given-names></name> <name><surname>Li</surname><given-names>Y</given-names></name> <name><surname>Li</surname><given-names>C</given-names></name></person-group>. <article-title>Predicting factors affecting adolescent obesity using general Bayesian network and what-if analysis</article-title>. <source>Int J Environ Res Public Health</source>. (<year>2019</year>) <volume>16</volume>:<fpage>4684</fpage>. doi: <pub-id pub-id-type="doi">10.3390/ijerph16234684</pub-id>, PMID: <pub-id pub-id-type="pmid">31775234</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2096571/overview">Jian Sun</ext-link>, Guangzhou Sport University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3107519/overview">Sebasti&#x00E1;n Rodr&#x00ED;guez</ext-link>, Universidad Nacional de Colombia, Colombia</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3130308/overview">Amrita Das Tipu</ext-link>, Dhaka International University, Bangladesh</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3136686/overview">Hanh Nguyen</ext-link>, Hanoi National University of Education, Vietnam</p>
</fn>
</fn-group>
</back>
</article>