<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Endocrinol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Endocrinology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Endocrinol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-2392</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fendo.2026.1772106</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Machine learning&#x2013;based prediction of IVF/ICSI outcomes in male factor infertility highlighting couple-level BMI</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Li</surname><given-names>Hu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Gao</surname><given-names>Jie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1728201/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Li</surname><given-names>Yiran</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3323894/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Shanghai Key Laboratory of Maternal Fetal Medicine, Shanghai Institute of Maternal-Fetal Medicine and Gynecologic Oncology, Shanghai First Maternity and Infant Hospital, School of Medicine, Tongji University</institution>, <city>Shanghai</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Centre for Assisted Reproduction, Shanghai Key Laboratory of Maternal Fetal Medicine, Shanghai Institute of Maternal-Fetal Medicine and Gynecologic Oncology, Shanghai First Maternity and Infant Hospital, School of Medicine, Tongji University</institution>, <city>Shanghai</city>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Yiran Li, <email xlink:href="mailto:liyiran2007@gmail.com">liyiran2007@gmail.com</email></corresp>
<fn fn-type="equal" id="fn003">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work</p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-10">
<day>10</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1772106</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>23</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Li, Gao and Li.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Li, Gao and Li</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-10">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Most clinical prediction models for assisted reproductive technology focus primarily on female ovarian reserve markers and often under-represent male factors and the metabolic status of both partners. Additionally, traditional parametric models may have limited ability to capture nonlinear patterns within reproductive data. This study aimed to develop and validate a machine learning (ML)&#x2013;based model to predict clinical pregnancy outcomes in couples with male factor infertility undergoing IVF/ICSI, and to explore model interpretability using Shapley Additive exPlanations (SHAP).</p>
</sec>
<sec>
<title>Methods</title>
<p>This retrospective study analyzed 2,565 couples undergoing their first IVF/ICSI cycle for male factor infertility at Shanghai First Maternity and Infant Hospital between 2019 and 2025. The cohort was partitioned according to embryo transfer date, with the first 70% of cases assigned to the training set and the remaining 30% reserved as a temporal internal validation set. Feature selection was conducted using LASSO regression within the training set. Seven ML models, including LightGBM and logistic regression, were developed and optimized through 5-fold cross-validation. Model performance was evaluated using the area under the curve (AUC), accuracy, Brier score, and decision curve analysis. SHAP was employed to provide a visual interpretation of the optimal model.</p>
</sec>
<sec>
<title>Results</title>
<p>Five predictors were selected in the training set: female BMI, male BMI, basal FSH, AMH, and female age. In the temporal validation set, all models demonstrated comparable discriminative performance (AUC range: 0.840&#x2013;0.857). LightGBM achieved an AUC of 0.857 (95% CI: 0.830&#x2013;0.882), with an accuracy of 0.775 and specificity of 0.909. DeLong tests indicated no statistically significant differences in AUC between LightGBM and random forest (P = 0.918), XGBoost (P = 0.985), or logistic regression (P = 0.067). Based on its overall stability across discrimination, calibration (Brier score = 0.145), and clinical utility, LightGBM was selected for interpretability analysis.</p>
</sec>
<sec>
<title>Conclusions</title>
<p>A LightGBM-based prediction model demonstrated reasonable performance for predicting IVF/ICSI outcomes in couples with male factor infertility. Within this dataset, couple-level metabolic features were strongly associated with model predictions alongside traditional ovarian reserve markers. These findings reflect predictive associations rather than causal effects and suggest that metabolic characteristics may warrant consideration in risk stratification and counseling. Prospective studies are needed to determine whether targeted interventions can improve clinical outcomes.</p>
</sec>
</abstract>
<kwd-group>
<kwd>body mass index</kwd>
<kwd>clinical pregnancy</kwd>
<kwd>LightGBM</kwd>
<kwd>machine learning</kwd>
<kwd>male infertility</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Shanghai Health System Outstanding Talents Program (Grant No. 20234Z0019).</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="41"/>
<page-count count="11"/>
<word-count count="4578"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Obesity</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Infertility has become a global public health concern and affects approximately 15% of couples of reproductive age (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Male factors account for 40%&#x2013;50% of these cases (<xref ref-type="bibr" rid="B3">3</xref>). For patients with severe oligozoospermia, asthenozoospermia, or teratozoospermia, <italic>in vitro</italic> fertilization (IVF) and intracytoplasmic sperm injection (ICSI) remain the most effective treatment options (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B5">5</xref>). Nevertheless, despite advances in assisted reproductive technology, the clinical pregnancy rate per IVF/ICSI cycle is still only 40%&#x2013;60% (<xref ref-type="bibr" rid="B6">6</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>). Failed cycles place a substantial financial burden on patients and are frequently associated with considerable psychological distress (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>). Accordingly, accurate pre-treatment prediction of pregnancy success is important for individualized treatment planning and for setting realistic expectations.</p>
<p>Traditional prediction models, including the Templeton model and the Nelson model, are largely based on logistic regression (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B13">13</xref>). Although these models have broad applicability in general populations, they have several limitations. First, these models largely center on female age and ovarian reserve markers such as AMH and FSH, while giving limited consideration to partner-related characteristics and their potential interactions, including male BMI and age (<xref ref-type="bibr" rid="B14">14</xref>). Second, although logistic regression is a type of generalized linear model, prespecified regression models may still be limited in their ability to flexibly capture complex nonlinear relationships and high-dimensional structures that are commonly observed in reproductive datasets, unless nonlinear terms or interactions are explicitly modeled (<xref ref-type="bibr" rid="B15">15</xref>). Third, generic models are often not tailored to the male factor infertility subgroup (<xref ref-type="bibr" rid="B16">16</xref>), which can compromise predictive accuracy in this population.</p>
<p>In recent years, rapid progress in artificial intelligence and machine learning (ML) has introduced new approaches to clinical prediction. Compared with traditional statistical methods, ML algorithms&#x2014;including random forest and gradient boosting trees&#x2014;offer advantages in modeling complex, nonlinear, and high-dimensional data (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>). Prior studies have reported promising performance of ML approaches in polycystic ovary syndrome (PCOS) (<xref ref-type="bibr" rid="B19">19</xref>). However, high-precision ML models based on large samples and incorporating couple-level characteristics remain limited in the male factor infertility population. In addition, many ML models are considered &#x201c;black boxes&#x201d; with limited clinical interpretability, which hinders their broader implementation in practice (<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B21">21</xref>).</p>
<p>This study aims to develop an IVF/ICSI pregnancy outcome prediction model for couples with male factor infertility using a single-center, large-sample retrospective dataset and multiple ML algorithms. Particular attention is given to quantifying the contribution of spousal BMI within the prediction framework using SHAP analysis. The findings may offer additional insight to support clinical decision-making in this setting.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Subjects and design</title>
<p>This retrospective cohort study used data from couples who underwent IVF or ICSI at the Reproductive Medical Center of Shanghai First Maternity and Infant Hospital between January 2019 and January 2025. This study was approved by the Research Ethics Committee of Shanghai First Maternity and Infant Hospital (KS25468). The inclusion criteria were: (1) a primary diagnosis of male factor infertility, including oligozoospermia, asthenozoospermia, or teratozoospermia, defined according to the WHO 5th edition criteria (<xref ref-type="bibr" rid="B22">22</xref>); (2) treatment with conventional IVF or ICSI; (3) complete follow-up records for pregnancy outcomes; and (4) first IVF/ICSI treatment cycle for the couple (only one cycle per couple was included). The exclusion criteria were: (1) severe uterine malformations or intrauterine adhesions in the female partner; (2) chromosomal karyotype abnormalities in either partner; (3) cycles involving donor sperm or oocytes; and (4) missing values in non-imputable administrative or eligibility variables (none in the final analytic cohort). Ultimately, 2,565 couples were included. To enhance the methodological rigor and better reflect real-world clinical application, the cohort was partitioned strictly according to the date of embryo transfer (<xref ref-type="bibr" rid="B23">23</xref>). The earliest 70% of cases (n = 1,797) were assigned to the training set, and the most recent 30% (n = 768) were reserved as an internal validation set. The training set was used exclusively for feature selection, model development, and hyperparameter tuning, whereas the validation set was used only for final model evaluation.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data collection</title>
<p>Variables were extracted from the electronic medical record (EMR) system. Demographic characteristics included female age, male age, female body mass index (BMI), male BMI, infertility duration, infertility type, female education, and male education. Clinical characteristics included menstrual regularity; basal follicle-stimulating hormone (FSH), luteinizing hormone (LH), estradiol (E2), progesterone (P), testosterone (T), prolactin (PRL), and anti-M&#xfc;llerian hormone (AMH). The outcome was clinical pregnancy, defined as the presence of a gestational sac with fetal cardiac activity in the uterine cavity on transvaginal ultrasound 28&#x2013;35 days after embryo transfer (<xref ref-type="bibr" rid="B24">24</xref>). Absence of a gestational sac or biochemical pregnancy was classified as non-clinical pregnancy.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Data preprocessing and feature selection</title>
<p>For missing data, multiple imputation was performed using the mice package in R (<xref ref-type="bibr" rid="B25">25</xref>). In this dataset, all candidate predictors exhibited low levels of missingness (&lt;5% in both the training and validation sets; <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S1</bold></xref> and <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure S1</bold></xref>) (<xref ref-type="bibr" rid="B25">25</xref>). No participants were excluded due to missing non-imputable administrative or eligibility-defining variables (e.g., outcome follow-up), and therefore the final analytic cohort contained complete information on all eligibility-defining variables. Given the low proportion of missingness across predictors, multiple imputation was applied to all predictors to avoid unnecessary case deletion while minimizing potential instability associated with highly incomplete variables. Five imputed datasets were generated (m = 5, seed = 123). To avoid information leakage, imputation was conducted after the temporal split and performed separately within the training set and the temporal validation set. In the training set, the imputation model included all candidate predictors and the outcome variable in order to preserve predictor&#x2013;outcome associations for model development. In the validation set, the imputation model included only predictors and explicitly excluded the outcome variable, thereby preventing outcome-informed imputation during model evaluation. Predictive mean matching was used for continuous variables and logistic regression for binary variables, with 20 iterations per imputation. 
The predictor matrix followed the default mice setting in which predictors were allowed to inform each other, except where structurally inappropriate; specifically, the outcome variable was excluded from all validation-set imputation models, and administrative or eligibility-defining variables were not imputed. The full imputation methods and predictor matrix for the training set are reported in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S2</bold></xref> and <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure S2</bold></xref>. Feature selection was performed using the least absolute shrinkage and selection operator (LASSO) within the training set only (<xref ref-type="bibr" rid="B26">26</xref>). LASSO was implemented in R using the glmnet package with internal standardization (standardize = TRUE). LASSO was fitted separately within each of the five imputed training datasets, and predictors were retained in the final feature set if they showed stable selection at &#x3bb;<sub>1SE</sub> across imputations, operationalized as non-zero coefficients in at least four of the five imputed datasets (<xref ref-type="bibr" rid="B27">27</xref>). The penalty parameter &#x3bb; was selected using 10-fold cross-validation, and the minimum deviance occurred at &#x3bb;<sub>min</sub> = 0.00406, while the 1-standard-error criterion selected &#x3bb;<sub>1SE</sub> = 0.01801. For downstream machine learning analyses, continuous predictors were standardized to Z-scores using the StandardScaler function in the Python scikit-learn library (<xref ref-type="bibr" rid="B28">28</xref>), with scaling parameters learned from the training set and then applied to the validation set. Model development proceeded separately within each imputed training dataset. 
Each fitted model was then applied to each imputed validation dataset to generate predicted probabilities; for each individual, predicted probabilities were averaged across imputations to obtain pooled predictions. All performance metrics were calculated using these pooled predicted probabilities.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Model construction and hyperparameter tuning</title>
<p>Seven commonly used machine learning algorithms were developed to predict pregnancy outcomes: logistic regression (LR), decision tree (DT), random forest (RF), support vector machine (SVM), artificial neural network (ANN), XGBoost, and LightGBM. Hyperparameters for each model were optimized using 5-fold cross-validation with grid search within the training set. To ensure comparability and avoid optimistic bias under multiple imputation, hyperparameter tuning was performed only once using the first imputed training dataset, with a fixed random seed (seed = 123) (<xref ref-type="bibr" rid="B29">29</xref>). The resulting optimal hyperparameters were then held constant and applied to all five imputed training datasets for model fitting. This strategy ensured that model complexity and tuning degrees of freedom were consistent across imputations while allowing uncertainty due to imputation to be reflected in model estimation.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Model evaluation and interpretation</title>
<p>Model performance was evaluated in the validation set using the area under the receiver operating characteristic curve (AUC), accuracy, sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV) and F1 score. For all classification metrics, predicted probabilities were dichotomized using a fixed operating threshold of 0.5, which was applied consistently across all models and datasets to ensure comparability.</p>
<p>Calibration was evaluated using the Brier score (<xref ref-type="bibr" rid="B30">30</xref>) and calibration curves constructed by grouping predicted probabilities into deciles and plotting observed versus predicted outcome probabilities. Clinical utility was examined using decision curve analysis (DCA) over a clinically plausible threshold probability range of 20%&#x2013;80%, reflecting the range in which clinicians may reasonably consider counseling or intervention in the context of IVF/ICSI outcome prediction.</p>
<p>Models were fitted separately within each of the five imputed datasets. For each individual and each model, predicted probabilities were averaged across imputations to obtain a single pooled predicted probability. All discrimination metrics (including AUC), classification metrics, calibration analyses, and decision curve analyses were computed based on these pooled predicted probabilities.</p>
<p>The 95% confidence intervals for AUC were estimated using nonparametric bootstrap resampling (1,000 replications) applied to the pooled predicted probabilities within each dataset. Pairwise comparisons of AUCs between models were conducted using DeLong&#x2019;s test for correlated receiver operating characteristic curves based on ROC curves constructed from the pooled predicted probabilities. Specifically, the AUC of each model was compared against that of LightGBM and that of logistic regression, respectively, and the corresponding P values were reported.</p>
<p>The representative model was then interpreted using SHAP to quantify the marginal contribution of each feature (<xref ref-type="bibr" rid="B31">31</xref>). SHAP summary plots, beeswarm plots, and dependence plots were generated to illustrate the model&#x2019;s decision patterns. SHAP values were computed using the standard interventional SHAP implementation provided by the SHAP Python package. We acknowledge that when predictors are correlated, feature attributions may be influenced by feature dependence, and therefore SHAP results should be interpreted as model-based associations rather than causal effects.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Statistical analysis</title>
<p>All analyses were performed using R (version 4.2.0) and Python (version 3.9.0). In R, data preprocessing, multiple imputation, descriptive analyses, dataset partitioning, and LASSO feature selection were conducted using standard statistical packages, including mice, caret, tableone, and glmnet. All machine learning model development, hyperparameter tuning, and performance evaluation were conducted in Python. Graphical analyses, including ROC curves, calibration curves, and decision curve analysis, were generated using matplotlib and ggplot2.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Baseline characteristics</title>
<p>Baseline characteristics of the total cohort, training set, and validation set are presented in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>. The distributions of outcomes, demographic variables, and clinical characteristics were highly comparable between the training and validation sets. All standardized mean differences (SMDs) were below 0.25, with most variables showing SMDs &lt; 0.10, indicating good balance between the two datasets. Slight imbalances were observed for female age (SMD = 0.218) and AMH (SMD = 0.113), but the overall clinical characteristics remained broadly similar across the two subsets, supporting the appropriateness of the temporal split for model development and validation.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Baseline characteristics of the overall cohort and comparison between the training and validation sets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Characteristics</th>
<th valign="middle" align="center">Total (n=2565)</th>
<th valign="middle" align="center">Training set (n=1797)</th>
<th valign="middle" align="center">Validation set (n=768)</th>
<th valign="middle" align="center"><italic>SMD<sup>1</sup></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Outcomes (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.06</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;No Clinical pregnancy</td>
<td valign="middle" align="center">1413 (55.09)</td>
<td valign="middle" align="center">974 (54.20)</td>
<td valign="middle" align="center">439 (57.16)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Clinical pregnancy</td>
<td valign="middle" align="center">1152 (44.91)</td>
<td valign="middle" align="center">823 (45.80)</td>
<td valign="middle" align="center">329 (42.84)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">female_education (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.078</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;lower education level</td>
<td valign="middle" align="center">624 (24.33)</td>
<td valign="middle" align="center">455 (25.32)</td>
<td valign="middle" align="center">169 (22.01)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;higher education level</td>
<td valign="middle" align="center">1941 (75.67)</td>
<td valign="middle" align="center">1342 (74.68)</td>
<td valign="middle" align="center">599 (77.99)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">male_education (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.066</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;lower education level</td>
<td valign="middle" align="center">618 (24.09)</td>
<td valign="middle" align="center">448 (24.93)</td>
<td valign="middle" align="center">170 (22.14)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;higher education level</td>
<td valign="middle" align="center">1947 (75.91)</td>
<td valign="middle" align="center">1349 (75.07)</td>
<td valign="middle" align="center">598 (77.86)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">infertility_type (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.057</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;primary infertility</td>
<td valign="middle" align="center">1695 (66.08)</td>
<td valign="middle" align="center">1173 (65.28)</td>
<td valign="middle" align="center">522 (67.97)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;secondary infertility</td>
<td valign="middle" align="center">870 (33.92)</td>
<td valign="middle" align="center">624 (34.72)</td>
<td valign="middle" align="center">246 (32.03)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">menstrual_pattern (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.002</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;regular menstrual cycle</td>
<td valign="middle" align="center">1869 (72.87)</td>
<td valign="middle" align="center">1309 (72.84)</td>
<td valign="middle" align="center">560 (72.92)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;irregular menstrual cycle</td>
<td valign="middle" align="center">696 (27.13)</td>
<td valign="middle" align="center">488 (27.16)</td>
<td valign="middle" align="center">208 (27.08)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">female_age</td>
<td valign="middle" align="center">32.00 (30.00, 35.00)</td>
<td valign="middle" align="center">32.00 [29.00, 34.00]</td>
<td valign="middle" align="center">33.00 [30.00, 35.00]</td>
<td valign="middle" align="center">0.218</td>
</tr>
<tr>
<td valign="middle" align="left">male_age</td>
<td valign="middle" align="center">33.00 (31.00, 36.00)</td>
<td valign="middle" align="center">33.00 [31.00, 36.00]</td>
<td valign="middle" align="center">34.00 [31.00, 37.00]</td>
<td valign="middle" align="center">0.159</td>
</tr>
<tr>
<td valign="middle" align="left">female_bmi</td>
<td valign="middle" align="center">21.70 (19.50, 24.10)</td>
<td valign="middle" align="center">21.60 [19.50, 24.10]</td>
<td valign="middle" align="center">21.75 [19.58, 24.20]</td>
<td valign="middle" align="center">0.076</td>
</tr>
<tr>
<td valign="middle" align="left">male_bmi</td>
<td valign="middle" align="center">25.10 (21.40, 28.30)</td>
<td valign="middle" align="center">25.10 [21.40, 28.10]</td>
<td valign="middle" align="center">25.20 [21.37, 28.63]</td>
<td valign="middle" align="center">0.048</td>
</tr>
<tr>
<td valign="middle" align="left">infertility_duration</td>
<td valign="middle" align="center">2.10 (1.10, 4.00)</td>
<td valign="middle" align="center">2.00 [1.00, 4.00]</td>
<td valign="middle" align="center">2.20 [1.20, 4.00]</td>
<td valign="middle" align="center">0.038</td>
</tr>
<tr>
<td valign="middle" align="left">AMH</td>
<td valign="middle" align="center">3.71 (2.13, 6.01)</td>
<td valign="middle" align="center">3.74 [2.16, 6.21]</td>
<td valign="middle" align="center">3.64 [1.93, 5.80]</td>
<td valign="middle" align="center">0.113</td>
</tr>
<tr>
<td valign="middle" align="left">FSH</td>
<td valign="middle" align="center">6.53 [5.27, 7.84]</td>
<td valign="middle" align="center">6.53 [5.28, 7.76]</td>
<td valign="middle" align="center">6.56 [5.27, 8.18]</td>
<td valign="middle" align="center">0.029</td>
</tr>
<tr>
<td valign="middle" align="left">LH</td>
<td valign="middle" align="center">4.19 [2.94, 5.82]</td>
<td valign="middle" align="center">4.20 [2.91, 5.94]</td>
<td valign="middle" align="center">4.19 [3.02, 5.80]</td>
<td valign="middle" align="center">0.051</td>
</tr>
<tr>
<td valign="middle" align="left">PRL</td>
<td valign="middle" align="center">12.00 [8.85, 16.74]</td>
<td valign="middle" align="center">12.35 [9.05, 17.60]</td>
<td valign="middle" align="center">11.91 [8.73, 16.39]</td>
<td valign="middle" align="center">0.078</td>
</tr>
<tr>
<td valign="middle" align="left">E2</td>
<td valign="middle" align="center">43.00 [32.00, 62.95]</td>
<td valign="middle" align="center">43.33 [32.63, 63.73]</td>
<td valign="middle" align="center">42.00 [31.20, 62.74]</td>
<td valign="middle" align="center">0.002</td>
</tr>
<tr>
<td valign="middle" align="left">T</td>
<td valign="middle" align="center">0.26 [0.20, 0.37]</td>
<td valign="middle" align="center">0.27 [0.20, 0.37]</td>
<td valign="middle" align="center">0.25 [0.19, 0.36]</td>
<td valign="middle" align="center">0.053</td>
</tr>
<tr>
<td valign="middle" align="left">P</td>
<td valign="middle" align="center">0.58 [0.42, 0.79]</td>
<td valign="middle" align="center">0.58 [0.42, 0.79]</td>
<td valign="middle" align="center">0.57 [0.41, 0.80]</td>
<td valign="middle" align="center">0.077</td>
</tr>
<tr>
<td valign="middle" align="left">AFC_total</td>
<td valign="middle" align="center">17.00 [12.00, 21.00]</td>
<td valign="middle" align="center">17.00 [12.00, 21.00]</td>
<td valign="middle" align="center">17.00 [11.75, 21.00]</td>
<td valign="middle" align="center">0.03</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p><sup>1</sup>Standardized mean difference.</p></fn>
<fn>
<p>Data are shown as median with interquartile range (IQR) for continuous variables and number with percentage for categorical variables.</p></fn>
<fn>
<p>AMH, anti-M&#xfc;llerian hormone; FSH, follicle-stimulating hormone; LH, luteinizing hormone; PRL, prolactin; E2, estradiol; T, testosterone; P, progesterone; AFC_total, total antral follicle count.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>LASSO regression feature selection</title>
<p>LASSO regression was applied to reduce multicollinearity and identify key predictors. As shown in <xref ref-type="fig" rid="f1"><bold>Figures&#xa0;1A, B</bold></xref>, the penalty parameter was selected using 10-fold cross-validation. The minimum cross-validated deviance occurred at &#x3bb;<sub>min</sub> = 0.00406, while the 1-standard-error criterion selected &#x3bb;<sub>1SE</sub> = 0.01801, yielding a parsimonious set of five predictors with non-zero coefficients. Across the five imputed training datasets, five predictors were consistently selected by LASSO at &#x3bb;<sub>1SE</sub> in at least four imputations: female BMI, male BMI, basal FSH, AMH, and female age. Variables such as menstrual regularity, infertility type, and education level were not stably selected and were therefore excluded. This stability-based selection suggests that spousal BMI and ovarian reserve markers constituted the most robust predictors within the available feature set.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Feature selection using the LASSO regression model. <bold>(A)</bold> LASSO regression model factor selection: the left dashed line represents the optimal lambda value (&#x3bb;<sub>min</sub>), while the right dashed line marks the lambda value within one standard error of the optimal (&#x3bb;<sub>1SE</sub>). <bold>(B)</bold> LASSO regression model screening variable trajectories.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-17-1772106-g001.tif">
<alt-text content-type="machine-generated">Panel A shows a graph of binomial deviance against log lambda, with red points along a curve that increases sharply. Panel B illustrates coefficients plotted against log lambda, with multiple colored lines diverging as lambda increases.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Machine learning model performance evaluation</title>
<p>The predictive performance of the seven models in the training and validation sets is summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> and <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S3</bold></xref>, with ROC curves, calibration plots, and decision curve analyses shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, and confusion matrices presented in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>. In the training set, ensemble models (Random Forest, XGBoost, and LightGBM) achieved higher AUCs than other algorithms (AUCs 0.903&#x2013;0.923), whereas model performance became more comparable in the temporal validation set, with AUCs ranging narrowly from 0.840 to 0.857 across all models (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2B</bold></xref>). LightGBM, XGBoost, and Random Forest demonstrated almost identical discriminative ability (all AUC = 0.857). DeLong tests confirmed that there were no statistically significant differences in AUC between LightGBM and Random Forest (P = 0.918), XGBoost (P = 0.985), or Logistic Regression (P = 0.067).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Predictive performance of seven models in the validation set.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">AUC (95% CI)</th>
<th valign="middle" align="center">DeLong test P (vs. LightGBM)</th>
<th valign="middle" align="center">DeLong test P (vs. Logistic)</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Sensitivity</th>
<th valign="middle" align="center">Specificity</th>
<th valign="middle" align="center">F1 Score</th>
<th valign="middle" align="center">Kappa</th>
<th valign="middle" align="center">PPV</th>
<th valign="middle" align="center">NPV</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Logistic</td>
<td valign="middle" align="center">0.842 (0.814&#x2013;0.867)</td>
<td valign="middle" align="center">0.067</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.772</td>
<td valign="middle" align="center">0.760</td>
<td valign="middle" align="center">0.684</td>
<td valign="middle" align="center">0.838</td>
<td valign="middle" align="center">0.720</td>
<td valign="middle" align="center">0.529</td>
<td valign="middle" align="center">0.760</td>
<td valign="middle" align="center">0.780</td>
</tr>
<tr>
<td valign="middle" align="left">Decision Tree</td>
<td valign="middle" align="center">0.840 (0.811&#x2013;0.867)</td>
<td valign="middle" align="center">0.027</td>
<td valign="middle" align="center">0.877</td>
<td valign="middle" align="center">0.788</td>
<td valign="middle" align="center">0.867</td>
<td valign="middle" align="center">0.596</td>
<td valign="middle" align="center">0.932</td>
<td valign="middle" align="center">0.706</td>
<td valign="middle" align="center">0.549</td>
<td valign="middle" align="center">0.867</td>
<td valign="middle" align="center">0.755</td>
</tr>
<tr>
<td valign="middle" align="left">Random Forest</td>
<td valign="middle" align="center">0.857 (0.829&#x2013;0.882)</td>
<td valign="middle" align="center">0.918</td>
<td valign="middle" align="center">0.090</td>
<td valign="middle" align="center">0.787</td>
<td valign="middle" align="center">0.857</td>
<td valign="middle" align="center">0.602</td>
<td valign="middle" align="center">0.925</td>
<td valign="middle" align="center">0.707</td>
<td valign="middle" align="center">0.547</td>
<td valign="middle" align="center">0.857</td>
<td valign="middle" align="center">0.756</td>
</tr>
<tr>
<td valign="middle" align="left">XGBoost</td>
<td valign="middle" align="center">0.857 (0.831&#x2013;0.882)</td>
<td valign="middle" align="center">0.985</td>
<td valign="middle" align="center">0.075</td>
<td valign="middle" align="center">0.780</td>
<td valign="middle" align="center">0.820</td>
<td valign="middle" align="center">0.623</td>
<td valign="middle" align="center">0.898</td>
<td valign="middle" align="center">0.708</td>
<td valign="middle" align="center">0.537</td>
<td valign="middle" align="center">0.820</td>
<td valign="middle" align="center">0.761</td>
</tr>
<tr>
<td valign="middle" align="left">LightGBM</td>
<td valign="middle" align="center">0.857 (0.830&#x2013;0.882)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.067</td>
<td valign="middle" align="center">0.775</td>
<td valign="middle" align="center">0.831</td>
<td valign="middle" align="center">0.596</td>
<td valign="middle" align="center">0.909</td>
<td valign="middle" align="center">0.694</td>
<td valign="middle" align="center">0.523</td>
<td valign="middle" align="center">0.831</td>
<td valign="middle" align="center">0.750</td>
</tr>
<tr>
<td valign="middle" align="left">SVM</td>
<td valign="middle" align="center">0.841 (0.813&#x2013;0.867)</td>
<td valign="middle" align="center">0.051</td>
<td valign="middle" align="center">0.430</td>
<td valign="middle" align="center">0.762</td>
<td valign="middle" align="center">0.742</td>
<td valign="middle" align="center">0.681</td>
<td valign="middle" align="center">0.822</td>
<td valign="middle" align="center">0.710</td>
<td valign="middle" align="center">0.508</td>
<td valign="middle" align="center">0.742</td>
<td valign="middle" align="center">0.775</td>
</tr>
<tr>
<td valign="middle" align="left">ANN</td>
<td valign="middle" align="center">0.845 (0.816&#x2013;0.871)</td>
<td valign="middle" align="center">0.163</td>
<td valign="middle" align="center">0.469</td>
<td valign="middle" align="center">0.776</td>
<td valign="middle" align="center">0.796</td>
<td valign="middle" align="center">0.641</td>
<td valign="middle" align="center">0.877</td>
<td valign="middle" align="center">0.710</td>
<td valign="middle" align="center">0.531</td>
<td valign="middle" align="center">0.796</td>
<td valign="middle" align="center">0.765</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>AUC, Area Under the ROC Curve; PPV, Positive Predictive Value; NPV, Negative Predictive Value.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Performance evaluation of seven machine learning models in the training and validation sets. Receiver operating characteristic (ROC) curves for the training set <bold>(A)</bold> and validation set <bold>(B)</bold>. Decision curve analysis (DCA) for the training set <bold>(C)</bold> and validation set <bold>(D)</bold>. Calibration curves for the training set <bold>(E)</bold> and validation set <bold>(F)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-17-1772106-g002.tif">
<alt-text content-type="machine-generated">Panel A shows ROC curves for training set model comparison with multiple models plotted, where logistic regression achieves the highest AUC of 0.836. Panel B displays ROC curves for validation set model comparison with logistic regression also having the highest AUC of 0.841. Panel C presents decision curve analysis for training set models, while Panel D shows it for validation set models. Panels E and F illustrate calibration curves for training and validation sets, respectively, highlighting the logistic model with a Brier score of 0.044 for training and 0.048 for validation. Each panel compares several machine learning models.</alt-text>
</graphic></fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Confusion matrix heatmaps of machine learning models in the validation set. <bold>(A)</bold> LightGBM; <bold>(B)</bold> Logistic Regression; <bold>(C)</bold> XGBoost; <bold>(D)</bold> Random Forest; <bold>(E)</bold> Decision Tree; <bold>(F)</bold> Support Vector Machine (SVM); <bold>(G)</bold> Artificial Neural Network (ANN).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-17-1772106-g003.tif">
<alt-text content-type="machine-generated">Seven confusion matrices comparing different machine learning models for clinical pregnancy prediction. A: LightGBM, B: Logistic, C: DecisionTree, D: ANN, E: SVM, F: XGBoost, G: RandomForest. Each matrix displays true positives, true negatives, false positives, and false negatives with percentage values, visualized in a blue color gradient.</alt-text>
</graphic></fig>
<p>Beyond discrimination, LightGBM showed a balanced performance profile in the validation set, with an accuracy of 0.775, high specificity (0.909), and moderate sensitivity (0.596). Calibration analysis suggested reasonable agreement between predicted and observed risks (Brier score = 0.145; <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2F</bold></xref>), and decision curve analysis indicated net clinical benefit across a range of clinically plausible threshold probabilities (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2D</bold></xref>). Taken together, LightGBM was selected as the representative model for subsequent interpretability analyses due to its overall stability across discrimination, calibration, and clinical utility, rather than on a statistically superior AUC alone.</p>
<p>Analysis of confusion matrices (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>) further illustrated model behavior. LightGBM achieved a high true-negative rate (92.20%), reflecting strong specificity, while maintaining a sensitivity comparable to other models. From a clinical perspective, this tendency to limit false-positive predictions may be advantageous for avoiding overly optimistic prognostic assessments in couples with low likelihood of pregnancy.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Model interpretability analysis</title>
<p>To enhance interpretability of the selected model, SHAP was applied to visualize the contribution of individual predictors to model output (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>). The SHAP bar plot (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4A</bold></xref>), based on mean absolute SHAP values, indicated that male BMI and female BMI showed the highest average contributions within the fitted model and the available feature set, followed by basal FSH, AMH, and female age. The SHAP beeswarm plot (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4B</bold></xref>) further illustrated the direction of these associations: higher BMI values in either partner (red points) were predominantly located on the negative side of the x-axis, suggesting that higher BMI was associated with lower predicted probability of clinical pregnancy in the model output.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>LightGBM model explanation by the SHAP method. <bold>(A)</bold> Bar chart of all features. <bold>(B)</bold> Beeswarm plot. <bold>(C)</bold> Force plot for one non-pregnant patient. <bold>(D)</bold> SHAP dependence plots of features in the LightGBM model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-17-1772106-g004.tif">
<alt-text content-type="machine-generated">Charts showing SHAP analysis results. A: Bar chart of mean SHAP values highlighting feature importance, with female and male BMI as major factors. B: Bee swarm plot displaying impact on the model output by BMI, FSH, AMH, and female age. C: Waterfall chart illustrating cumulative contribution of features like AMH and BMI to the model output. D: Scatter plots depicting SHAP values against individual features for female age, male and female BMI, AMH, and FSH, showing non-linear relationships and feature value colors.</alt-text>
</graphic></fig>
<p>The SHAP force plot (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4C</bold></xref>) presents an illustrative individual case, showing how each feature contributed to shifting the prediction from the baseline toward a lower probability of pregnancy. In this example, elevated male BMI and older female age exerted negative contributions that outweighed the positive contribution of AMH. This visualization demonstrates how the model integrates multiple features to generate a personalized prediction, while reflecting model behavior rather than biological causation.</p>
<p>Finally, the SHAP dependence plots (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4D</bold></xref>) suggested nonlinear relationships between predictors and model output. Both female BMI and male BMI showed a threshold-like pattern: SHAP values remained relatively neutral within the lower range but declined sharply once BMI exceeded approximately the upper-normal range. AMH demonstrated a modest positive association at low-to-moderate levels, while higher FSH and increasing female age were associated with progressively negative SHAP values. These patterns reflect how the fitted model utilizes these predictors and should be interpreted as model-based associations rather than evidence of specific biological thresholds.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>This study developed and validated a prediction model for IVF/ICSI pregnancy outcomes in couples with male factor infertility using a single-center, large-sample retrospective cohort and the LightGBM algorithm. Compared with conventional logistic regression (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S4</bold></xref>), ensemble tree&#x2013;based methods may offer theoretical flexibility, although discrimination was comparable across models in our study. In the validation set, LightGBM showed an AUC of 0.857 and a specificity of 90.9%. SHAP-based interpretation suggested that, within the fitted model and conditional on the available predictors, spousal body mass index (BMI) exhibited relatively larger contributions to model predictions than traditional ovarian reserve indicators such as basal FSH, AMH, and female age. Importantly, these findings reflect model-based associations rather than causal effects. Nevertheless, the results suggest that, in the clinical context of impaired sperm quality, couple-level metabolic characteristics may represent an underappreciated dimension in prognostic assessment. These findings complement existing frameworks that traditionally emphasize ovarian reserve (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>).</p>
<p>This observation is biologically plausible within the pathophysiological context of male factor infertility and may generate hypotheses for future research. Based on existing literature, we propose a conceptual &#x201c;two-hit&#x201d; hypothesis as a possible interpretive framework rather than a conclusion supported directly by our data. First, patients with oligozoospermia, asthenozoospermia, or teratozoospermia frequently show increased sperm DNA fragmentation and aberrant epigenetic alterations, and obesity-associated systemic oxidative stress in males may further exacerbate these abnormalities (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B35">35</xref>). Although ICSI can bypass physical barriers to fertilization, it does not rectify molecular defects carried by sperm, which may lead to embryos with reduced developmental competence, constituting the first hit (<xref ref-type="bibr" rid="B36">36</xref>). Second, when female BMI exceeds a threshold, obesity-related chronic low-grade inflammation may alter endometrial gene expression and compromise receptivity and decidualization, representing the second hit (<xref ref-type="bibr" rid="B37">37</xref>&#x2013;<xref ref-type="bibr" rid="B39">39</xref>). Importantly, the present study did not directly measure sperm DNA fragmentation, epigenetic alterations, or endometrial receptivity. Therefore, this conceptual framework should be regarded as hypothesis-generating and requires validation in future mechanistic and experimental studies.</p>
<p>Although multiple imputation was used to address missing data, performance metrics and statistical tests were primarily derived from pooled predicted probabilities rather than from fully Rubin-combined estimates across imputations. This approach may underestimate uncertainty because between-imputation variability is not fully propagated. However, given the very low proportion of missingness (&lt;5% for all predictors), the impact of this limitation is likely modest. Future studies with higher levels of missingness should consider fully nested bootstrap&#x2013;imputation procedures to provide more rigorous uncertainty quantification.</p>
<p>In the SHAP dependence plots, a gradual decline in SHAP values was observed as BMI increased, with a more apparent decrease beyond approximately 24&#x2013;25 kg/m&#xb2;. This apparent threshold should be interpreted with caution for several reasons and does not imply a clinically actionable cutoff or an intervention threshold. First, the value was derived from visual inspection of SHAP-based plots and reflects model behavior under correlated predictors rather than a clinically or statistically validated boundary. We did not apply formal methods for threshold identification, such as spline-based regression, uncertainty-aware partial dependence analysis, or analyses based on prespecified BMI categories. Second, the observed value closely corresponds to the Chinese definition of overweight (BMI &#x2265;24 kg/m&#xb2;), indicating that this pattern may partly reflect population-specific characteristics. Therefore, the generalizability of this threshold beyond the present cohort remains uncertain and warrants validation in external populations using alternative BMI classification standards.</p>
<p>These findings may have potential clinical implications, but they should be interpreted with appropriate caution. Rather than advocating a change in clinical practice, our results highlight the possible value of considering couple-level metabolic health alongside traditional ovarian-centered assessments (<xref ref-type="bibr" rid="B40">40</xref>). In current practice, clinical efforts often focus on optimizing ovarian stimulation to increase oocyte yield. Our model suggests that metabolic factors may contribute to prognostic stratification and may be useful during patient counseling. However, whether targeted preconception interventions, such as weight reduction&#x2014;particularly in the male partner&#x2014;lead to improved ART outcomes remains uncertain and requires confirmation in prospective interventional studies. Therefore, BMI should be regarded as a potentially informative predictive marker in this dataset rather than a basis for mandatory treatment delay or universal prioritization of weight intervention (<xref ref-type="bibr" rid="B41">41</xref>).</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>A LightGBM-based model demonstrated reasonable predictive performance for IVF/ICSI pregnancy outcomes in couples with male factor infertility, with relatively high specificity in the validation set. Model interpretation suggested that, within the fitted model and available feature set, couple-level metabolic characteristics were associated with predicted outcomes alongside traditional ovarian reserve markers. These findings represent predictive associations rather than causal effects. BMI may serve as a potentially informative prognostic feature for counseling and risk stratification in this population, while the clinical benefit of targeted metabolic interventions requires confirmation in prospective and interventional studies.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Institutional Ethics Committee of Shanghai First Maternity and Infant Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x2019; legal guardians/next of kin. The requirement for informed consent was waived due to the retrospective nature of the study design and the use of de-identified patient information.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>HL: Investigation, Software, Writing &#x2013; original draft. JG: Data curation, Validation, Visualization, Writing &#x2013; original draft. YL: Conceptualization, Writing &#x2013; review &amp; editing.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>The authors would like to acknowledge the helpful suggestions concerning this study received from their colleagues.</p>
</ack>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s13" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fendo.2026.1772106/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fendo.2026.1772106/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Serafini</surname> <given-names>S</given-names></name>
<name><surname>O&#x2019;Flaherty</surname> <given-names>C</given-names></name>
</person-group>. 
<article-title>Dysregulation of sphingolipid and cholesterol homeostasis imposes oxidative stress in human spermatozoa</article-title>. <source>Redox Biol</source>. (<year>2025</year>) <volume>84</volume>:<fpage>103669</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.redox.2025.103669</pub-id>, PMID: <pub-id pub-id-type="pmid">40435557</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jin</surname> <given-names>Z-R</given-names></name>
<name><surname>Fang</surname> <given-names>D</given-names></name>
<name><surname>Liu</surname> <given-names>B-H</given-names></name>
<name><surname>Cai</surname> <given-names>J</given-names></name>
<name><surname>Tang</surname> <given-names>W-H</given-names></name>
<name><surname>Jiang</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Roles of CatSper channels in the pathogenesis of asthenozoospermia and the therapeutic effects of acupuncture-like treatment on asthenozoospermia</article-title>. <source>Theranostics</source>. (<year>2021</year>) <volume>11</volume>:<page-range>2822&#x2013;44</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.7150/thno.51869</pub-id>, PMID: <pub-id pub-id-type="pmid">33456575</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Minhas</surname> <given-names>S</given-names></name>
<name><surname>Bettocchi</surname> <given-names>C</given-names></name>
<name><surname>Boeri</surname> <given-names>L</given-names></name>
<name><surname>Capogrosso</surname> <given-names>P</given-names></name>
<name><surname>Carvalho</surname> <given-names>J</given-names></name>
<name><surname>Cilesiz</surname> <given-names>NC</given-names></name>
<etal/>
</person-group>. 
<article-title>European association of urology guidelines on male sexual and reproductive health: 2021 update on male infertility</article-title>. <source>Eur Urol</source>. (<year>2021</year>) <volume>80</volume>:<page-range>603&#x2013;20</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eururo.2021.08.014</pub-id>, PMID: <pub-id pub-id-type="pmid">34511305</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kobayashi</surname> <given-names>N</given-names></name>
<name><surname>Miyauchi</surname> <given-names>N</given-names></name>
<name><surname>Tatsuta</surname> <given-names>N</given-names></name>
<name><surname>Kitamura</surname> <given-names>A</given-names></name>
<name><surname>Okae</surname> <given-names>H</given-names></name>
<name><surname>Hiura</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Factors associated with aberrant imprint methylation and oligozoospermia</article-title>. <source>Sci Rep</source>. (<year>2017</year>) <volume>7</volume>:<fpage>42336</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep42336</pub-id>, PMID: <pub-id pub-id-type="pmid">28186187</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kirkegaard</surname> <given-names>K</given-names></name>
<name><surname>Sundvall</surname> <given-names>L</given-names></name>
<name><surname>Erlandsen</surname> <given-names>M</given-names></name>
<name><surname>Hindkj&#xe6;r</surname> <given-names>JJ</given-names></name>
<name><surname>Knudsen</surname> <given-names>UB</given-names></name>
<name><surname>Ingerslev</surname> <given-names>HJ</given-names></name>
</person-group>. 
<article-title>Timing of human preimplantation embryonic development is confounded by embryo origin</article-title>. <source>Hum Reprod (Oxford England)</source>. (<year>2015</year>) <volume>31</volume>:<page-range>324&#x2013;31</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humrep/dev296</pub-id>, PMID: <pub-id pub-id-type="pmid">26637491</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Adeniyi</surname> <given-names>T</given-names></name>
<name><surname>Horne</surname> <given-names>G</given-names></name>
<name><surname>Ruane</surname> <given-names>PT</given-names></name>
<name><surname>Brison</surname> <given-names>DR</given-names></name>
<name><surname>Roberts</surname> <given-names>SA</given-names></name>
</person-group>. 
<article-title>Clinical efficacy of hyaluronate-containing embryo transfer medium in IVF/ICSI treatment cycles: a cohort study</article-title>. <source>Hum Reprod Open</source>. (<year>2021</year>) <volume>2021</volume>:<elocation-id>hoab004</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hropen/hoab004</pub-id>, PMID: <pub-id pub-id-type="pmid">33718621</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mantikou</surname> <given-names>E</given-names></name>
<name><surname>Youssef</surname> <given-names>MAFM</given-names></name>
<name><surname>van Wely</surname> <given-names>M</given-names></name>
<name><surname>van der Veen</surname> <given-names>F</given-names></name>
<name><surname>Al-Inany</surname> <given-names>HG</given-names></name>
<name><surname>Repping</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Embryo culture media and IVF/ICSI success rates: a systematic review</article-title>. <source>Hum Reprod Update</source>. (<year>2013</year>) <volume>19</volume>:<page-range>210&#x2013;20</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humupd/dms061</pub-id>, PMID: <pub-id pub-id-type="pmid">23385469</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Foong</surname> <given-names>SC</given-names></name>
<name><surname>Fleetham</surname> <given-names>JA</given-names></name>
<name><surname>O&#x2019;Keane</surname> <given-names>JA</given-names></name>
<name><surname>Scott</surname> <given-names>SG</given-names></name>
<name><surname>Tough</surname> <given-names>SC</given-names></name>
<name><surname>Greene</surname> <given-names>CA</given-names></name>
</person-group>. 
<article-title>A prospective randomized trial of conventional <italic>in vitro</italic> fertilization versus intracytoplasmic sperm injection in unexplained infertility</article-title>. <source>J Assisted Reprod Genet</source>. (<year>2006</year>) <volume>23</volume>:<page-range>137&#x2013;40</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10815-005-9008-y</pub-id>, PMID: <pub-id pub-id-type="pmid">16622804</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Connolly</surname> <given-names>MP</given-names></name>
<name><surname>Hoorens</surname> <given-names>S</given-names></name>
<name><surname>Chambers</surname> <given-names>GM</given-names></name>
</person-group>. 
<article-title>The costs and consequences of assisted reproductive technology: an economic perspective</article-title>. <source>Hum Reprod Update</source>. (<year>2010</year>) <volume>16</volume>:<page-range>603&#x2013;13</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humupd/dmq013</pub-id>, PMID: <pub-id pub-id-type="pmid">20530804</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zou</surname> <given-names>K</given-names></name>
<name><surname>Wang</surname> <given-names>J</given-names></name>
<name><surname>Bi</surname> <given-names>H</given-names></name>
<name><surname>Zhang</surname> <given-names>Y</given-names></name>
<name><surname>Tian</surname> <given-names>X</given-names></name>
<name><surname>Tian</surname> <given-names>N</given-names></name>
<etal/>
</person-group>. 
<article-title>Comparison of different <italic>in vitro</italic> differentiation conditions for murine female germline stem cells</article-title>. <source>Cell Prolif</source>. (<year>2018</year>) <volume>52</volume>:<elocation-id>e12530</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cpr.12530</pub-id>, PMID: <pub-id pub-id-type="pmid">30334302</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Leushuis</surname> <given-names>E</given-names></name>
<name><surname>van der Steeg</surname> <given-names>JW</given-names></name>
<name><surname>Steures</surname> <given-names>P</given-names></name>
<name><surname>Bossuyt</surname> <given-names>PMM</given-names></name>
<name><surname>Eijkemans</surname> <given-names>MJC</given-names></name>
<name><surname>van der Veen</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>Prediction models in reproductive medicine: a critical appraisal</article-title>. <source>Hum Reprod Update</source>. (<year>2009</year>) <volume>15</volume>:<page-range>537&#x2013;52</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humupd/dmp013</pub-id>, PMID: <pub-id pub-id-type="pmid">19435779</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Templeton</surname> <given-names>A</given-names></name>
<name><surname>Morris</surname> <given-names>JK</given-names></name>
<name><surname>Parslow</surname> <given-names>W</given-names></name>
</person-group>. 
<article-title>Factors that affect outcome of <italic>in-vitro</italic> fertilisation treatment</article-title>. <source>Lancet</source>. (<year>1996</year>) <volume>348</volume>:<page-range>1402&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0140-6736(96)05291-9</pub-id>, PMID: <pub-id pub-id-type="pmid">8937279</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nelson</surname> <given-names>SM</given-names></name>
<name><surname>Lawlor</surname> <given-names>DA</given-names></name>
</person-group>. 
<article-title>Predicting live birth, preterm delivery, and low birth weight in infants born from <italic>in vitro</italic> fertilisation: a prospective study of 144,018 treatment cycles</article-title>. <source>PLoS Med</source>. (<year>2011</year>) <volume>8</volume>:<elocation-id>e1000386</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pmed.1000386</pub-id>, PMID: <pub-id pub-id-type="pmid">21245905</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Campbell</surname> <given-names>JM</given-names></name>
<name><surname>Lane</surname> <given-names>M</given-names></name>
<name><surname>Owens</surname> <given-names>JA</given-names></name>
<name><surname>Bakos</surname> <given-names>HW</given-names></name>
</person-group>. 
<article-title>Paternal obesity negatively affects male fertility and assisted reproduction outcomes: a systematic review and meta-analysis</article-title>. <source>Reprod Biomedicine Online</source>. (<year>2015</year>) <volume>31</volume>:<fpage>593</fpage>&#x2013;<lpage>604</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rbmo.2015.07.012</pub-id>, PMID: <pub-id pub-id-type="pmid">26380863</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>S</given-names></name>
<name><surname>Tuerganbayi</surname> <given-names>K</given-names></name>
<name><surname>Wang</surname> <given-names>J</given-names></name>
<name><surname>Saad</surname> <given-names>SH</given-names></name>
<name><surname>Zhang</surname> <given-names>J</given-names></name>
<name><surname>Zou</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Machine learning-based preliminary screening tool for clinical pregnancy prediction: towards management of IVF/ICSI stages</article-title>. <source>Ann Med</source>. (<year>2025</year>) <volume>57</volume>:<elocation-id>2582245</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/07853890.2025.2582245</pub-id>, PMID: <pub-id pub-id-type="pmid">41243616</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Leijdekkers</surname> <given-names>JA</given-names></name>
<name><surname>Eijkemans</surname> <given-names>MJC</given-names></name>
<name><surname>van Tilborg</surname> <given-names>TC</given-names></name>
<name><surname>Oudshoorn</surname> <given-names>SC</given-names></name>
<name><surname>McLernon</surname> <given-names>DJ</given-names></name>
<name><surname>Bhattacharya</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Predicting the cumulative chance of live birth over multiple complete cycles of <italic>in vitro</italic> fertilization: an external validation study</article-title>. <source>Hum Reprod (Oxford England)</source>. (<year>2018</year>) <volume>33</volume>:<page-range>1684&#x2013;95</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humrep/dey263</pub-id>, PMID: <pub-id pub-id-type="pmid">30085143</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bzdok</surname> <given-names>D</given-names></name>
<name><surname>Altman</surname> <given-names>N</given-names></name>
<name><surname>Krzywinski</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Statistics versus machine learning</article-title>. <source>Nat Methods</source>. (<year>2018</year>) <volume>15</volume>:<page-range>233&#x2013;4</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nmeth.4642</pub-id>, PMID: <pub-id pub-id-type="pmid">30100822</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Deo</surname> <given-names>RC</given-names></name>
</person-group>. 
<article-title>Machine learning in medicine</article-title>. <source>Circulation</source>. (<year>2015</year>) <volume>132</volume>:<page-range>1920&#x2013;30</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.115.001593</pub-id>, PMID: <pub-id pub-id-type="pmid">26572668</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>J</given-names></name>
<name><surname>Chen</surname> <given-names>R</given-names></name>
<name><surname>Long</surname> <given-names>H</given-names></name>
<name><surname>He</surname> <given-names>J</given-names></name>
<name><surname>Tang</surname> <given-names>M</given-names></name>
<name><surname>Su</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>Artificial intelligence in polycystic ovarian syndrome management: past, present, and future</article-title>. <source>Radiol Med</source>. (<year>2025</year>) <volume>130</volume>:<page-range>1409&#x2013;41</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11547-025-02032-9</pub-id>, PMID: <pub-id pub-id-type="pmid">40549330</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>London</surname> <given-names>AJ</given-names></name>
</person-group>. 
<article-title>Artificial intelligence and black-box medical decisions: accuracy versus explainability</article-title>. <source>Hastings Cent Rep</source>. (<year>2019</year>) <volume>49</volume>:<fpage>15</fpage>&#x2013;<lpage>21</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/hast.973</pub-id>, PMID: <pub-id pub-id-type="pmid">30790315</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Amann</surname> <given-names>J</given-names></name>
<name><surname>Blasimme</surname> <given-names>A</given-names></name>
<name><surname>Vayena</surname> <given-names>E</given-names></name>
<name><surname>Frey</surname> <given-names>D</given-names></name>
<name><surname>Madai</surname> <given-names>VI</given-names></name>
</person-group>. 
<article-title>Explainability for artificial intelligence in healthcare: a multidisciplinary perspective</article-title>. <source>BMC Med Inf Decision Making</source>. (<year>2020</year>) <volume>20</volume>:<elocation-id>310</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12911-020-01332-6</pub-id>, PMID: <pub-id pub-id-type="pmid">33256715</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cooper</surname> <given-names>TG</given-names></name>
<name><surname>Noonan</surname> <given-names>E</given-names></name>
<name><surname>von Eckardstein</surname> <given-names>S</given-names></name>
<name><surname>Auger</surname> <given-names>J</given-names></name>
<name><surname>Baker</surname> <given-names>HWG</given-names></name>
<name><surname>Behre</surname> <given-names>HM</given-names></name>
<etal/>
</person-group>. 
<article-title>World Health Organization reference values for human semen characteristics</article-title>. <source>Hum Reprod Update</source>. (<year>2009</year>) <volume>16</volume>:<page-range>231&#x2013;45</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humupd/dmp048</pub-id>, PMID: <pub-id pub-id-type="pmid">19934213</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Collins</surname> <given-names>GS</given-names></name>
<name><surname>Reitsma</surname> <given-names>JB</given-names></name>
<name><surname>Altman</surname> <given-names>DG</given-names></name>
<name><surname>Moons</surname> <given-names>KGM</given-names></name>
</person-group>. 
<article-title>Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis (TRIPOD): The TRIPOD Statement</article-title>. <source>Ann Internal Med</source>. (<year>2015</year>) <volume>162</volume>:<fpage>55</fpage>&#x2013;<lpage>63</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.7326/M14-0697</pub-id>, PMID: <pub-id pub-id-type="pmid">25560714</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhai</surname> <given-names>J</given-names></name>
<name><surname>Li</surname> <given-names>S</given-names></name>
<name><surname>Zhu</surname> <given-names>Y</given-names></name>
<name><surname>Sun</surname> <given-names>Y</given-names></name>
<name><surname>Chen</surname> <given-names>Z-J</given-names></name>
<name><surname>Du</surname> <given-names>Y</given-names></name>
</person-group>. 
<article-title>Serum sex hormone binding globulin concentration as a predictor of ovarian response during controlled ovarian hyperstimulation</article-title>. <source>Front Med</source>. (<year>2021</year>) <volume>8</volume>:<elocation-id>719818</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmed.2021.719818</pub-id>, PMID: <pub-id pub-id-type="pmid">34805198</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>van Buuren</surname> <given-names>S</given-names></name>
<name><surname>Groothuis-Oudshoorn</surname> <given-names>K</given-names></name>
</person-group>. 
<article-title>mice: multivariate imputation by chained equations in R</article-title>. <source>J Stat Software</source>. (<year>2011</year>) <volume>45</volume>:<fpage>1</fpage>&#x2013;<lpage>67</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v045.i03</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>K</given-names></name>
<name><surname>Xiong</surname> <given-names>W</given-names></name>
<name><surname>Duan</surname> <given-names>X</given-names></name>
<name><surname>Li</surname> <given-names>Q</given-names></name>
<name><surname>Ren</surname> <given-names>P</given-names></name>
<name><surname>Ye</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>A nomogram based on autoantibodies for noninvasive detection of AFP-negative hepatocellular carcinoma: a multicenter study</article-title>. <source>Br J Cancer</source>. (<year>2025</year>) <volume>133</volume>:<page-range>1896&#x2013;906</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41416-025-03215-x</pub-id>, PMID: <pub-id pub-id-type="pmid">41039018</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rhodes</surname> <given-names>CJ</given-names></name>
<name><surname>Otero-N&#xfa;&#xf1;ez</surname> <given-names>P</given-names></name>
<name><surname>Wharton</surname> <given-names>J</given-names></name>
<name><surname>Swietlik</surname> <given-names>EM</given-names></name>
<name><surname>Kariotis</surname> <given-names>S</given-names></name>
<name><surname>Harbaum</surname> <given-names>L</given-names></name>
<etal/>
</person-group>. 
<article-title>Whole-blood RNA profiles associated with pulmonary arterial hypertension and clinical outcome</article-title>. <source>Am J Respir Crit Care Med</source>. (<year>2020</year>) <volume>202</volume>:<page-range>586&#x2013;94</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1164/rccm.202003-0510OC</pub-id>, PMID: <pub-id pub-id-type="pmid">32352834</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pedregosa</surname> <given-names>F</given-names></name>
<name><surname>Varoquaux</surname> <given-names>G</given-names></name>
<name><surname>Gramfort</surname> <given-names>A</given-names></name>
<name><surname>Michel</surname> <given-names>V</given-names></name>
<name><surname>Thirion</surname> <given-names>B</given-names></name>
<name><surname>Grisel</surname> <given-names>O</given-names></name>
<etal/>
</person-group>. 
<article-title>Scikit-learn: machine learning in Python</article-title>. <source>J Mach Learn Res</source>. (<year>2011</year>) <volume>12</volume>:<page-range>2825&#x2013;30</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/1953048.2078195</pub-id>, PMID: <pub-id pub-id-type="pmid">34820480</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>D</given-names></name>
<name><surname>Li</surname> <given-names>Y</given-names></name>
<name><surname>Zhang</surname> <given-names>D</given-names></name>
<name><surname>Ding</surname> <given-names>J</given-names></name>
<name><surname>Song</surname> <given-names>Z</given-names></name>
<name><surname>Min</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Genetic trade-offs between complex diseases and longevity</article-title>. <source>Aging Cell</source>. (<year>2022</year>) <volume>21</volume>:<elocation-id>e13654</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/acel.13654</pub-id>, PMID: <pub-id pub-id-type="pmid">35754110</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bai</surname> <given-names>Y</given-names></name>
<name><surname>Lei</surname> <given-names>C</given-names></name>
<name><surname>Zhang</surname> <given-names>N</given-names></name>
<name><surname>Liu</surname> <given-names>Y</given-names></name>
<name><surname>Hu</surname> <given-names>Z</given-names></name>
<name><surname>Li</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Peri-ulcerative mucosal inflammation appearance is an independent risk factor for 30-day rebleeding in patients with gastric ulcer bleeding: A multicenter retrospective study</article-title>. <source>J Inflammation Res</source>. (<year>2022</year>) <volume>15</volume>:<page-range>4951&#x2013;61</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2147/JIR.S378263</pub-id>, PMID: <pub-id pub-id-type="pmid">36065317</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Giordano</surname> <given-names>G</given-names></name>
<name><surname>Mastrantoni</surname> <given-names>L</given-names></name>
<name><surname>Landi</surname> <given-names>F</given-names></name>
</person-group>. 
<article-title>Development and validation of quantile regression forests for prediction of reference quantiles in handgrip and chair-stand test</article-title>. <source>J Cachexia Sarcopenia Muscle</source>. (<year>2025</year>) <volume>16</volume>:<elocation-id>e13868</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jcsm.13868</pub-id>, PMID: <pub-id pub-id-type="pmid">40525650</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Provost</surname> <given-names>MP</given-names></name>
<name><surname>Acharya</surname> <given-names>KS</given-names></name>
<name><surname>Acharya</surname> <given-names>CR</given-names></name>
<name><surname>Yeh</surname> <given-names>JS</given-names></name>
<name><surname>Steward</surname> <given-names>RG</given-names></name>
<name><surname>Eaton</surname> <given-names>JL</given-names></name>
<etal/>
</person-group>. 
<article-title>Pregnancy outcomes decline with increasing body mass index: analysis of 239,127 fresh autologous in&#xa0;vitro fertilization cycles from the 2008&#x2013;2010 Society for Assisted Reproductive Technology registry</article-title>. <source>Fertil Steril</source>. (<year>2016</year>) <volume>105</volume>:<page-range>663&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fertnstert.2015.11.008</pub-id>, PMID: <pub-id pub-id-type="pmid">26627120</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Campbell</surname> <given-names>JM</given-names></name>
<name><surname>Lane</surname> <given-names>M</given-names></name>
<name><surname>Owens</surname> <given-names>JA</given-names></name>
<name><surname>Bakos</surname> <given-names>HW</given-names></name>
</person-group>. 
<article-title>Paternal obesity negatively affects male fertility and assisted reproduction outcomes: a systematic review and meta-analysis</article-title>. <source>Reprod Biomedicine Online</source>. (<year>2015</year>) <volume>31</volume>:<fpage>593</fpage>&#x2013;<lpage>604</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rbmo.2015.07.012</pub-id>, PMID: <pub-id pub-id-type="pmid">26380863</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Leisegang</surname> <given-names>K</given-names></name>
<name><surname>Sengupta</surname> <given-names>P</given-names></name>
<name><surname>Agarwal</surname> <given-names>A</given-names></name>
<name><surname>Henkel</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Obesity and male infertility: Mechanisms and management</article-title>. <source>Andrologia</source>. (<year>2020</year>) <volume>53</volume>:<elocation-id>e13617</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/and.13617</pub-id>, PMID: <pub-id pub-id-type="pmid">32399992</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Donkin</surname> <given-names>I</given-names></name>
<name><surname>Barr&#xe8;s</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Sperm epigenetics and influence of environmental factors</article-title>. <source>Mol Metab</source>. (<year>2018</year>) <volume>14</volume>:<fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.molmet.2018.02.006</pub-id>, PMID: <pub-id pub-id-type="pmid">29525406</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<label>36</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Simon</surname> <given-names>L</given-names></name>
<name><surname>Zini</surname> <given-names>A</given-names></name>
<name><surname>Dyachenko</surname> <given-names>A</given-names></name>
<name><surname>Ciampi</surname> <given-names>A</given-names></name>
<name><surname>Carrell</surname> <given-names>DT</given-names></name>
</person-group>. 
<article-title>A systematic review and meta-analysis to determine the effect of sperm DNA damage on <italic>in vitro</italic> fertilization and intracytoplasmic sperm injection outcome</article-title>. <source>Asian J Andrology</source>. (<year>2017</year>) <volume>19</volume>:<fpage>80</fpage>&#x2013;<lpage>90</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4103/1008-682X.182822</pub-id>, PMID: <pub-id pub-id-type="pmid">27345006</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<label>37</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rhee</surname> <given-names>JS</given-names></name>
<name><surname>Saben</surname> <given-names>JL</given-names></name>
<name><surname>Mayer</surname> <given-names>AL</given-names></name>
<name><surname>Schulte</surname> <given-names>MB</given-names></name>
<name><surname>Asghar</surname> <given-names>Z</given-names></name>
<name><surname>Stephens</surname> <given-names>C</given-names></name>
<etal/>
</person-group>. 
<article-title>Diet-induced obesity impairs endometrial stromal cell decidualization: a potential role for impaired autophagy</article-title>. <source>Hum Reprod (Oxford England)</source>. (<year>2016</year>) <volume>31</volume>:<page-range>1315&#x2013;26</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humrep/dew048</pub-id>, PMID: <pub-id pub-id-type="pmid">27052498</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<label>38</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Broughton</surname> <given-names>DE</given-names></name>
<name><surname>Moley</surname> <given-names>KH</given-names></name>
</person-group>. 
<article-title>Obesity and female infertility: potential mediators of obesity&#x2019;s impact</article-title>. <source>Fertil Steril</source>. (<year>2017</year>) <volume>107</volume>:<page-range>840&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fertnstert.2017.01.017</pub-id>, PMID: <pub-id pub-id-type="pmid">28292619</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<label>39</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bellver</surname> <given-names>J</given-names></name>
<name><surname>Mart&#xed;nez-Conejero</surname> <given-names>JA</given-names></name>
<name><surname>Labarta</surname> <given-names>E</given-names></name>
<name><surname>Alam&#xe1;</surname> <given-names>P</given-names></name>
<name><surname>Melo</surname> <given-names>MAB</given-names></name>
<name><surname>Remoh&#xed;</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Endometrial gene expression in the window of implantation is altered in obese women especially in association with polycystic ovary syndrome</article-title>. <source>Fertil Steril</source>. (<year>2011</year>) <volume>95</volume>:<page-range>2335&#x2013;41, 2391.e1&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fertnstert.2011.03.021</pub-id>, PMID: <pub-id pub-id-type="pmid">21481376</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<label>40</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Best</surname> <given-names>D</given-names></name>
<name><surname>Avenell</surname> <given-names>A</given-names></name>
<name><surname>Bhattacharya</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>How effective are weight-loss interventions for improving fertility in women and men who are overweight or obese? A systematic review and meta-analysis of the evidence</article-title>. <source>Hum Reprod Update</source>. (<year>2017</year>) <volume>23</volume>:<fpage>681</fpage>&#x2013;<lpage>705</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/humupd/dmx027</pub-id>, PMID: <pub-id pub-id-type="pmid">28961722</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<label>41</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<collab>Practice Committee of the American Society for Reproductive Medicine</collab>
</person-group>. 
<article-title>Obesity and reproduction: a committee opinion</article-title>. <source>Fertil Steril</source>. (<year>2021</year>) <volume>116</volume>:<page-range>1266&#x2013;85</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fertnstert.2021.08.018</pub-id>, PMID: <pub-id pub-id-type="pmid">34583840</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/110422">Luca Busetto</ext-link>, University of Padua, Italy</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3101668">Keyan Wang</ext-link>, Zhengzhou University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3130996">Arash Ziaee</ext-link>, Mashhad University of Medical Sciences, Iran</p></fn>
</fn-group>
</back>
</article>