<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" 'JATS-journalpublishing1-3-mathml3.dtd'>
<article article-type="research-article" dtd-version="1.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell Dev. Biol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Cell and Developmental Biology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell Dev. Biol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-634X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1734654</article-id>
<article-id pub-id-type="doi">10.3389/fcell.2025.1734654</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A machine learning-based predictive model for 48-week hepatitis B surface antigen seroclearance in chronic hepatitis B patients treated with pegylated interferon &#x3b1;-2b: prediction at week 24</article-title>
<alt-title alt-title-type="left-running-head">Kong et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fcell.2025.1734654">10.3389/fcell.2025.1734654</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Kong</surname>
<given-names>Nan</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3259703"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Wang</surname>
<given-names>Kaixia</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1996139"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Wang</surname>
<given-names>Yiling</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lou</surname>
<given-names>Shike</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="https://loop.frontiersin.org/people/1779951"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Luocheng</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Tao</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="https://loop.frontiersin.org/people/1784456"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Tan</surname>
<given-names>Zhili</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Qu</surname>
<given-names>Lihong</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>Department of Infectious Diseases, Shanghai East Hospital, Tongji University School of Medicine</institution>, <city>Shanghai</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Zhili Tan, <email xlink:href="mailto:lwfdtzl895@live.com">lwfdtzl895@live.com</email>; Lihong Qu, <email xlink:href="mailto:1905365@tongji.edu.cn">1905365@tongji.edu.cn</email>
</corresp>
<fn fn-type="equal" id="fn001">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-26">
<day>26</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1734654</elocation-id>
<history>
<date date-type="received">
<day>29</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>11</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Kong, Wang, Wang, Lou, Zhou, Wang, Tan and Qu.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Kong, Wang, Wang, Lou, Zhou, Wang, Tan and Qu</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-26">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Chronic hepatitis B (CHB) is an infectious disease mainly affecting the liver, caused by the hepatitis B virus (HBV). In the treatment of CHB, pegylated interferon &#x3b1;-2b (PEG-IFN&#x3b1;-2b) is one of the important therapeutic options. However, there are significant individual differences in patients&#x2019; responses to this treatment and only a few patients can achieve hepatitis B surface antigen (HBsAg) seroclearance. Therefore, an effective method to identify patients with a high likelihood of favorable response at an early stage is urgently needed.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, we analyzed data from CHB patients who received antiviral treatment with PEG-IFN&#x3b1;-2b and completed 48 weeks of follow-up in the &#x201c;OASIS&#x201d; Project. Patients were divided into the seroclearance group and the non-seroclearance group based on whether HBsAg seroclearance was achieved at week 48. Five distinct machine learning feature selection algorithms were used to identify the optimal predictive variables for HBsAg seroclearance. These key variables were then incorporated into 12 machine learning algorithms to build predictive models for HBsAg seroclearance. The best-performing model was selected, and its performance was evaluated.</p>
</sec>
<sec>
<title>Results</title>
<p>A total of 680 subjects were included in this study, comprising 165 in the 48-week seroclearance group and 515 in the 48-week non-seroclearance group. Through five different machine learning feature selection algorithms, 11 variables were identified and used to construct 12 distinct machine learning models. Comparative analysis of these models, based on the Area Under the Receiver Operating Characteristic Curve (AUC) and Decision Curve Analysis (DCA) results from the training set, indicated that the Random Forest model was the optimal model for predicting HBsAg seroclearance.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The Random Forest model effectively predicted the 48-week HBsAg seroclearance rate using indicators measured at 24 weeks of PEG-IFN&#x3b1;-2b therapy. This model can provide a reliable reference for optimizing clinical treatment strategies.</p>
</sec>
</abstract>
<kwd-group>
<kwd>chronic hepatitis B</kwd>
<kwd>HBsAg seroclearance</kwd>
<kwd>predictive model</kwd>
<kwd>machine learning</kwd>
<kwd>clinical utility</kwd>
</kwd-group>
<funding-group>
<funding-statement>The authors declare that financial support was received for the research and/or publication of this article. This work was supported by the Shanghai Pudong New Area Health Committee Supervision Institute (2025-PWDL-02).</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="45"/>
<page-count count="18"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Cellular Biochemistry</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>CHB is an infectious liver disease caused by persistent hepatitis B virus (HBV) infection for more than 6 months. According to the WHO&#x2019;s 2024 Global Hepatitis Report, it affected 254 million people globally in 2022, with approximately 1.2 million new infections and 1.1 million deaths annually, primarily due to cirrhosis and hepatocellular carcinoma (<xref ref-type="bibr" rid="B5">Burki, 2024</xref>). In China, the CHB epidemic remains a serious concern. A nationwide study revealed approximately 75 million individuals living with chronic HBV infection. Despite this decline from 120 million in 1992, the prevention and control situation remains challenging. Among these 75 million, an estimated 30 million are unaware of their status, and 17 million require antiviral treatment, yet only three million are receiving it (<xref ref-type="bibr" rid="B20">Hui et al., 2024</xref>). These critical gaps in diagnosis and treatment expose a vast number of individuals to a high risk of disease progression to severe complications, including cirrhosis and hepatocellular carcinoma (HCC), which severely diminish quality of life and create a heavy socioeconomic burden (<xref ref-type="bibr" rid="B21">Jian et al., 2025</xref>).</p>
<p>The achievement of HBsAg seroclearance, regarded as a &#x201c;functional cure&#x201d; for CHB, significantly reduces the risk of cirrhosis and HCC, thereby establishing it as a primary therapeutic goal. As a key drug for immunomodulatory treatment of CHB, PEG-IFN&#x3b1;-2b is widely used in eligible patients due to its advantages of limited treatment duration and potential curability. However, treatment response exhibits substantial inter-individual variation, and the rate of HBsAg seroclearance remains low. Moreover, the current clinical paradigm requires awaiting 48-week outcomes to assess final response, which can result in ineffective treatment for non-responders, escalating both healthcare costs and the risk of adverse events (<xref ref-type="bibr" rid="B15">Hong et al., 2022</xref>). Early identification of patients with a high likelihood of HBsAg seroclearance during PEG-IFN&#x3b1;-2b treatment is crucial for optimizing individualized CHB treatment. Currently, most domestic and international studies continue to focus on baseline or early-treatment (e.g., week 12) predictors (<xref ref-type="bibr" rid="B27">Lee et al., 2025</xref>; <xref ref-type="bibr" rid="B34">Ren et al., 2021</xref>; <xref ref-type="bibr" rid="B35">Rijckborst et al., 2010</xref>; <xref ref-type="bibr" rid="B40">Sonneveld et al., 2010</xref>; <xref ref-type="bibr" rid="B44">Ye et al., 2024</xref>). However, week 24 of therapy is a well-established key timepoint for assessing response to interferon-based regimens and determining whether to adjust treatment strategies (<xref ref-type="bibr" rid="B15">Hong et al., 2022</xref>). Developing a robust predictive model at this juncture, integrating multidimensional data such as dynamic liver function indices and serological virological markers, can provide clinicians with a practical tool at this critical decision-making window. 
This enables early warning for potential non-responders, allowing timely strategy modifications, while reinforcing treatment confidence for potential responders, ultimately advancing personalized and precision management of CHB.</p>
<p>In the era of big data and artificial intelligence, machine learning (ML)&#x2014;a concept pioneered by Arthur Samuel&#x2014;enables computers to simulate human learning and predict outcomes through mathematical modeling (<xref ref-type="bibr" rid="B9">Deo, 2015</xref>). ML encompasses a wide range of mathematical models, such as Decision Trees, Random Forests, Support Vector Machines (SVM), k-Nearest Neighbors (kNN), Neural Networks, and Naive Bayes (NB) (<xref ref-type="bibr" rid="B2">Asnicar et al., 2024</xref>; <xref ref-type="bibr" rid="B25">Kotsiantis et al., 2006</xref>; <xref ref-type="bibr" rid="B38">Sim et al., 2023</xref>). ML techniques are particularly adept at processing high-dimensional clinical data and uncovering complex relationships to build reliable ML models (<xref ref-type="bibr" rid="B11">Fan et al., 2023</xref>; <xref ref-type="bibr" rid="B26">Lee et al., 2023</xref>; <xref ref-type="bibr" rid="B43">Xue et al., 2025</xref>).</p>
<p>Based on the multi-center, prospective, real-world OASIS project, this study collected clinical data from CHB patients treated with PEG-IFN&#x3b1;-2b. The aim was to develop and validate an ML-based prediction model that utilizes accessible indicators at 24 weeks of treatment&#x2014;such as quantitative HBsAg and virological markers (e.g., HBV DNA load)&#x2014;to accurately predict the HBsAg seroclearance rate at 48 weeks. The resulting model is expected to provide clinicians with a decision-support tool for the early identification of patients who are most likely to respond favorably to PEG-IFN&#x3b1;-2b therapy, thereby reducing ineffective treatments and advancing the optimization of individualized CHB management.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2-1">
<label>2.1</label>
<title>Participants</title>
<p>This study utilized data from the &#x201c;China Hepatitis Prevention and Treatment Foundation - Project to Reduce Hepatocellular Carcinoma Incidence in Chronic Hepatitis B Patients&#x201d; (the OASIS project, NCT04896255). This large, multicenter, prospective, real-world study enrolled CHB patients from 32 provinces across China, including treatment-na&#xef;ve individuals, those previously treated with PEG-IFN&#x3b1;-2b, and those with nucleos(t)ide analogues (NAs) experience. Participants received either a PEG-IFN&#x3b1;-2b-based regimen (as monotherapy or in combination with NAs) or NAs monotherapy and are being followed for a planned 5-year period. This study was approved by the Ethics Committee of Shanghai East Hospital, Tongji University (Approval Number: [2020] Pre-review No. (157)). Written informed consent was obtained from all participants. The data were sourced from the OASIS project, with data cleaning and quality control handled by the National Medical Center of Infectious Diseases (NMCID) Liver Diseases Research Group.</p>
<p>The specific inclusion criteria were as follows: (1) Chronic HBV infection, defined as: positivity for HBsAg for more than 6 months, or positivity for HBsAg for less than 6 months but with a liver biopsy within the past year meeting the pathological criteria for chronic hepatitis B, with other liver diseases excluded. (2) Age between 18 and 80 years, inclusive, irrespective of gender. (3) Treatment with PEG-IFN&#x3b1;-2b-based antiviral therapy. (4) Completion of the 48-week follow-up.</p>
<p>Of the 1,057 eligible CHB patients, 377 were excluded from the final analysis due to extensive missing data at baseline or the 24-week time point. Consequently, the final analytical cohort consisted of 680 patients. A flowchart detailing the study design and patient selection process is presented in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Study flowchart.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g001.tif">
<alt-text content-type="machine-generated">Flowchart depicting the process of selecting and evaluating predictive models for chronic hepatitis B patients treated with PEG-IFN&#x3B1;-2b. Starting with 1057 patients, 377 with missing data are excluded, resulting in 680. This cohort is divided into a training set of 453 and a test set of 227. Various models like Logistic Regression, Random Forest, and XGBoost are developed and evaluated for machine learning model predictive performance and single-variable predictive performance. The goal is to predict the 48-week HBsAg seroclearance rate at week 24.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Data preprocessing and feature engineering</title>
<p>The clinical dataset for this study comprised demographic and clinical characteristics&#x2014;including age, gender, height, weight, body mass index (BMI), cirrhosis status, and family history&#x2014;of CHB patients treated with PEG-IFN&#x3b1;-2b. It also included serial serological parameters measured at both baseline and 24 weeks (6 months) of treatment. These parameters covered virological markers (HBV DNA, HBsAg, HBeAg, HBeAb, HBcAb), liver function indices (ALT, AST, TBIL, DBIL, ALP, GGT, ALB, GLO), and routine blood test results (PLT, NEU, Hb). The initial data processing steps included: handling missing values, applying logarithmic transformation to viral load markers (HBV DNA, HBsAg, HBeAg, HBeAb, HBcAb), and deriving kinetic variables (HBsAg_desc &#x3d; baseline HBsAg level - week 24 HBsAg level; HBeAg_desc &#x3d; baseline HBeAg level - week 24 HBeAg level; HBeAb_desc &#x3d; baseline HBeAb level - week 24 HBeAb level; HBcAb_desc &#x3d; baseline HBcAb level - week 24 HBcAb level). Categorical variables (gender, cirrhosis, family history) were factor-encoded. After excluding patients with excessive missing data, the final dataset contained 31 predictor variables. The quantitative results for HBV DNA and HBsAg were log<sub>10</sub>-transformed during data preprocessing.</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Feature selection using machine learning</title>
<p>To identify robust predictors of treatment response, we employed five distinct feature selection algorithms. (1) Random Forest (RF): we calculated mean decrease in Gini impurity using the randomForest package to rank feature importance (<xref ref-type="bibr" rid="B8">Chen et al., 2024</xref>; <xref ref-type="bibr" rid="B16">Hosseini Sarkhosh et al., 2022</xref>). (2) Least Absolute Shrinkage and Selection Operator (LASSO): using 5-fold cross-validation via glmnet, we selected non-zero coefficients at &#x3bb;min to retain features with strongest associations (<xref ref-type="bibr" rid="B13">Friedman et al., 2010</xref>; <xref ref-type="bibr" rid="B39">Simon et al., 2011</xref>). (3) Support Vector Machine-Recursive Feature Elimination (SVM-RFE): we implemented backward feature elimination with linear SVM kernel and 5-fold cross-validation (<xref ref-type="bibr" rid="B18">Huang et al., 2018</xref>). (4) Elastic Net: combining L1 and L2 regularization (&#x3b1; &#x3d; 0.5), we identified features with persistent coefficients across cross-validation folds (<xref ref-type="bibr" rid="B19">Hughey and Butte, 2015</xref>). (5) XGBoost: we utilized gradient boosting with xgboost package, ranking features by gain importance metric which quantifies the total reduction of loss contributed by each feature. The model was configured with multi:softmax objective, maximum depth of 3, learning rate of 0.1, and 100 boosting rounds with early stopping (<xref ref-type="bibr" rid="B7">Chen and Guestrin, 2016</xref>; <xref ref-type="bibr" rid="B28">Liang et al., 2025</xref>).</p>
<p>Features consistently ranked highly across multiple methods were prioritized for model construction.</p>
</sec>
<sec id="s2-4">
<label>2.4</label>
<title>Predictive model development</title>
<p>We trained 12 diverse classifiers using the mlr3 framework, including (1) Linear models: Logistic Regression, Linear Discriminant Analysis; (2) Tree-based models: Decision Tree, Random Forest, XGBoost; (3) Kernel methods: SVM (linear/RBF kernels); (4) Distance-based: k-Nearest Neighbors; (5) Neural networks: Single-hidden-layer Network; (6) Probabilistic: Na&#xef;ve Bayes; (7) Regularized regression: GLMNet, CV GLMNet. All models were configured for probability prediction and trained on a stratified 2:1 train-test split. Hyperparameters used default values from respective mlr3learners implementations. Model performance was assessed using: Discrimination metrics: Area under ROC curve (AUC), accuracy, sensitivity, specificity, precision, F1-score; Calibration: Precision-Recall curves and net benefit analysis via Decision Curve Analysis (DCA); Stability: Comparative training-test performance and ranking consistency across metrics.</p>
<p>We implemented k-fold cross-validation during feature selection phases and held-out test set validation for final model assessment. Additionally, for optimal models, we performed SHapley Additive exPlanations (SHAP) analysis using DALEXtra to compute Shapley values for feature contribution quantification. In addition, we performed univariate ROC analysis of single features and compared the results with those of the ML models. This comprehensive framework ensured robust feature selection, validated model performance, and facilitated clinical interpretation of predictive factors for interferon response.</p>
</sec>
<sec id="s2-5">
<label>2.5</label>
<title>Statistical analyses</title>
<p>Based on HBsAg seroclearance status at week 48, patients were categorized into &#x201c;Seroclearance&#x201d; and &#x201c;Non-Seroclearance&#x201d; groups. Continuous variables with a normal distribution are presented as mean (standard deviation, SD) and compared using the independent samples t-test. Non-normally distributed variables are expressed as median (interquartile range, IQR) and compared using the Mann-Whitney U test. Categorical variables are summarized as percentages and compared using the chi-square test. All statistical analyses were performed with R software (version 4.5.1). The significance level (&#x3b1;) was set at 0.05, and a result with <italic>P</italic> &#x3c; 0.05 was considered statistically significant.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<sec id="s3-1">
<label>3.1</label>
<title>Characteristics of the study</title>
<p>Of the 680 CHB patients included in this study, 165 (24.3%) were classified into the seroclearance group and 515 (75.7%) into the non-seroclearance group. The data preprocessing pipeline began with the enumeration of missing values across all observations. Subsequent analysis was confined to clinical data from baseline (0W) and the 6-month (24W) time points. After excluding patients with a substantial amount of missing data, the final dataset comprised the following 31 predictor variables: Age, Gender, BMI, Liver Cirrhosis (LC), Family history, HBsAg (0W), HBsAg (24W), HBsAg_desc, HBeAg (0W), HBeAg (24W), HBeAg_desc, HBeAb (0W), HBeAb (24W), HBeAb_desc, HBcAb (0W), HBcAb (24W), HBcAb_desc, ALT (0W), ALT (24W), AST (0W), AST (24W), GGT (0W), GGT (24W), TBIL (0W), TBIL (24W), DBIL (0W), DBIL (24W), ALP (0W), ALP (24W), PLT (0W), and PLT (24W) (<xref ref-type="sec" rid="s14">Supplementary Table S1</xref>).</p>
<p>The baseline characteristics of the two groups are presented in <xref ref-type="table" rid="T1">Table 1</xref>. Differences between groups were assessed using the Chi-square test and the Mann-Whitney U test, with a statistical significance level set at &#x3b1; &#x3d; 0.05. Significant baseline differences (all <italic>P</italic> &#x3c; 0.05) between groups were observed in: LC (4.2% vs. 9.7%, <italic>P</italic> &#x3d; 0.041), Family history (22.4% vs. 31.8%, <italic>P</italic> &#x3d; 0.027), HBV DNA (median 0.00 vs. 1.62 log<sub>10</sub> IU/mL, <italic>P</italic> &#x3d; 0.034), HBsAg (median 1.86 vs. 3.08 log<sub>10</sub> IU/mL, <italic>P</italic> &#x3c; 0.001), HBeAg (median 0.40 vs. 0.52 COI, <italic>P</italic> &#x3c; 0.001), HBeAb (median 0.03 vs. 0.18 COI, <italic>P</italic> &#x3d; 0.001), HBcAb (median 3.29 vs. 3.28 COI, <italic>P</italic> &#x3d; 0.012), ALT (median 26.70 vs. 31.00 U/L, <italic>P</italic> &#x3d; 0.004), AST (median 23.70 vs. 27.00 U/L, <italic>P</italic> &#x3d; 0.006), GGT (median 19.00 vs. 24.00 U/L, <italic>P</italic> &#x3d; 0.001), ALB (median 46.00 vs. 45.10 g/L, <italic>P</italic> &#x3c; 0.001), and Hb (median 148.00 vs. 153.00 g/L, <italic>P</italic> &#x3d; 0.005). The laboratory parameters of the two groups after 24 weeks of PEG-IFN&#x3b1;-2b treatment are presented in <xref ref-type="table" rid="T2">Table 2</xref>. Statistical comparisons were performed using the Chi-square test and Mann-Whitney U test, with the significance level set at &#x3b1; &#x3d; 0.05. The analysis revealed statistically significant differences (all <italic>P</italic> &#x3c; 0.05) in the following variables at week 24: HBsAg (median 0.00 vs. 2.47 log<sub>10</sub> IU/mL, <italic>P</italic> &#x3c; 0.001), HBeAg (median 0.37 vs. 0.45 COI, <italic>P</italic> &#x3c; 0.001), HBeAb (median 0.04 vs. 0.26 COI, <italic>P</italic> &#x3c; 0.001), and Hb (median 131.00 vs. 135.00 g/L, <italic>P</italic> &#x3d; 0.006).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Baseline characteristics and intergroup comparisons of the 680 patients.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Total patients (n &#x3d; 680)</th>
<th align="center">Seroclearance group (n &#x3d; 165)</th>
<th align="center">Non-seroclearance group (n &#x3d; 515)</th>
<th align="center">
<italic>p</italic>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Age (years)</td>
<td align="center">38.00 [32.00, 46.00]</td>
<td align="center">38.00 [32.00, 45.00]</td>
<td align="center">38.00 [32.00, 46.00]</td>
<td align="center">0.681</td>
</tr>
<tr>
<td align="center">Height (cm)</td>
<td align="center">170.00 [163.00, 174.00]</td>
<td align="center">170.00 [162.00, 174.00]</td>
<td align="center">170.00 [163.50, 175.00]</td>
<td align="center">0.282</td>
</tr>
<tr>
<td align="center">Weight (kg)</td>
<td align="center">69.00 [59.00, 75.00]</td>
<td align="center">67.00 [56.00, 74.00]</td>
<td align="center">70.00 [60.00, 75.00]</td>
<td align="center">0.06</td>
</tr>
<tr>
<td align="center">BMI(kg/m<sup>2</sup>)</td>
<td align="center">23.53 [21.25, 25.12]</td>
<td align="center">23.15 [20.96, 24.91]</td>
<td align="center">23.66 [21.47, 25.26]</td>
<td align="center">0.088</td>
</tr>
<tr style="background-color:#CCCCCC">
<td colspan="5" align="left">Gender (%)</td>
</tr>
<tr>
<td align="center">Male</td>
<td align="center">467 (68.7)</td>
<td align="center">104 (63.0)</td>
<td align="center">363 (70.5)</td>
<td align="center">0.089</td>
</tr>
<tr>
<td align="center">Female</td>
<td align="center">213 (31.3)</td>
<td align="center">61 (37.0)</td>
<td align="center">152 (29.5)</td>
<td align="left"/>
</tr>
<tr style="background-color:#CCCCCC">
<td colspan="5" align="left">LC (%)</td>
</tr>
<tr>
<td align="center">Yes</td>
<td align="center">57 (8.4)</td>
<td align="center">7 (4.2)</td>
<td align="center">50 (9.7)</td>
<td align="center">0.041</td>
</tr>
<tr>
<td align="center">No</td>
<td align="center">623 (91.6)</td>
<td align="center">158 (95.8)</td>
<td align="center">465 (90.3)</td>
<td align="left"/>
</tr>
<tr style="background-color:#CCCCCC">
<td colspan="5" align="left">Family history (%)</td>
</tr>
<tr>
<td align="center">Yes</td>
<td align="center">201 (29.6)</td>
<td align="center">37 (22.4)</td>
<td align="center">164 (31.8)</td>
<td align="center">0.027</td>
</tr>
<tr>
<td align="center">No</td>
<td align="center">479 (70.4)</td>
<td align="center">128 (77.6)</td>
<td align="center">351 (68.2)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">HBVDNA (log<sub>10</sub>IU/ml) (0W)</td>
<td align="center">1.34 [0.00, 3.89]</td>
<td align="center">0.00 [0.00, 3.08]</td>
<td align="center">1.62 [0.00, 4.26]</td>
<td align="center">0.034</td>
</tr>
<tr>
<td align="center">HBsAg (log<sub>10</sub>IU/ml) (0W)</td>
<td align="center">2.81 [1.90, 3.47]</td>
<td align="center">1.86 [0.86, 2.58]</td>
<td align="center">3.08 [2.30, 3.65]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">HBeAg (COI) (0W)</td>
<td align="center">0.49 [0.10, 1.30]</td>
<td align="center">0.40 [0.09, 0.56]</td>
<td align="center">0.52 [0.14, 1.99]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">HBeAb (COI) (0W)</td>
<td align="center">0.11 [0.01, 1.55]</td>
<td align="center">0.03 [0.01, 1.20]</td>
<td align="center">0.18 [0.01, 1.76]</td>
<td align="center">0.001</td>
</tr>
<tr>
<td align="center">HBcAb (COI) (0W)</td>
<td align="center">3.28 [3.08, 3.44]</td>
<td align="center">3.29 [3.18, 3.49]</td>
<td align="center">3.28 [3.04, 3.42]</td>
<td align="center">0.012</td>
</tr>
<tr>
<td align="center">ALT (U/L) (0W)</td>
<td align="center">30.00 [19.00, 50.40]</td>
<td align="center">26.70 [15.70, 42.30]</td>
<td align="center">31.00 [20.00, 51.90]</td>
<td align="center">0.004</td>
</tr>
<tr>
<td align="center">AST (U/L) (0W)</td>
<td align="center">26.00 [21.00, 34.19]</td>
<td align="center">23.70 [19.80, 30.60]</td>
<td align="center">27.00 [21.00, 35.30]</td>
<td align="center">0.006</td>
</tr>
<tr>
<td align="center">Tbil (&#x3bc;mol/L) (0W)</td>
<td align="center">14.18 [10.90, 18.90]</td>
<td align="center">13.50 [10.80, 19.20]</td>
<td align="center">14.20 [10.94, 18.70]</td>
<td align="center">0.948</td>
</tr>
<tr>
<td align="center">Dbil (&#x3bc;mol/L) (0W)</td>
<td align="center">3.00 [2.20, 4.20]</td>
<td align="center">3.10 [2.30, 4.22]</td>
<td align="center">3.00 [2.20, 4.16]</td>
<td align="center">0.472</td>
</tr>
<tr>
<td align="center">ALP (&#x3bc;mol/L) (0W)</td>
<td align="center">80.00 [65.15, 97.45]</td>
<td align="center">77.00 [64.00, 90.70]</td>
<td align="center">80.85 [66.85, 98.08]</td>
<td align="center">0.111</td>
</tr>
<tr>
<td align="center">GGT (U/L) (0W)</td>
<td align="center">23.00 [15.00, 36.54]</td>
<td align="center">19.00 [13.00, 31.00]</td>
<td align="center">24.00 [16.00, 38.00]</td>
<td align="center">0.001</td>
</tr>
<tr>
<td align="center">ALB (g/L) (0W)</td>
<td align="center">45.50 [43.48, 47.32]</td>
<td align="center">46.00 [44.00, 48.00]</td>
<td align="center">45.10 [43.20, 47.00]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">GLO (g/L) (0W)</td>
<td align="center">29.00 [26.35, 31.00]</td>
<td align="center">28.10 [26.00, 30.85]</td>
<td align="center">29.00 [26.40, 31.35]</td>
<td align="center">0.163</td>
</tr>
<tr>
<td align="center">NEU(10<sup>9</sup>/L) (0W)</td>
<td align="center">2.85 [2.16, 3.71]</td>
<td align="center">2.66 [2.06, 3.39]</td>
<td align="center">2.90 [2.20, 3.73]</td>
<td align="center">0.084</td>
</tr>
<tr>
<td align="center">PLT (10<sup>9</sup>/L) (0W)</td>
<td align="center">204.00 [164.00, 245.00]</td>
<td align="center">206.00 [161.00, 251.00]</td>
<td align="center">203.00 [165.50, 243.50]</td>
<td align="center">0.984</td>
</tr>
<tr>
<td align="center">Hb(g/L) (0W)</td>
<td align="center">152.00 [137.00, 162.00]</td>
<td align="center">148.00 [133.00, 159.00]</td>
<td align="center">153.00 [139.00, 162.50]</td>
<td align="center">0.005</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison between the two groups after 24 weeks of treatment in the 680 patients.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Total patient (n &#x3d; 680)</th>
<th align="center">Seroclearance group (n &#x3d; 165)</th>
<th align="center">Non-seroclearance group (n &#x3d; 515)</th>
<th align="center">
<italic>p</italic>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">HBVDNA (log<sub>10</sub>IU/ml) (24W)</td>
<td align="center">0.00 [0.00, 0.00]</td>
<td align="center">0.00 [0.00, 0.00]</td>
<td align="center">0.00 [0.00, 0.00]</td>
<td align="center">N/A</td>
</tr>
<tr>
<td align="center">HBsAg (log<sub>10</sub>IU/ml) (24W)</td>
<td align="center">2.10 [0.43, 3.14]</td>
<td align="center">0.00 [0.00, 0.40]</td>
<td align="center">2.47 [1.57, 3.37]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">HBeAg(COI) (24W)</td>
<td align="center">0.44 [0.09, 0.91]</td>
<td align="center">0.37 [0.08, 0.49]</td>
<td align="center">0.45 [0.09, 1.52]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">HBeAb (COI) (24W)</td>
<td align="center">0.12 [0.01, 1.40]</td>
<td align="center">0.04 [0.01, 0.93]</td>
<td align="center">0.26 [0.02, 1.50]</td>
<td align="center">&#x3c;0.001</td>
</tr>
<tr>
<td align="center">HBcAb (COI) (24W)</td>
<td align="center">3.23 [3.03, 3.42]</td>
<td align="center">3.23 [3.07, 3.44]</td>
<td align="center">3.23 [3.01, 3.41]</td>
<td align="center">0.328</td>
</tr>
<tr>
<td align="center">ALT (U/L) (24W)</td>
<td align="center">45.20 [30.00, 71.55]</td>
<td align="center">48.80 [29.42, 80.20]</td>
<td align="center">45.00 [30.20, 68.00]</td>
<td align="center">0.416</td>
</tr>
<tr>
<td align="center">AST (U/L) (24W)</td>
<td align="center">44.00 [32.27, 65.00]</td>
<td align="center">46.20 [33.70, 70.00]</td>
<td align="center">43.00 [32.00, 63.00]</td>
<td align="center">0.192</td>
</tr>
<tr>
<td align="center">Tbil (&#x3bc;mol/L) (24W)</td>
<td align="center">12.80 [10.30, 15.97]</td>
<td align="center">12.50 [9.94, 15.45]</td>
<td align="center">13.06 [10.41, 16.20]</td>
<td align="center">0.107</td>
</tr>
<tr>
<td align="center">Dbil (&#x3bc;mol/L) (24W)</td>
<td align="center">3.20 [2.50, 4.22]</td>
<td align="center">3.20 [2.53, 4.39]</td>
<td align="center">3.20 [2.50, 4.20]</td>
<td align="center">0.512</td>
</tr>
<tr>
<td align="center">ALP (&#x3bc;mol/L) (24W)</td>
<td align="center">83.00 [70.85, 98.83]</td>
<td align="center">81.05 [70.92, 97.07]</td>
<td align="center">84.00 [70.75, 99.00]</td>
<td align="center">0.337</td>
</tr>
<tr>
<td align="center">GGT (U/L) (24W)</td>
<td align="center">50.10 [32.00, 91.00]</td>
<td align="center">49.00 [32.00, 110.60]</td>
<td align="center">51.00 [32.00, 87.17]</td>
<td align="center">0.886</td>
</tr>
<tr>
<td align="center">ALB (g/L) (24W)</td>
<td align="center">44.00 [42.00, 45.41]</td>
<td align="center">44.00 [42.00, 45.60]</td>
<td align="center">44.00 [42.00, 45.39]</td>
<td align="center">0.683</td>
</tr>
<tr>
<td align="center">GLO (g/L) (24W)</td>
<td align="center">29.00 [26.60, 32.00]</td>
<td align="center">29.00 [26.40, 32.10]</td>
<td align="center">29.00 [26.73, 32.00]</td>
<td align="center">0.65</td>
</tr>
<tr>
<td align="center">NEU(10<sup>9</sup>/L) (24W)</td>
<td align="center">1.63 [1.25, 2.21]</td>
<td align="center">1.61 [1.21, 2.03]</td>
<td align="center">1.63 [1.27, 2.24]</td>
<td align="center">0.215</td>
</tr>
<tr>
<td align="center">PLT (10<sup>9</sup>/L) (24W)</td>
<td align="center">116.00 [92.00, 152.00]</td>
<td align="center">121.00 [96.00, 152.00]</td>
<td align="center">114.00 [91.00, 152.00]</td>
<td align="center">0.282</td>
</tr>
<tr>
<td align="center">Hb(g/L) (24W)</td>
<td align="center">134.00 [122.00, 146.00]</td>
<td align="center">131.00 [119.00, 143.00]</td>
<td align="center">135.00 [124.00, 147.00]</td>
<td align="center">0.006</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Using the mlr3 framework in R, we implemented a 2:1 random split of patients into training (n &#x3d; 453) and validation (n &#x3d; 227) sets, followed by 500 bootstrap iterations to enhance the robustness of model assessment. Comparative analysis of baseline and 24-week treatment characteristics between the training and validation sets (<xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref>) showed that, aside from family history (35.2% vs. 26.7%, <italic>P</italic> &#x3d; 0.027), ALT at week 24 (median 48.80 vs. 44.00 U/L, <italic>P</italic> &#x3d; 0.023), AST at week 24 (median 46.20 vs. 42.20 U/L, <italic>P</italic> &#x3d; 0.048), and GGT at week 24 (median 55.00 vs. 48.00 U/L, <italic>P</italic> &#x3d; 0.013), no other indicators exhibited significant differences (<italic>P</italic> &#x3e; 0.05). This indicates that the baseline characteristics of the two sets were overall well-balanced, supporting their use for model training and validation.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Comparison of baseline characteristics between the training and test sets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Test set (n &#x3d; 227)</th>
<th align="center">Training set (n &#x3d; 453)</th>
<th align="center">
<italic>p</italic>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Age</td>
<td align="center">38.00 [32.00, 45.00]</td>
<td align="center">38.00 [32.00, 46.00]</td>
<td align="center">0.872</td>
</tr>
<tr>
<td align="center">Height</td>
<td align="center">170.00 [163.00, 175.00]</td>
<td align="center">170.00 [163.00, 174.00]</td>
<td align="center">0.764</td>
</tr>
<tr>
<td align="center">Weight</td>
<td align="center">69.00 [60.00, 75.00]</td>
<td align="center">70.00 [58.00, 75.00]</td>
<td align="center">0.894</td>
</tr>
<tr>
<td align="center">BMI</td>
<td align="center">23.39 [21.51, 25.08]</td>
<td align="center">23.66 [21.03, 25.26]</td>
<td align="center">0.967</td>
</tr>
<tr>
<td align="center">Gender (%)</td>
<td align="left"/>
<td align="left"/>
<td align="center">0.916</td>
</tr>
<tr>
<td align="center">Male</td>
<td align="center">157 (69.2)</td>
<td align="center">310 (68.4)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">Female</td>
<td align="center">70 (30.8)</td>
<td align="center">143 (31.6)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">LC (%)</td>
<td align="left"/>
<td align="left"/>
<td align="center">0.458</td>
</tr>
<tr>
<td align="center">Yes</td>
<td align="center">16 (7.0)</td>
<td align="center">41 (9.1)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">No</td>
<td align="center">211 (93.0)</td>
<td align="center">412 (90.9)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">Family history (%)</td>
<td align="left"/>
<td align="left"/>
<td align="center">0.027</td>
</tr>
<tr>
<td align="center">Yes</td>
<td align="center">80 (35.2)</td>
<td align="center">121 (26.7)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">No</td>
<td align="center">147 (64.8)</td>
<td align="center">332 (73.3)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">HBVDNA (log<sub>10</sub>IU/ml) (0W)</td>
<td align="center">1.36 [0.00, 3.65]</td>
<td align="center">1.32 [0.00, 4.08]</td>
<td align="center">0.698</td>
</tr>
<tr>
<td align="center">HBsAg (log<sub>10</sub> IU/ml) (0W)</td>
<td align="center">2.92 [2.03, 3.38]</td>
<td align="center">2.76 [1.81, 3.54]</td>
<td align="center">0.586</td>
</tr>
<tr>
<td align="center">HBeAg(COI) (0W)</td>
<td align="center">0.51 [0.10, 1.06]</td>
<td align="center">0.48 [0.11, 1.40]</td>
<td align="center">0.882</td>
</tr>
<tr>
<td align="center">HBeAb (COI) (0W)</td>
<td align="center">0.17 [0.02, 1.55]</td>
<td align="center">0.08 [0.01, 1.53]</td>
<td align="center">0.266</td>
</tr>
<tr>
<td align="center">HBcAb (COI) (0W)</td>
<td align="center">3.31 [3.15, 3.46]</td>
<td align="center">3.26 [3.05, 3.42]</td>
<td align="center">0.057</td>
</tr>
<tr>
<td align="center">ALT (U/L) (0W)</td>
<td align="center">29.90 [19.00, 47.76]</td>
<td align="center">30.00 [19.00, 51.30]</td>
<td align="center">0.86</td>
</tr>
<tr>
<td align="center">AST (U/L) (0W)</td>
<td align="center">26.00 [21.00, 33.98]</td>
<td align="center">26.00 [21.00, 35.00]</td>
<td align="center">0.7</td>
</tr>
<tr>
<td align="center">Tbil (&#x3bc;mol/L) (0W)</td>
<td align="center">14.50 [10.95, 19.57]</td>
<td align="center">14.08 [10.90, 18.60]</td>
<td align="center">0.266</td>
</tr>
<tr>
<td align="center">Dbil (&#x3bc;mol/L) (0W)</td>
<td align="center">3.17 [2.20, 4.27]</td>
<td align="center">2.93 [2.20, 4.16]</td>
<td align="center">0.226</td>
</tr>
<tr>
<td align="center">ALP (&#x3bc;mol/L) (0W)</td>
<td align="center">79.90 [66.07, 94.00]</td>
<td align="center">80.00 [65.03, 98.07]</td>
<td align="center">0.737</td>
</tr>
<tr>
<td align="center">GGT (U/L) (0W)</td>
<td align="center">24.00 [15.00, 36.70]</td>
<td align="center">22.40 [15.00, 36.12]</td>
<td align="center">0.589</td>
</tr>
<tr>
<td align="center">ALB (g/L) (0W)</td>
<td align="center">46.00 [43.50, 47.45]</td>
<td align="center">45.39 [43.50, 47.00]</td>
<td align="center">0.368</td>
</tr>
<tr>
<td align="center">GLO (g/L) (0W)</td>
<td align="center">28.70 [26.60, 31.15]</td>
<td align="center">29.00 [26.17, 31.00]</td>
<td align="center">0.835</td>
</tr>
<tr>
<td align="center">NEU(10<sup>9</sup>/L) (0W)</td>
<td align="center">2.90 [2.15, 3.71]</td>
<td align="center">2.84 [2.18, 3.68]</td>
<td align="center">0.812</td>
</tr>
<tr>
<td align="center">PLT (10<sup>9</sup>/L) (0W)</td>
<td align="center">201.00 [164.00, 240.50]</td>
<td align="center">205.00 [164.00, 251.00]</td>
<td align="center">0.51</td>
</tr>
<tr>
<td align="center">Hb(g/L) (0W)</td>
<td align="center">154.00 [137.50, 162.00]</td>
<td align="center">151.00 [137.00, 161.00]</td>
<td align="center">0.352</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Comparison of patient characteristics at 24 Weeks of treatment between the training and test sets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Test set (n &#x3d; 227)</th>
<th align="center">Training set (n &#x3d; 453)</th>
<th align="center">
<italic>p</italic>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">HBVDNA (log<sub>10</sub>IU/ml) (24W)</td>
<td align="center">0.00 [0.00, 0.00]</td>
<td align="center">0.00 [0.00, 0.00]</td>
<td align="center">N/A</td>
</tr>
<tr>
<td align="center">HBsAg (log<sub>10</sub>IU/ml) (24W)</td>
<td align="center">2.18 [0.42, 3.07]</td>
<td align="center">2.06 [0.44, 3.21]</td>
<td align="center">0.818</td>
</tr>
<tr>
<td align="center">HBeAg(COI) (24W)</td>
<td align="center">0.41 [0.08, 0.84]</td>
<td align="center">0.44 [0.09, 1.00]</td>
<td align="center">0.291</td>
</tr>
<tr>
<td align="center">HBeAb (COI) (24W)</td>
<td align="center">0.21 [0.02, 1.45]</td>
<td align="center">0.10 [0.01, 1.37]</td>
<td align="center">0.248</td>
</tr>
<tr>
<td align="center">HBcAb (COI) (24W)</td>
<td align="center">3.26 [3.05, 3.44]</td>
<td align="center">3.22 [3.01, 3.41]</td>
<td align="center">0.192</td>
</tr>
<tr>
<td align="center">ALT (U/L) (24W)</td>
<td align="center">48.80 [32.80, 78.65]</td>
<td align="center">44.00 [29.00, 66.90]</td>
<td align="center">0.023</td>
</tr>
<tr>
<td align="center">AST (U/L) (24W)</td>
<td align="center">46.20 [34.00, 68.25]</td>
<td align="center">42.20 [31.60, 63.00]</td>
<td align="center">0.048</td>
</tr>
<tr>
<td align="center">Tbil (&#x3bc;mol/L) (24W)</td>
<td align="center">13.30 [10.65, 15.97]</td>
<td align="center">12.71 [10.10, 15.97]</td>
<td align="center">0.455</td>
</tr>
<tr>
<td align="center">Dbil (&#x3bc;mol/L) (24W)</td>
<td align="center">3.20 [2.51, 4.30]</td>
<td align="center">3.20 [2.50, 4.20]</td>
<td align="center">0.621</td>
</tr>
<tr>
<td align="center">ALP (&#x3bc;mol/L) (24W)</td>
<td align="center">82.50 [71.50, 99.50]</td>
<td align="center">83.40 [69.25, 98.46]</td>
<td align="center">0.904</td>
</tr>
<tr>
<td align="center">GGT (U/L) (24W)</td>
<td align="center">55.00 [35.50, 105.50]</td>
<td align="center">48.00 [30.00, 87.07]</td>
<td align="center">0.013</td>
</tr>
<tr>
<td align="center">ALB (g/L) (24W)</td>
<td align="center">44.00 [42.00, 45.30]</td>
<td align="center">43.90 [42.00, 45.60]</td>
<td align="center">0.94</td>
</tr>
<tr>
<td align="center">GLO (g/L) (24W)</td>
<td align="center">29.00 [26.83, 32.00]</td>
<td align="center">29.00 [26.50, 32.00]</td>
<td align="center">0.771</td>
</tr>
<tr>
<td align="center">NEU(10<sup>9</sup>/L) (24W)</td>
<td align="center">1.58 [1.21, 2.17]</td>
<td align="center">1.64 [1.27, 2.23]</td>
<td align="center">0.242</td>
</tr>
<tr>
<td align="center">PLT (10<sup>9</sup>/L) (24W)</td>
<td align="center">114.00 [91.00, 152.50]</td>
<td align="center">117.00 [94.00, 152.00]</td>
<td align="center">0.649</td>
</tr>
<tr>
<td align="center">Hb(g/L) (24W)</td>
<td align="center">135.00 [122.50, 146.50]</td>
<td align="center">134.00 [121.00, 146.00]</td>
<td align="center">0.671</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Categorical data are presented as frequency (percentage). Normally distributed continuous variables are expressed as mean (standard deviation) and compared using the independent samples t-test. Non-normally distributed variables are summarized as median (interquartile range) and compared using the Mann-Whitney U test.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Variable selection</title>
<p>To identify robust predictors of response to interferon-based therapy, this study employed five distinct machine learning-based feature selection algorithms. Random Forest: The results identified the following ten top-ranking features: HBsAg (24W), HBsAg (0W), HBsAg_desc, ALT (24W), GGT (24W), DBIL (24W), HBcAb_desc, HBeAb (24W), HBeAg_desc, and HBcAb (0W), as detailed in <xref ref-type="sec" rid="s14">Supplementary Table S2</xref>. LASSO Regression: The final model retained the following variables: HBsAg (24W), HBeAb (24W), ALT (24W), TBIL (24W), ALP (24W), Age, and Gender, as detailed in <xref ref-type="sec" rid="s14">Supplementary Table S3</xref>. SVM-RFE: The results identified the following top features: HBsAg (24W), HBsAg (0W), HBsAg_desc, HBeAg_desc, HBcAb (0W), HBeAg (24W), GGT (0W), HBeAb (24W), HBeAb_desc, and HBeAg (0W), as presented in <xref ref-type="sec" rid="s14">Supplementary Table S4</xref>. Elastic Net: The algorithm identified the following key variables: HBsAg (0W), HBsAg (24W), and HBsAg_desc, as detailed in <xref ref-type="sec" rid="s14">Supplementary Table S5</xref>. XGBoost: The top 10 variables were: HBsAg (24W), HBsAg (0W), HBsAg_desc, ALT (24W), DBIL (24W), GGT (24W), HBeAb (24W), ALP (24W), HBcAb (0W), and TBIL (24W), as detailed in <xref ref-type="sec" rid="s14">Supplementary Table S6</xref>.</p>
<p>To enhance model generalizability and clinical interpretability, we defined the optimal predictor set as variables selected by at least two out of the five feature selection algorithms, which yielded 11 key predictors (<xref ref-type="sec" rid="s14">Supplementary Table S7</xref>). This consensus approach is visually summarized in a Venn diagram (<xref ref-type="sec" rid="s14">Supplementary Figure S1</xref>). The final predictors were: ALP (24W), ALT (24W), DBIL (24W), GGT (0W), HBcAb (0W), HBeAb (24W), HBeAg_desc, HBsAg (0W), HBsAg_desc, HBsAg (24W), and TBIL (24W). Collectively, this set captures baseline status, on-treatment levels at mid-therapy, and early kinetic changes, thereby providing multi-dimensional predictive information.</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Comparative performance of multiple machine learning models</title>
<p>Based on the selected set of 11 key predictors, we constructed 12 distinct machine learning models, including Logistic Regression, LDA, Decision Tree, Random Forest, XGBoost, SVM (RBF), SVM (Linear), k-Nearest Neighbors, Neural Network, Naive Bayes, GLMNet, and CV GLMNet. The performance of all 12 models was rigorously evaluated on both the training and validation sets. The evaluation metrics included the Area Under the Curve (AUC), accuracy, sensitivity, specificity, precision, and F1-score. Results for each metric are reported as the mean values derived from 500 effective bootstrap samples (<xref ref-type="sec" rid="s14">Supplementary Table S8</xref>). The Receiver Operating Characteristic (ROC) curve was used to assess the discriminative ability of the predictive models. These curves were plotted with 1-specificity on the x-axis and sensitivity on the y-axis, illustrating the performance for both the training and validation sets. In the training set, all models except Neural Network (AUC &#x3d; 0.685 &#xb1; 0.168) demonstrated strong performance (AUC &#x3e; 0.85), as shown in <xref ref-type="fig" rid="F2">Figure 2A</xref>. A comprehensive analysis of the ROC curves from the test set (<xref ref-type="fig" rid="F2">Figure 2B</xref>) and the multi-metric radar chart (<xref ref-type="fig" rid="F2">Figure 2E</xref>) identified three top-performing models: Random Forest (AUC &#x3d; 0.915 &#xb1; 0.020), XGBoost (AUC &#x3d; 0.891 &#xb1; 0.026), and CV GLMNet (AUC &#x3d; 0.862 &#xb1; 0.029) (<xref ref-type="fig" rid="F2">Figures 2C,D</xref>). In contrast, models such as k-Nearest Neighbors and Neural Network showed marked performance degradation on the validation set, with their ROC curves deviating substantially from the upper-left corner, indicating poorer generalizability.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Evaluation of predictive performance for the twelve machine learning models. <bold>(A)</bold> ROC curves with 95% confidence intervals (shaded areas) for all models on the training set. <bold>(B)</bold> ROC curves for all models on the test set. <bold>(C)</bold> ROC curves for the top three performing models on the training set. <bold>(D)</bold> ROC curves for the top three models on the test set. <bold>(E)</bold> Radar plot comparing six performance metrics across all models on the test set. <bold>(F)</bold> Radar plot highlighting the balanced multi-metric performance of the top three models (ranked by test set AUC). The models are distinguished by color in all panels. The top three models, Random Forest, XGBoost, and CV GLMNet, consistently demonstrated superior and stable discriminative ability across both the training and test sets.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g002.tif">
<alt-text content-type="machine-generated">Six panels compare machine learning model performance. Panels A-D show ROC curves with True Positive Rate versus False Positive Rate for various models, including CV GLMNet, Decision Tree, and Random Forest. Panels E and F present radar charts comparing metrics like AUC, Specificity, Sensitivity, Accuracy, and F1 Score for different models, highlighting CV GLMNet, Random Forest, and XGBoost.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Analysis of top-performing models</title>
<p>Among the 12 machine learning models evaluated, Random Forest, XGBoost, and CV GLMNet consistently outperformed the others. The discriminative performance of Random Forest on the validation set was as follows: AUC &#x3d; 0.915 &#xb1; 0.020, accuracy &#x3d; 0.887 &#xb1; 0.019, precision &#x3d; 0.853 &#xb1; 0.062, sensitivity &#x3d; 0.707 &#xb1; 0.066, specificity &#x3d; 0.954 &#xb1; 0.021, and F1-score &#x3d; 0.770 &#xb1; 0.042. XGBoost achieved: AUC &#x3d; 0.891 &#xb1; 0.026, accuracy &#x3d; 0.878 &#xb1; 0.020, precision &#x3d; 0.820 &#xb1; 0.064, sensitivity &#x3d; 0.705 &#xb1; 0.063, specificity &#x3d; 0.942 &#xb1; 0.023, and F1-score &#x3d; 0.755 &#xb1; 0.043. CV GLMNet yielded: AUC &#x3d; 0.862 &#xb1; 0.029, accuracy &#x3d; 0.836 &#xb1; 0.063, precision &#x3d; 0.783 &#xb1; 0.089, sensitivity &#x3d; 0.567 &#xb1; 0.271, specificity &#x3d; 0.940 &#xb1; 0.041, and F1-score &#x3d; 0.724 &#xb1; 0.061 (<xref ref-type="fig" rid="F3">Figures 3A,B</xref>). The Precision-Recall (PR) curves, plotted for both training and validation sets, demonstrated that CV GLMNet, Random Forest, and XGBoost all maintained high and stable predictive performance. Their PR curves were positioned close to the upper-right corner and exhibited narrow 95% confidence intervals, indicating a strong and reliable balance between precision and recall (<xref ref-type="fig" rid="F4">Figures 4A&#x2013;D</xref>). Notably, the Random Forest model achieved the highest AUC on the validation set (<xref ref-type="fig" rid="F3">Figures 3A,B</xref>) and showed a balanced performance across accuracy, sensitivity, specificity, and F1-score (<xref ref-type="fig" rid="F2">Figure 2F</xref>). Its performance on the validation set was comparable to that on the training set, with no significant overfitting observed. This demonstrates superior generalizability, establishing Random Forest as the optimal model for predicting HBsAg seroclearance.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Comparative performance evaluation of the twelve machine learning models. <bold>(A)</bold> Bar plot displaying the bootstrap mean estimates of five performance metrics (AUC, Accuracy, F1-Score, Sensitivity, Specificity) for each model on both the training and test sets. <bold>(B)</bold> Heatmap summarizing the model rankings based on bootstrap mean performance on the test set. Color intensity corresponds to the metric score, with darker shades indicating higher values. The top-performing models&#x2014;Random Forest, CV GLMNet, and XGBoost&#x2014;consistently achieve superior and balanced results across most evaluation metrics in the test set.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g003.tif">
<alt-text content-type="machine-generated">Panel A shows bar charts comparing test and training scores across different machine learning models, evaluated on metrics like AUC, accuracy, and specificity. Panel B is a heat map ranking models such as XGBoost and Random Forest by similar metrics, using a color gradient to indicate performance rank.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Precision-Recall analysis of machine learning models for HBsAg seroclearance prediction. <bold>(A)</bold> Precision-Recall curves with 95% confidence intervals for all twelve models on the training set. <bold>(B)</bold> Precision-Recall curves with 95% confidence intervals for all models on the test set. <bold>(C)</bold> Comparative Precision-Recall analysis of the top three performing models (Random Forest, XGBoost, and CV GLMNet) on the training set. <bold>(D)</bold> Comparative Precision-Recall analysis of the top three performing models (Random Forest, XGBoost, and CV GLMNet) on the test set.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g004.tif">
<alt-text content-type="machine-generated">Four precision-recall curves labeled A, B, C, and D compare multiple models. Graphs A and B showcase various models including Random Forest, Decision Tree, and more. Graphs C and D focus on CV GLMNet, Random Forest, and XGBoost models. Each graph displays precision on the y-axis and recall on the x-axis, demonstrating model performance variations.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-5">
<label>3.5</label>
<title>Clinical utility assessment of the machine learning models</title>
<p>This study evaluated the clinical utility of the twelve machine learning models using Decision Curve Analysis (DCA), with 95% confidence intervals calculated to reflect the reproducibility of the results. As summarized in <xref ref-type="table" rid="T5">Table 5</xref>, the threshold corresponding to the maximum net benefit was 0.01 for all models. The Random Forest model demonstrated the highest Average Net Benefit Index (ANDI) value of 0.547 and the widest range of positive threshold probabilities (0.01&#x2013;0.99). These results indicate its high clinical application value, supporting its use in guiding decisions on whether to continue PEG-IFN&#x3b1;-2b therapy in CHB patients. Decision curves were plotted to illustrate the relationship between decision thresholds (ranging from 0.00 to 1.00) and net benefit (ranging from &#x2212;0.2 to 0.6) across the machine learning models. Over a wide range of threshold probabilities, the net benefit of the three top-performing models&#x2014;Random Forest, XGBoost, and CV GLMNet&#x2014;was consistently superior to that of the other models and remained higher than the &#x201c;treat-all&#x201d; or &#x201c;treat-none&#x201d; strategies, demonstrating robust clinical discriminative ability. Among them, the Random Forest model exhibited greater adaptability to threshold variations, as evidenced by its smoother curve and stable performance across different intervention scenarios. This suggests that Random Forest is particularly well-suited for balancing benefits and risks in clinical decision-making regarding antiviral therapy for hepatitis B (<xref ref-type="fig" rid="F5">Figures 5A,B</xref>).</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Decision curve analysis (DCA) of the twelve machine learning models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">ANDI</th>
<th align="center">Max_Net_Benefit</th>
<th align="center">Threshold_at_Max</th>
<th align="center">Positive_Threshold_Range</th>
<th align="center">N_Valid_Bootstraps</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Logistic regression</td>
<td align="center">0.4732</td>
<td align="center">0.7262</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.93</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">LDA</td>
<td align="center">0.4712</td>
<td align="center">0.7264</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.93</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">Decision tree</td>
<td align="center">0.491</td>
<td align="center">0.7221</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.90</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">Random forest</td>
<td align="center">0.547</td>
<td align="center">0.7271</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.99</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">0.4528</td>
<td align="center">0.7238</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.91</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">SVM (RBF)</td>
<td align="center">0.4881</td>
<td align="center">0.7271</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.94</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">SVM (linear)</td>
<td align="center">0.4772</td>
<td align="center">0.7268</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.94</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">K-nearest neighbors</td>
<td align="center">0.3568</td>
<td align="center">0.7166</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.89</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">Neural network</td>
<td align="center">0.4116</td>
<td align="center">0.7265</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.88</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">Naive bayes</td>
<td align="center">0.3954</td>
<td align="center">0.7173</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.90</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">GLMNet</td>
<td align="center">0.4799</td>
<td align="center">0.7267</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.93</td>
<td align="center">500</td>
</tr>
<tr>
<td align="left">CV GLMNet</td>
<td align="center">0.4755</td>
<td align="center">0.7271</td>
<td align="center">0.01</td>
<td align="center">0.01&#x2013;0.93</td>
<td align="center">500</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Model interpretability and clinical utility assessment. <bold>(A)</bold> Net benefit of all twelve models across a range of threshold probabilities, with 95% confidence intervals derived from bootstrap sampling. <bold>(B)</bold> Net benefit of the top three performing models (Random Forest, XGBoost, and CV GLMNet). The Random Forest model demonstrated superior performance. <bold>(C)</bold> Permutation Importance plot showing the mean decrease in model performance (1 - AUC) after randomly shuffling each feature. <bold>(D)</bold> SHAP summary plot displaying the magnitude and direction of feature contributions for the first 10 samples.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g005.tif">
<alt-text content-type="machine-generated">Panel A and B show decision curve analysis graphs comparing various models, including random forest and XGBoost, displaying net benefit versus threshold probability. Panel C is a bar chart of one minus AUC loss after permutation for different biomarkers using random forest. Panel D is a bar chart illustrating the contribution of each biomarker, with color indicating positive or negative contribution, using random forest.</alt-text>
</graphic>
</fig>
<p>To quantify the contribution of each predictor to the performance of the Random Forest model, we employed both the Permutation Importance method and SHAP analysis to assess variable importance (<xref ref-type="fig" rid="F5">Figures 5C,D</xref>). The Permutation Importance results revealed significant differences in the importance of the 11 predictive features. HBsAg (24W) was identified as the most influential feature for model predictions, indicating that shuffling its values led to the greatest decrease in the model&#x2019;s AUC. This establishes it as a core variable for maintaining the model&#x2019;s discriminative ability. The following five features also provided substantial support for model performance: HBeAb (24W), DBIL (24W), GGT (0W), HBsAg_desc, and HBsAg (0W). Based on the SHAP analysis, using the shapviz package in R to compute Shapley values for features, a bar plot was generated to illustrate the contribution of the top 10 features to the model&#x2019;s predictions in the Random Forest model. The ranking of feature contributions was as follows: HBsAg (24W) &#x3e; HBeAb (24W) &#x3e; HBsAg_desc &#x3e; HBsAg (0W) &#x3e; GGT (0W) &#x3e; ALT (24W) &#x3e; HBeAg_desc &#x3e; DBIL (24W) &#x3e; HBcAb (0W) &#x3e; ALP (24W). Notably, HBcAb (0W) and ALP (24W) exhibited a negative predictive effect on HBsAg seroclearance, while the remaining variables showed positive predictive effects. Integrating the findings from both analytical methods, we conclude that HBsAg (24W), HBeAb (24W), GGT (0W), HBsAg_desc, and HBsAg (0W) are the key variables in the Random Forest model.</p>
</sec>
<sec id="s3-6">
<label>3.6</label>
<title>Predictive performance of individual variables</title>
<p>To compare the predictive performance between the machine learning models and individual variables, we conducted a univariate analysis of the 11 predictor variables on the test set, as detailed in <xref ref-type="sec" rid="s14">Supplementary Table S9</xref>. ROC curves were plotted for the top ten individual features (<xref ref-type="fig" rid="F6">Figures 6A,B</xref>). HBsAg (24W) was identified as the single most predictive feature, achieving an AUC of 0.866 &#xb1; 0.030, accuracy of 0.850 &#xb1; 0.025, precision of 0.723 &#xb1; 0.071, sensitivity of 0.736 &#xb1; 0.052, specificity of 0.893 &#xb1; 0.031, and an F1-score of 0.727 &#xb1; 0.046 on the validation set. Other valuable single features, in descending order of AUC, were HBsAg (0W) (AUC &#x3d; 0.757 &#xb1; 0.032) and HBsAg_desc (AUC &#x3d; 0.685 &#xb1; 0.036) (<xref ref-type="fig" rid="F6">Figures 6C,D</xref>). In contrast, most liver function indicators&#x2014;such as GGT (0W), TBIL (24W), ALP (24W), ALT (24W), and DBIL (24W)&#x2014;demonstrated low predictive utility (AUC &#x3c;0.6), indicating their limited value when used alone.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Univariate predictive performance of individual variables for HBsAg seroclearance. <bold>(A)</bold> Receiver operating characteristic (ROC) curves for the top 10 individual predictors on the training set, with area under the curve (AUC) values indicated. <bold>(B)</bold> ROC curves for the top 10 individual predictors on the test set. <bold>(C)</bold> ROC curves of the top three univariate predictors (HBsAg (24W), HBsAg (0W), and HBsAg_desc) on the training set. <bold>(D)</bold> ROC curves of the top three univariate predictors on the test set.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g006.tif">
<alt-text content-type="machine-generated">Four panels labeled A, B, C, and D display ROC curves comparing different features. Panels A and B show multiple colored curves labeled with features like ALT(2W), HBeAg(4W), and HBsAg(24W). Panels C and D focus on HBsAg curves at different weeks with AUC values: HBsAg(0W) green curve, blue HBsAg_desc, and red HBsAg(24W) with the highest AUC of 0.886. Each graph has axes labeled with sensitivity, specificity, and false positive rate.</alt-text>
</graphic>
</fig>
<p>The univariate analysis in the test set was ranked by AUC in descending order (<xref ref-type="fig" rid="F7">Figure 7A</xref>). Precision&#x2013;Recall (PR) curves of the top 10 individual variables are plotted with Recall on the x-axis and Precision on the y-axis, illustrating the precision of each variable at different recall levels (<xref ref-type="fig" rid="F7">Figure 7B</xref>). The PR curve for HBsAg (24W) was positioned closest to the upper-right corner, confirming its status as the best-performing single-variable predictor. However, this curve also exhibited noticeable fluctuations, reflecting its sensitivity to variations in a single feature. A sharp decline in precision was frequently observed at medium recall levels (0.4&#x2013;0.6). In contrast, the Random Forest model demonstrated markedly superior and stable performance: its precision remained consistently high (mostly above 0.75) across the entire recall range (0&#x2013;1) and was maintained at a high level even at high recall values (0.7&#x2013;0.8), resulting in a much smoother and more robust curve. The Random Forest model, by integrating multiple predictive variables, accurately identifies patients with a high likelihood of achieving HBsAg seroclearance. This precise stratification helps prevent two critical clinical scenarios: firstly, avoiding the premature discontinuation of therapy in potential responders due to misclassification as &#x201c;non-responders,&#x201d; which could otherwise lead to a missed cure opportunity; and secondly, preventing the continuation of ineffective treatment in true non-responders misclassified as &#x201c;responders,&#x201d; thereby reducing unnecessary PEG-IFN&#x3b1;-2b-related side effects and economic burdens (<xref ref-type="table" rid="T6">Table 6</xref>).</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Performance assessment of individual predictors for HBsAg seroclearance. <bold>(A)</bold> Bar plot displaying the test set AUC values (mean &#xb1; standard deviation) of the univariate predictors, ranked in descending order. <bold>(B)</bold> Precision-Recall curves for the top 10 univariate predictors, illustrating their precision across the full range of recall values.</p>
</caption>
<graphic xlink:href="fcell-13-1734654-g007.tif">
<alt-text content-type="machine-generated">Chart A is a horizontal bar chart indicating the Area Under the Curve (AUC) values for different features, with error bars. Chart B is a precision-recall curve for various features, showing the trade-off between precision and recall for each feature.</alt-text>
</graphic>
</fig>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Performance comparison between the random forest model and the univariate model (HBsAg at 24 Weeks).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model_Type</th>
<th align="center">Name</th>
<th align="center">AUC</th>
<th align="center">Accuracy</th>
<th align="center">F1_Score</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Multivariable</td>
<td align="center">Random forest</td>
<td align="center">0.915 &#xb1; 0.020</td>
<td align="center">0.887 &#xb1; 0.019</td>
<td align="center">0.770 &#xb1; 0.042</td>
</tr>
<tr>
<td align="center">Univariable</td>
<td align="center">HBsAg (24W)</td>
<td align="center">0.866 &#xb1; 0.030</td>
<td align="center">0.850 &#xb1; 0.025</td>
<td align="center">0.727 &#xb1; 0.046</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>This study integrated dynamic serological indicators from CHB patients with machine learning algorithms to develop and validate a predictive model for HBsAg seroclearance following PEG-IFN&#x3b1;-2b therapy. The main findings are as follows:</p>
<sec id="s4-1">
<label>4.1</label>
<title>Clinical significance of key predictor variables</title>
<p>In this study, following data cleaning and the exclusion of patients with excessive missing data, the final dataset comprised 31 predictor variables spanning demographic and clinical characteristics, virological markers, liver function indices, and routine blood test parameters. These variables were subsequently processed using five distinct machine learning feature selection methods, which identified 11 optimal predictors. These 11 key variables were used to construct 12 machine learning models. Among them, the Random Forest model demonstrated the best performance (AUC &#x3d; 0.915 &#xb1; 0.020) and surpassed the predictive capability of any single variable. Through Permutation Importance and SHAP analysis, the following variables were identified as the most influential within the Random Forest model: HBsAg (24W), HBeAb (24W), GGT (0W), HBsAg_desc, and HBsAg (0W).</p>
<p>HBsAg (24W) emerged as the most pivotal predictor in this study, achieving an AUC of 0.866 in the univariate analysis on the validation set and consistently ranking as the primary feature across multiple machine learning models. The quantitative HBsAg level serves as a key surrogate marker for the transcriptional activity of hepatitis B virus cccDNA. Both a lower baseline HBsAg level and a lower level at week 24 are strongly associated with a higher probability of HBsAg seroclearance at week 48 of interferon-based therapy (<xref ref-type="bibr" rid="B41">Tan et al., 2025</xref>; <xref ref-type="bibr" rid="B42">Wen et al., 2024</xref>). Furthermore, the magnitude of HBsAg decline or its absolute value at week 24 has been well-established as a robust predictor of response to interferon treatment (<xref ref-type="bibr" rid="B23">Jiang et al., 2024</xref>). Moreover, incorporating the dynamic change in HBsAg (log<sub>10</sub>IU/mL) from baseline to week 24 further enhanced the model&#x2019;s performance, underscoring that kinetic indicators provide a more reflective measure of treatment response trends than single time-point measurements (<xref ref-type="bibr" rid="B33">Reijnders et al., 2011</xref>).</p>
<p>During antiviral therapy, seroconversion of HBeAg at week 24&#x2014;defined as the loss of HBeAg accompanied by the appearance of HBeAb&#x2014;serves as a powerful predictor of favorable long-term outcomes. This serological change signifies the effective activation of the patient&#x2019;s immune system, enabling it to better recognize and clear HBV-infected hepatocytes. A robust and effective cellular immune response is a prerequisite for achieving HBsAg seroclearance (<xref ref-type="bibr" rid="B29">Liaw et al., 2012</xref>).</p>
<p>This study identified a significant association between baseline GGT levels and HBsAg seroclearance. In prior research, low baseline GGT has been established as an independent predictor of sustained virological response in chronic hepatitis C treatment. A lower level of GGT indicates a significant reduction in liver inflammation and bile stasis, suggesting a relative preservation of liver cell function, which may better support the antiviral immune response induced by the combination of PEG-IFN and ribavirin (<xref ref-type="bibr" rid="B1">Akuta et al., 2007</xref>; <xref ref-type="bibr" rid="B10">Dogan et al., 2014</xref>). Furthermore, serum GGT has been proposed as a potential biomarker for predicting the activation of antiviral immunity and HBeAg seroconversion in HBeAg-positive chronic hepatitis B patients receiving nucleos(t)ide analogue (NAs) therapy (<xref ref-type="bibr" rid="B17">Huang, 2015</xref>). However, the role of GGT levels in predicting outcomes specifically for CHB patients treated with PEG-IFN&#x3b1;-2b requires further investigation and validation.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Superior performance of the machine learning model</title>
<p>In this study, the Random Forest (RF) model demonstrated superior performance. RF, an ensemble learning algorithm introduced by Breiman in 2001, operates on the core principle of &#x201c;integrating votes from multiple decision trees&#x201d; to model and predict complex data (<xref ref-type="bibr" rid="B3">Breiman, 2001</xref>). Its advantages lie in its capability to handle non-linear relationships and feature interactions, making it particularly suitable for modeling the complex associations between serological markers and treatment response (<xref ref-type="bibr" rid="B22">Jiang et al., 2009</xref>). Furthermore, its robustness to outliers and missing values reduces the impact of clinical data quality on model performance (<xref ref-type="bibr" rid="B24">Kokla et al., 2019</xref>). Permutation Importance, a model-agnostic tool for assessing feature significance, evaluates the importance of a variable by randomly shuffling its values in the validation dataset and observing the resultant decline in model performance metrics (e.g., accuracy, AUC). A greater decrease in performance indicates a higher importance of the feature to the model&#x2019;s predictions. This method does not rely on the internal structure or parameters of any specific model, directly quantifying the feature&#x2019;s contribution to the actual predictive performance (<xref ref-type="bibr" rid="B12">Fisher et al., 2019</xref>). SHAP values quantify the contribution of each feature to individual predictions, thereby assisting clinicians in understanding the model&#x2019;s decision-making process and addressing the challenge of deploying traditional &#x201c;black-box&#x201d; models in clinical practice. 
By interpreting feature contributions through SHAP analysis, the model&#x2019;s clinical interpretability is significantly enhanced (<xref ref-type="bibr" rid="B31">Lundberg and Lee, 2017</xref>; <xref ref-type="bibr" rid="B37">Shu et al., 2025</xref>). The combination of Permutation Importance and SHAP analysis provides complementary insights&#x2014;from global feature importance to local explanations&#x2014;consistently identifying HBsAg (24W) as the core predictor, with HBeAb (24W), HBsAg_desc, HBsAg (0W), and GGT (0W) as important supporting variables. For future clinical translation, a user-friendly predictive tool (such as a nomogram or a mobile application) could be developed to transform the Random Forest model into an accessible decision-support instrument for clinicians. Furthermore, prospective interventional studies are warranted to validate whether &#x201c;RF model-guided therapy for CHB&#x201d; can ultimately improve the rate of HBsAg seroclearance and reduce overall treatment costs.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Clinical application value of the machine learning model</title>
<p>The Random Forest model developed in this study can predict the likelihood of eventual HBsAg seroclearance in CHB patients as early as 24 weeks after initiating PEG-IFN&#x3b1;-2b therapy. Its clinical application includes several key aspects: For patients predicted as &#x201c;non-achievers,&#x201d; clinicians can promptly adjust the treatment strategy (e.g., switching to nucleos(t)ide analogues), thereby avoiding ineffective therapy and unnecessary drug-related adverse events. For those predicted as &#x201c;achievers,&#x201d; the positive prediction can reinforce treatment confidence and improve adherence. Furthermore, since all required variables (such as HBsAg and GGT) are routinely measured in standard clinical practice, the model can be implemented without additional testing, facilitating its widespread adoption.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Clinical translation and future directions</title>
<p>To facilitate the clinical translation of our predictive model, we have outlined a clear roadmap for subsequent research. First, we will precisely calibrate the decision thresholds for predicted probabilities using a prospective cohort to establish a risk stratification framework categorizing patients as &#x201c;high-, intermediate-, and low-probability responders.&#x201d; Based on this, a user-friendly clinical decision support tool will be developed to enable instant input of patient indicators and automatic output of stratification-based recommendations. Ultimately, a multicenter randomized controlled trial will be conducted to validate the effectiveness of this decision-making workflow, comparing the HBsAg seroclearance rates and medical costs between the &#x201c;model-guided strategy&#x201d; group and the &#x201c;standard management&#x201d; group. This systematic effort aims to transition our model from prediction to clinical practice.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Study limitations</title>
<p>However, this study has several limitations. First, the predictive model did not incorporate several potentially significant variables, including HBV genotype, HBV RNA, and host genetic polymorphisms (e.g., IL28B genotype). Previous research has established that patients with HBV genotype B exhibit a more pronounced decline in HBsAg and less rebound during interferon therapy, suggesting a more sustained immune response (<xref ref-type="bibr" rid="B4">Brunetto et al., 2009</xref>). In contrast, patients with HBV genotype D demonstrate a lower probability of achieving a sustained virological response, regardless of baseline ALT levels or HBV DNA load, identifying genotype D as an independent predictor of poor treatment outcome (<xref ref-type="bibr" rid="B6">Buster et al., 2009</xref>). Furthermore, small interfering RNA (siRNA) therapies targeting HBV RNA represent a promising strategy for achieving functional cure, with the substantial HBsAg reduction they induce being a key predictor of subsequent HBsAg seroclearance (<xref ref-type="bibr" rid="B45">Yuen et al., 2024</xref>). Additionally, IL28B gene polymorphisms have been significantly associated with HBsAg clearance (OR &#x3d; 15.534, 95% CI: 1.998&#x2013;120.777, P &#x3c; 0.001) (<xref ref-type="bibr" rid="B14">Geng et al., 2024</xref>; <xref ref-type="bibr" rid="B36">Seto et al., 2013</xref>). Other functional biomarkers, including miR-548c-3p (<xref ref-type="bibr" rid="B30">Lin et al., 2023</xref>) and CXCL13 (<xref ref-type="bibr" rid="B32">Luo et al., 2023</xref>), have also been linked to HBsAg clearance. Future studies should incorporate these features to further refine the model&#x2019;s accuracy. Second, the outcome measure was limited to &#x201c;HBsAg seroclearance at 48 weeks of treatment&#x201d; and was not linked to long-term clinical outcomes (e.g., the 5-year risk of hepatocellular carcinoma). 
Extending the follow-up period is necessary to comprehensively assess the prognostic implications. Furthermore, the model was only internally validated and lacked an independent external validation cohort. Both the training and validation data were sourced from the OASIS project, which may introduce data bias or overfitting. Consequently, the model&#x2019;s generalizability to other independent cohorts (e.g., from different regions or using different testing platforms) and specific subgroups (such as patients with HBV genotype D) remains unverified, potentially affecting the reliability of its clinical application.</p>
</sec>
<sec sec-type="conclusion" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>The Random Forest model, constructed based on serological data from CHB patients, effectively predicts the HBsAg seroclearance rate at 48 weeks of PEG-IFN&#x3b1;-2b therapy. Key predictive variables identified include HBsAg (24W), HBeAb (24W), GGT (0W), HBsAg_desc, and HBsAg (0W). The model demonstrates strong discriminative ability and clinical utility, providing valuable data-driven support for clinicians to develop individualized interferon treatment strategies.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Institutional Review Board of Shanghai East Hospital, Tongji University (Approval Number: [2020] Pre-review No. (157)). This study was conducted in accordance with the Declaration of Helsinki. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>NK: Writing &#x2013; original draft, Formal Analysis, Writing &#x2013; review and editing. KW: Writing &#x2013; review and editing, Supervision, Conceptualization. YW: Supervision, Writing &#x2013; review and editing. SL: Writing &#x2013; review and editing, Visualization, Software. LZ: Resources, Visualization, Software, Writing &#x2013; review and editing. TW: Visualization, Software, Writing &#x2013; review and editing, Resources. ZT: Validation, Supervision, Conceptualization, Writing &#x2013; review and editing. LQ: Funding acquisition, Writing &#x2013; review and editing, Validation.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>We thank all the participating centers of the OASIS project and their staff for their invaluable efforts in patient enrollment, follow-up, and data collection for this study. We also thank the members of the NMCID Liver Diseases Research Group for their contribution to the data cleaning and quality control work.</p>
</ack>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The authors declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s14">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcell.2025.1734654/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcell.2025.1734654/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Image1.tif" id="SM1" mimetype="application/tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.doc" id="SM2" mimetype="application/doc" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1066418/overview">Mingjie Wang</ext-link>, Shanghai Jiao Tong University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1314585/overview">Chenghai Liu</ext-link>, Shanghai University of Traditional Chinese Medicine, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1270099/overview">Xiaohua Chen</ext-link>, Shanghai Jiao Tong University, China</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Akuta</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kawamura</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yatsuji</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sezaki</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Predictive factors of early and sustained responses to peginterferon plus ribavirin combination therapy in Japanese patients infected with hepatitis c virus genotype 1b: amino acid substitutions in the core region and low-density lipoprotein cholesterol levels</article-title>. <source>J. Hepatol.</source> <volume>46</volume> (<issue>3</issue>), <fpage>403</fpage>&#x2013;<lpage>410</lpage>. <pub-id pub-id-type="doi">10.1016/j.jhep.2006.09.019</pub-id>
<pub-id pub-id-type="pmid">17126448</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asnicar</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Passerini</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Waldron</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Segata</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Machine learning for microbiologists</article-title>. <source>Nat. Rev. Microbiol.</source> <volume>22</volume> (<issue>4</issue>), <fpage>191</fpage>&#x2013;<lpage>205</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-023-00984-1</pub-id>
<pub-id pub-id-type="pmid">37968359</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume> (<issue>1</issue>), <fpage>5</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1023/a:1010933404324</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brunetto</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Moriconi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Bonino</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>G. K. K.</given-names>
</name>
<name>
<surname>Farci</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yurdaydin</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Hepatitis b virus surface antigen levels: a guide to sustained response to peginterferon alfa&#x2010;2a in hbeag&#x2010;negative chronic hepatitis b</article-title>. <source>Hepatol. Baltim. Md</source> <volume>49</volume> (<issue>4</issue>), <fpage>1141</fpage>&#x2013;<lpage>1150</lpage>. <pub-id pub-id-type="doi">10.1002/hep.22760</pub-id>
<pub-id pub-id-type="pmid">19338056</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burki</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>WHO&#x27;s 2024 global hepatitis report</article-title>. <source>Lancet Infect. Dis.</source> <volume>24</volume> (<issue>6</issue>), <fpage>e362</fpage>&#x2013;<lpage>e363</lpage>. <pub-id pub-id-type="doi">10.1016/S1473-3099(24)00307-4</pub-id>
<pub-id pub-id-type="pmid">38795729</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buster</surname>
<given-names>E. H. C. J.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>B. E.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>G. K. K.</given-names>
</name>
<name>
<surname>Piratvisuth</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zeuzem</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Steyerberg</surname>
<given-names>E. W.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Factors that predict response of patients with hepatitis b e antigen&#x2013;positive chronic hepatitis b to peginterferon-alfa</article-title>. <source>Gastroenterology</source> <volume>137</volume> (<issue>6</issue>), <fpage>2002</fpage>&#x2013;<lpage>2009</lpage>. <pub-id pub-id-type="doi">10.1053/j.gastro.2009.08.061</pub-id>
<pub-id pub-id-type="pmid">19737568</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Guestrin</surname>
<given-names>C.</given-names>
</name>
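</person-group> (<year>2016</year>). &#x201c;<article-title>XGBoost: a scalable tree boosting system</article-title>,&#x201d; in <source>Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>785</fpage>&#x2013;<lpage>794</lpage>. <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id></mixed-citation>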
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Data-driven multinomial random forest: a new random forest variant with strong consistency</article-title>. <source>J. Big Data</source> <volume>11</volume> (<issue>1</issue>), <fpage>34</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-023-00874-6</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deo</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Machine learning in medicine</article-title>. <source>Circulation</source> <volume>132</volume> (<issue>20</issue>), <fpage>1920</fpage>&#x2013;<lpage>1930</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.115.001593</pub-id>
<pub-id pub-id-type="pmid">26572668</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dogan</surname>
<given-names>U. B.</given-names>
</name>
<name>
<surname>Akin</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Yalaki</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A low serum &#x3b3;-glutamyltransferase level predicts a sustained virological response in patients with chronic hepatitis C genotype 1</article-title>. <source>Gut Liver</source> <volume>8</volume> (<issue>1</issue>), <fpage>113</fpage>&#x2013;<lpage>115</lpage>. <pub-id pub-id-type="doi">10.5009/gnl.2014.8.1.113</pub-id>
<pub-id pub-id-type="pmid">24516710</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Construction and validation of prognostic models in critically ill patients with sepsis-associated acute kidney injury: interpretable machine learning approach</article-title>. <source>J. Transl. Med.</source> <volume>21</volume> (<issue>1</issue>), <fpage>406</fpage>. <pub-id pub-id-type="doi">10.1186/s12967-023-04205-4</pub-id>
<pub-id pub-id-type="pmid">37349774</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fisher</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rudin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dominici</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>All models are wrong, but many are useful: learning a variable&#x27;s importance by studying an entire class of prediction models simultaneously</article-title>. <source>J. Mach. Learn. Res.</source> <volume>20</volume>, <fpage>177</fpage>. <pub-id pub-id-type="pmid">34335110</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Friedman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hastie</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tibshirani</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Regularization paths for generalized linear models <italic>via</italic> coordinate descent</article-title>. <source>J. Stat. Softw.</source> <volume>33</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v033.i01</pub-id>
<pub-id pub-id-type="pmid">20808728</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Geng</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Prediction model for the clearance of hepatitis B surface antigen in patients with chronic hepatitis B before interferon therapy: a prospective case&#x2013;control study</article-title>. <source>Diagnostics</source> <volume>14</volume> (<issue>1</issue>), <fpage>118</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics14010118</pub-id>
<pub-id pub-id-type="pmid">38201427</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fusheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Taisheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yameng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xiaoyuan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jidong</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Guidelines for the prevention and treatment of chronic hepatitis B (version 2022)</article-title>. <source>Chin. J. Hepatol.</source> <volume>30</volume> (<issue>12</issue>), <fpage>1309</fpage>&#x2013;<lpage>1331</lpage>. <pub-id pub-id-type="doi">10.3760/cma.j.cn501113-20221204-00607</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosseini Sarkhosh</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Esteghamati</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hemmatabadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Daraei</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predicting diabetic nephropathy in type 2 diabetic patients using machine learning algorithms</article-title>. <source>J. Diabetes Metab. Disord.</source> <volume>21</volume> (<issue>2</issue>), <fpage>1433</fpage>&#x2013;<lpage>1441</lpage>. <pub-id pub-id-type="doi">10.1007/s40200-022-01076-2</pub-id>
<pub-id pub-id-type="pmid">36404838</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>Y. L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Association of serum gamma-glutamyl transferase with treatment outcome in chronic hepatitis B patients</article-title>. <source>World J. Gastroenterol.</source> <volume>21</volume> (<issue>34</issue>), <fpage>9957</fpage>&#x2013;<lpage>9965</lpage>. <pub-id pub-id-type="doi">10.3748/wjg.v21.i34.9957</pub-id>
<pub-id pub-id-type="pmid">26379400</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Feature clustering based support vector machine recursive feature elimination for gene selection</article-title>. <source>Appl. Intell.</source> <volume>48</volume> (<issue>3</issue>), <fpage>594</fpage>&#x2013;<lpage>607</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-017-0992-2</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hughey</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Butte</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Robust meta-analysis of gene expression using the elastic net</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume> (<issue>12</issue>), <fpage>e79</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv229</pub-id>
<pub-id pub-id-type="pmid">25829177</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hui</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fuzhen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liping</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guomin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jianhua</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>New progress in HBV control and the cascade of health care for people living with HBV in China: evidence from the fourth national serological survey, 2020</article-title>. <source>Lancet Reg. Health West. Pac.</source> <volume>51</volume>, <fpage>101193</fpage>. <pub-id pub-id-type="doi">10.1016/j.lanwpc.2024.101193</pub-id>
<pub-id pub-id-type="pmid">39315090</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jian</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fuzhen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhongdan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jinlei</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Ailing</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fanghui</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Research advances in the disease burden of viral hepatitis in China</article-title>. <source>J. Clin. Hepatol.</source> <volume>41</volume> (<issue>2</issue>), <fpage>221</fpage>&#x2013;<lpage>227</lpage>. <pub-id pub-id-type="doi">10.12449/JCH250205</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A random forest approach to the detection of epistatic interactions in case-control studies</article-title>. <source>BMC Bioinforma.</source> <volume>10</volume> (<issue>Suppl. 1</issue>), <fpage>S65</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-S1-S65</pub-id>
<pub-id pub-id-type="pmid">19208169</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Predictors of HBsAg seroclearance in patients with chronic HBV infection treated with pegylated interferon-&#x3b1;: a systematic review and meta-analysis</article-title>. <source>Hepatol. Int.</source> <volume>18</volume> (<issue>3</issue>), <fpage>892</fpage>&#x2013;<lpage>903</lpage>. <pub-id pub-id-type="doi">10.1007/s12072-024-10648-8</pub-id>
<pub-id pub-id-type="pmid">38461186</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kokla</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Virtanen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kolehmainen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Paananen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hanhineva</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Random forest-based imputation outperforms other methods for imputing LC-MS metabolomics data: a comparative study</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume> (<issue>1</issue>), <fpage>492</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-3110-0</pub-id>
<pub-id pub-id-type="pmid">31601178</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kotsiantis</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Zaharakis</surname>
<given-names>I. D.</given-names>
</name>
<name>
<surname>Pintelas</surname>
<given-names>P. E.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Machine learning: a review of classification and combining techniques</article-title>. <source>Artif. Intell. Rev.</source> <volume>26</volume> (<issue>3</issue>), <fpage>159</fpage>&#x2013;<lpage>190</lpage>. <pub-id pub-id-type="doi">10.1007/s10462-007-9052-3</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>H. W.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>S. Y.</given-names>
</name>
<name>
<surname>Chon</surname>
<given-names>Y. E.</given-names>
</name>
<name>
<surname>Seo</surname>
<given-names>Y. S.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A machine learning model for predicting hepatocellular carcinoma risk in patients with chronic hepatitis B</article-title>. <source>Liver Int.</source> <volume>43</volume> (<issue>8</issue>), <fpage>1813</fpage>&#x2013;<lpage>1821</lpage>. <pub-id pub-id-type="doi">10.1111/liv.15597</pub-id>
<pub-id pub-id-type="pmid">37452503</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nam</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sung</surname>
<given-names>P. S.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Prediction of long-term HBsAg seroclearance in patients with HBeAg-negative chronic hepatitis B</article-title>. <source>JHEP Rep.</source> <volume>7</volume> (<issue>7</issue>), <fpage>101391</fpage>. <pub-id pub-id-type="doi">10.1016/j.jhepr.2025.101391</pub-id>
<pub-id pub-id-type="pmid">40524695</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Perspective: global burden of iodine deficiency: insights and projections to 2050 using XGBoost and SHAP</article-title>. <source>Adv. Nutr.</source> <volume>16</volume> (<issue>3</issue>), <fpage>100384</fpage>. <pub-id pub-id-type="doi">10.1016/j.advnut.2025.100384</pub-id>
<pub-id pub-id-type="pmid">39914495</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liaw</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Piratvisuth</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>H. L. Y.</given-names>
</name>
<name>
<surname>Chien</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Asian-Pacific consensus statement on the management of chronic hepatitis B: a 2012 update</article-title>. <source>Hepatol. Int.</source> <volume>6</volume> (<issue>3</issue>), <fpage>531</fpage>&#x2013;<lpage>561</lpage>. <pub-id pub-id-type="doi">10.1007/s12072-012-9365-4</pub-id>
<pub-id pub-id-type="pmid">26201469</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>miR-548c-3p targets TRIM22 to attenuate the Peg&#x2013;IFN&#x2013;&#x3b1; therapeutic efficacy in HBeAg-positive patients with chronic hepatitis B</article-title>. <source>Antivir. Res.</source> <volume>213</volume>, <fpage>105584</fpage>. <pub-id pub-id-type="doi">10.1016/j.antiviral.2023.105584</pub-id>
<pub-id pub-id-type="pmid">37019306</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. I.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A unified approach to interpreting model predictions</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>30</volume>, <fpage>4765</fpage>&#x2013;<lpage>4774</lpage>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title><italic>CXCL13</italic> variant predicts pegylated&#x2010;interferon &#x3b1; treatment response in HBeAg&#x2010;positive chronic hepatitis B patients</article-title>. <source>J. Med. Virol.</source> <volume>95</volume> (<issue>7</issue>), <fpage>e28963</fpage>. <pub-id pub-id-type="doi">10.1002/jmv.28963</pub-id>
<pub-id pub-id-type="pmid">37470204</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reijnders</surname>
<given-names>J. G. P.</given-names>
</name>
<name>
<surname>Rijckborst</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Sonneveld</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Scherbeijn</surname>
<given-names>S. M. J.</given-names>
</name>
<name>
<surname>Boucher</surname>
<given-names>C. A. B.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>B. E.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Kinetics of hepatitis B surface antigen differ between treatment with peginterferon and entecavir</article-title>. <source>J. Hepatol.</source> <volume>54</volume> (<issue>3</issue>), <fpage>449</fpage>&#x2013;<lpage>454</lpage>. <pub-id pub-id-type="doi">10.1016/j.jhep.2010.07.046</pub-id>
<pub-id pub-id-type="pmid">21112655</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A simple-to-use tool for predicting response to peginterferon in HBV DNA suppressed chronic hepatitis B patients in China</article-title>. <source>Antivir. Res.</source> <volume>194</volume>, <fpage>105163</fpage>. <pub-id pub-id-type="doi">10.1016/j.antiviral.2021.105163</pub-id>
<pub-id pub-id-type="pmid">34389410</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rijckborst</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>B. E.</given-names>
</name>
<name>
<surname>Cakaloglu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ferenci</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tabak</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Akdogan</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Early on&#x2010;treatment prediction of response to peginterferon alfa&#x2010;2a for HBeAg&#x2010;negative chronic hepatitis B using HBsAg and HBV DNA levels</article-title>. <source>Hepatology</source> <volume>52</volume> (<issue>2</issue>), <fpage>454</fpage>&#x2013;<lpage>461</lpage>. <pub-id pub-id-type="doi">10.1002/hep.23722</pub-id>
<pub-id pub-id-type="pmid">20683945</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Seto</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>D. K.</given-names>
</name>
<name>
<surname>Kopaniszen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Proitsi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sham</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hung</surname>
<given-names>I. F.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>HLA-DP and IL28B polymorphisms: influence of host genome on hepatitis B surface antigen seroclearance in chronic hepatitis B</article-title>. <source>Clin. Infect. Dis.</source> <volume>56</volume> (<issue>12</issue>), <fpage>1695</fpage>&#x2013;<lpage>1703</lpage>. <pub-id pub-id-type="doi">10.1093/cid/cit121</pub-id>
<pub-id pub-id-type="pmid">23449268</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>SHAP combined with machine learning to predict mortality risk in maintenance hemodialysis patients: a retrospective study</article-title>. <source>Front. Med.</source> <volume>12</volume>, <fpage>1615950</fpage>. <pub-id pub-id-type="doi">10.3389/fmed.2025.1615950</pub-id>
<pub-id pub-id-type="pmid">40692959</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sim</surname>
<given-names>J. Z. T.</given-names>
</name>
<name>
<surname>Fong</surname>
<given-names>Q. W.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C. H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Machine learning in medicine: what clinicians should know</article-title>. <source>Singap. Med. J.</source> <volume>64</volume> (<issue>2</issue>), <fpage>91</fpage>&#x2013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.11622/smedj.2021054</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Simon</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Friedman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hastie</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tibshirani</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Regularization paths for Cox&#x27;s proportional hazards model <italic>via</italic> coordinate descent</article-title>. <source>J. Stat. Softw.</source> <volume>39</volume> (<issue>5</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v039.i05</pub-id>
<pub-id pub-id-type="pmid">27065756</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sonneveld</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Rijckborst</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Boucher</surname>
<given-names>C. A. B.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>B. E.</given-names>
</name>
<name>
<surname>Janssen</surname>
<given-names>H. L. A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Prediction of sustained response to peginterferon alfa-2b for hepatitis B e antigen&#x2013;positive chronic hepatitis B using on-treatment hepatitis B surface antigen decline</article-title>. <source>Hepatology</source> <volume>52</volume> (<issue>4</issue>), <fpage>1251</fpage>&#x2013;<lpage>1257</lpage>. <pub-id pub-id-type="doi">10.1002/hep.23844</pub-id>
<pub-id pub-id-type="pmid">20830787</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Shang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Predictive model for HBsAg clearance rate in chronic hepatitis B patients treated with pegylated interferon &#x3b1;-2b for 48 weeks</article-title>. <source>Hepatol. Int.</source> <volume>19</volume> (<issue>2</issue>), <fpage>358</fpage>&#x2013;<lpage>367</lpage>. <pub-id pub-id-type="doi">10.1007/s12072-024-10764-5</pub-id>
<pub-id pub-id-type="pmid">39702655</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Clinical cure induced by pegylated interferon &#x3b1;-2b in the advantaged population of chronic hepatitis B virus infection: a retrospective cohort study</article-title>. <source>Front. Cell. Infect. Microbiol.</source> <volume>13</volume>, <fpage>1332232</fpage>. <pub-id pub-id-type="doi">10.3389/fcimb.2023.1332232</pub-id>
<pub-id pub-id-type="pmid">38292859</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xue</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Machine learning based immune evasion signature for predicting the prognosis and immunotherapy benefit in stomach adenocarcinoma</article-title>. <source>Front. Cell Dev. Biol.</source> <volume>13</volume>, <fpage>1656367</fpage>. <pub-id pub-id-type="doi">10.3389/fcell.2025.1656367</pub-id>
<pub-id pub-id-type="pmid">41081048</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>A predictive model for functional cure in chronic HBV patients treated with pegylated interferon alpha: a comparative study of multiple algorithms based on clinical data</article-title>. <source>Virol. J.</source> <volume>21</volume> (<issue>1</issue>), <fpage>333</fpage>. <pub-id pub-id-type="doi">10.1186/s12985-024-02599-1</pub-id>
<pub-id pub-id-type="pmid">39710712</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>K. T.</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Heo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tangkijvanich</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>VIR-2218 (elebsiran) plus pegylated interferon-alfa-2a in participants with chronic hepatitis B virus infection: a phase 2 study</article-title>. <source>Lancet Gastroenterol. Hepatol.</source> <volume>9</volume> (<issue>12</issue>), <fpage>1121</fpage>&#x2013;<lpage>1132</lpage>. <pub-id pub-id-type="doi">10.1016/S2468-1253(24)00237-1</pub-id>
<pub-id pub-id-type="pmid">39389081</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>