<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Immunol.</journal-id>
<journal-title>Frontiers in Immunology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Immunol.</abbrev-journal-title>
<issn pub-type="epub">1664-3224</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fimmu.2024.1407632</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Immunology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Interpretable machine learning for predicting the response duration to Sintilimab plus chemotherapy in patients with advanced gastric or gastroesophageal junction cancer</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Dan-qi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2064096"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Wen-huan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1772097"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Xiao-wei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hua</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ge</surname>
<given-names>Xiao-song</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Li</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Gao</surname>
<given-names>Xiang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2602411"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Big Data Center, Affiliated Hospital of Jiangnan University</institution>, <addr-line>Wuxi</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Oncology, Affiliated Hospital of Jiangnan University</institution>, <addr-line>Wuxi</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Wei Wang, Jiangsu Institute of Parasitic Diseases (JIPD), China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Martin Vogt, University of Bonn, Germany</p>
<p>Dan Liu, Helmholtz Association of German Research Centers (HZ), Germany</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Li Liu, <email xlink:href="mailto:9862016027@jiangnan.edu.cn">9862016027@jiangnan.edu.cn</email>; Xiang Gao, <email xlink:href="mailto:13606189128@139.com">13606189128@139.com</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1407632</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Wang, Xu, Cheng, Hua, Ge, Liu and Gao</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Wang, Xu, Cheng, Hua, Ge, Liu and Gao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Sintilimab plus chemotherapy has proven effective as a combination immunotherapy for patients with advanced gastric and gastroesophageal junction adenocarcinoma (GC/GEJC). A multi-center study conducted in China revealed a median progression-free survival (PFS) of 7.1 months. However, the prediction of response duration to this immunotherapy has not been thoroughly investigated. Additionally, the potential of baseline laboratory features in predicting PFS remains largely unexplored. Therefore, we developed an interpretable machine learning (ML) framework, iPFS-SC, aimed at predicting PFS using baseline (pre-treatment) laboratory features and providing interpretations of the predictions.</p>
</sec>
<sec>
<title>Materials and methods</title>
<p>A cohort of 146 patients with advanced GC/GEJC, along with their baseline laboratory features, was included in the iPFS-SC framework. Through a forward feature selection process, predictive baseline features were identified, and four ML algorithms were developed to categorize PFS duration based on a threshold of 7.1 months. Furthermore, we employed explainable artificial intelligence (XAI) methodologies to elucidate the relationship between features and model predictions.</p>
</sec>
<sec>
<title>Results</title>
<p>The findings demonstrated that LightGBM achieved an accuracy of 0.70 in predicting PFS for advanced GC/GEJC patients. Furthermore, an F1-score of 0.77 was attained for identifying patients with PFS durations shorter than 7.1 months. Through the feature selection process, we identified 11 predictive features. Additionally, our framework facilitated the discovery of relationships between laboratory features and PFS.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>An ML-based framework was developed to predict Sintilimab plus chemotherapy response duration with high accuracy. The suggested predictive features are easily accessible through routine laboratory tests. Furthermore, XAI techniques offer comprehensive explanations, both at the global and individual level, regarding PFS predictions. This framework enables patients to better understand their treatment plans, while clinicians can customize therapeutic approaches based on the explanations provided by the model.</p>
</sec>
</abstract>
<kwd-group>
<kwd>Sintilimab</kwd>
<kwd>immunotherapy</kwd>
<kwd>progression-free survival</kwd>
<kwd>machine learning</kwd>
<kwd>interpretability</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="31"/>
<page-count count="12"/>
<word-count count="4982"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Cancer Immunity and Immunotherapy</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Gastric and gastroesophageal junction adenocarcinoma (GC/GEJC) is the fifth most common cancer worldwide, and approximately 44% of patients are diagnosed in China (<xref ref-type="bibr" rid="B1">1</xref>). The median overall survival is approximately one year for advanced GC/GEJC. Recently, immune checkpoint blockade therapy targeting programmed death-ligand 1 (PD-L1) has shown efficacy in HER2-negative GC/GEJC (<xref ref-type="bibr" rid="B1">1</xref>).</p>
<p>Sintilimab is a recombinant, fully human IgG4 anti-PD-1 monoclonal antibody, which is the earliest approved anti-PD-1 monoclonal antibody for gastric cancer in China (<xref ref-type="bibr" rid="B2">2</xref>). The multicenter ORIENT-16 randomized clinical trial conducted across 62 hospitals in China demonstrated that the addition of Sintilimab to chemotherapy significantly enhanced overall patient-specific outcomes, including overall survival (OS) and median progression-free survival (PFS), in all 650 previously untreated patients with advanced GC/GEJC (<xref ref-type="bibr" rid="B3">3</xref>). Specifically, the study found that patients treated with Sintilimab, who had a combined positive score (CPS) of 5 or more, exhibited a median PFS of 7.1 months compared to those receiving placebo and chemotherapy (<xref ref-type="bibr" rid="B3">3</xref>). However, it&#x2019;s noteworthy that CPS testing can only be conducted in hospitals primarily located in tertiary settings, and relevant tests are still unavailable in rural hospitals. Additionally, the current detection platforms for CPS show inconsistencies, leading to significant deviations across different platforms.</p>
<p>As an alternative to CPS, PFS and OS have been utilized to predict survival outcomes in gastric cancer (GC) through methods like Kaplan-Meier analysis and Cox proportional hazards models (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>). Among these studies, Ozveren et&#xa0;al. (<xref ref-type="bibr" rid="B6">6</xref>) identified an association between the inflammatory prognostic index (IPI) score (derived from C-reactive protein (CRP), neutrophil-to-lymphocyte ratio (NLR), and serum albumin) and the risk of disease progression, highlighting the potential utility of clinical laboratory tests in predicting tumor response.</p>
<p>Machine learning (ML) methods offer a valuable approach to analyzing intricate datasets and revealing underlying relationships between predictors and outcomes. By employing ML techniques, it becomes feasible to capture non-linear associations among features, thereby enhancing prediction accuracy. Previous research has demonstrated the efficacy of various ML algorithms, including Logistic Regression (LR), Na&#xef;ve Bayes (NB), Support Vector Machine (SVM), k-Nearest Neighbors (KNN), Decision Tree (DT), Random Forest (RF), eXtreme Gradient Boosting (XGBoost), and convolutional neural networks (CNN), in effectively predicting outcomes in GC (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B16">16</xref>). To date, there has been no systematic application of ML algorithms to predict patient survival following treatment with Sintilimab plus chemotherapy, and it remains uncertain whether baseline laboratory features are associated with short-term survival outcomes, such as short PFS.</p>
<p>To tackle these challenges, we utilized four ML algorithms within a local cohort comprising 146 patients diagnosed with advanced GC/GEJC. Our aim was to develop a novel framework for predicting treatment response, termed iPFS-SC (Interpretable machine learning models for predicting Progression-Free Survival in patients undergoing Sintilimab plus Chemotherapy). Within this framework, we evaluated the potential of baseline laboratory tests to forecast the response to Sintilimab plus chemotherapy. Considering the median PFS of 7.1 months observed with Sintilimab plus chemotherapy (<xref ref-type="bibr" rid="B3">3</xref>), we utilized the threshold of 7.1 months to categorize treatment outcomes. To streamline the feature set and minimize redundancy, we employed a forward feature selection method, which identified a subset of 11 baseline laboratory features relevant to our task of classifying PFS duration. These features included mean corpuscular hemoglobin (MCH) in whole blood, urinary osmolality (SG-STY), and serum creatinine (CREA), among others. In particular, the LightGBM model, in conjunction with the chosen features, emerged as the best-performing model, attaining an accuracy and weighted F1-score of 0.70 and 0.71, respectively, in predicting the PFS of patients with advanced GC/GEJC.</p>
<p>ML is often perceived as a black box within the healthcare system, which poses challenges to its reliability because clinicians need to explain specific predictions to patients (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>). We therefore incorporated cutting-edge explainable artificial intelligence (XAI) methods (<xref ref-type="bibr" rid="B19">19</xref>&#x2013;<xref ref-type="bibr" rid="B22">22</xref>), such as SHapley Additive exPlanations (SHAP) and Diverse counterfactual explanations (DiCE), into the LightGBM model. These approaches enabled us to generate both global and individualized explanations for predictive laboratory features. The results demonstrated that LightGBM effectively captured both linear and nonlinear relationships between features and outputs, identified important thresholds of features, and established simple constraints on features for generating counterfactuals. Therefore, the proposed iPFS-SC framework not only offered high accuracy for predicting PFS but also provided explainable analysis of laboratory features. This capability is particularly valuable for individualized PFS prediction in patients with advanced GC/GEJC.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Methods and materials</title>
<sec id="s2_1">
<label>2.1</label>
<title>General setup of iPFS-SC framework</title>
<p>
<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref> provides an overview of the iPFS-SC framework. Within iPFS-SC, baseline features encompassing demographic and laboratory test data from 146 patients with advanced GC/GEJC were gathered alongside their corresponding PFS durations. The aim of iPFS-SC was to utilize interpretable ML methodologies to predict patients&#x2019; PFS, with a specified threshold at 7.1 months. As part of our study, we examined 113 laboratory features, allowing for a maximum missingness threshold of 25%. Among these, 11 features were identified as predictive variables. We then employed four ML algorithms (LR, SVM, RF, and LightGBM), with hyperparameters determined via 5-fold cross-validation within the training set. Evaluation metrics including accuracy, area under the receiver operating characteristic curve (AUC), sensitivity, precision, and F1-score were calculated for each algorithm. Furthermore, model interpretation was generated through the utilization of SHAP and DiCE algorithms within the best-performing algorithm.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The overview of iPFS-SC framework.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-15-1407632-g001.tif"/>
</fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Patient enrollment</title>
<p>The study included patients diagnosed with advanced GC/GEJC, who underwent treatment with Sintilimab plus chemotherapy at the Affiliated Hospital of Jiangnan University from January 16, 2020, to January 26, 2024. Clinical response to Sintilimab plus chemotherapy was assessed using the Response Evaluation Criteria in Solid Tumors (RECIST) criteria. Computed tomography (CT) scans and magnetic resonance imaging (MRI) were conducted at the initial visit and during the 2-year follow-up to evaluate tumor status.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Data preparation</title>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Population characteristics</title>
<p>The demographic data (age, sex, and smoking status) and laboratory tests (whole blood, plasma, serum, urine, gastric fluid, fecal, sputum, etc.) were initially assessed at patients&#x2019; baseline visits. Features with missing values exceeding 25% were excluded, resulting in 113 features across five categories: demographics (n=3), whole blood (n=24), plasma (n=49), serum (n=10), and urine (n=27). The existing missing values were imputed by Missforest (Python missingpy library (version 0.2.0)), a non-parametric random forest imputation algorithm that can cope with numerical and categorical variables simultaneously (<xref ref-type="bibr" rid="B23">23</xref>).</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>PFS outcomes</title>
<p>Derived from the findings of Xu et&#xa0;al. (<xref ref-type="bibr" rid="B3">3</xref>), the median PFS was 7.1 months for patients receiving Sintilimab plus chemotherapy. Employing this PFS threshold of 7.1 months, we created outcome labels for the iPFS-SC framework. Patients who experienced a PFS duration of 7.1 months or longer were categorized as one group (n=53), while those with a PFS shorter than 7.1 months (n=93) were classified into another group.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Feature selection</title>
<p>The dataset was split into training and test sets (7:3) in a stratified manner. We utilized StandardScaler from the Python Scikit-learn library (version 1.1.2) to normalize the data. The most predictive laboratory features were then identified through a three-step selection process within the training set. Firstly, feature importance was assessed by calculating the mean decrease in impurity (MDI) using a na&#xef;ve RF classifier. The MDI for a feature is determined by calculating the average reduction in impurity resulting from splitting on that feature across all nodes in an RF classifier (<xref ref-type="bibr" rid="B24">24</xref>). We then ranked the features in descending order based on their MDI values, with the one exhibiting the highest ranking deemed the most crucial for predicting PFS. Secondly, to address multicollinearity, pairwise Pearson correlation coefficients were computed between these features, with coefficients exceeding 0.70 subjected to further scrutiny. For the elimination process, the feature with comparatively lower MDI feature importance among the correlated features was then excluded. Lastly, the final feature corpus was constructed by iteratively incorporating the remaining features from the previous step into an RF classifier, with the cumulative AUC being evaluated at each iteration. The iterative process concluded when no further improvement in the cumulative AUC was observed.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>iPFS-SC modeling and evaluation</title>
<p>Four ML algorithms were employed, including LR, SVM, RF, and LightGBM. To determine the optimal hyperparameters for each algorithm, we utilized 5-fold cross-validation within the training set via Python Optuna framework (version 3.5.0). During cross-validation, four folds were utilized for tuning hyperparameters, while one fold was reserved to assess model performance. Throughout the training process, the Tree-structured Parzen Estimator (TPE) (<xref ref-type="bibr" rid="B25">25</xref>) was designated as a sampler with the number of trials configured to 50. We aimed to maximize and evaluate the AUC. Subsequently, we retrained the model using the suggested hyperparameters and assessed the performance of each algorithm on unseen test data. The hyperparameter ranges for each algorithm are summarized in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>. The best-performing model was integrated into the iPFS-SC framework.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Statistical analysis</title>
<p>The distribution of each feature was evaluated using the Kolmogorov-Smirnov test. To compare between the two groups (PFS &lt; 7.1 months vs. PFS &#x2265; 7.1 months), Welch&#x2019;s 2-sample t-test was employed, assuming normal distribution of continuous data. The significance level for both statistical tests was set at 0.05. Model classification performance was assessed using metrics including AUC, accuracy, recall, precision, and F1-score (the harmonic mean of recall and precision). All statistical analyses were conducted using the Python scikit-learn library (version 1.1.2) and scipy library (version 1.9.1).</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Interpretation of iPFS-SC</title>
<p>Model explanations were generated by analyzing feature attribution to predict PFS within the test set. To achieve this, we utilized two methodologies. Firstly, the SHAP algorithm (<xref ref-type="bibr" rid="B21">21</xref>) was employed to uncover global feature attributions, with higher magnitudes indicating greater contributions to shorter PFS. Furthermore, the relationship between chosen features and model output was investigated through partial dependence plots. Additionally, the SHAP force plot was used to illustrate the influence of each variable on the final SHAP value, offering localized, sample-specific explanations. These SHAP analyses provided a thorough understanding of the reasoning behind iPFS-SC&#x2019;s particular PFS predictions and supplied feature importance for individual samples.</p>
<p>Secondly, counterfactual explanation was conducted to produce interpretable changes in features aimed at achieving the desired model prediction, transitioning from PFS&lt; 7.1 months to PFS &#x2265; 7.1 months. This methodology involved exploring &#x201c;what-if&#x201d; scenarios, where the goal was to determine the outcome if the selected feature was altered. Specifically, given the model&#x2019;s outcome y<sub>i</sub> = 0 (indicating PFS shorter than 7.1 months) for the input feature space x<sub>i</sub>:{x<sub>0</sub>, x<sub>1</sub>, &#x2026;, x<sub>10</sub>}, we aimed to ascertain the outcome when the selected feature is changed to x<sub>i</sub>&#x2019;:{x<sub>0</sub>&#x2019;, x<sub>1</sub>&#x2019;, &#x2026;,x<sub>10</sub>&#x2019;}. Throughout our study, the modified inputs (cases from the test set) were fed into the trained ML model. Additionally, the DiCE algorithm (<xref ref-type="bibr" rid="B22">22</xref>) was employed to generate a set of counterfactual explanations. These implementations and visualizations were conducted using the Python shap library (version 0.44.1) and dice-ml library (version 0.11).</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>The baseline characteristics of the study population</title>
<p>Our study involved 146 patients who received Sintilimab plus chemotherapy. The median [IQR] age of the cohort was 68 [60, 73], with 105 [71.9%] male and 41 [28.1%] female participants. Among them, 93 patients had a PFS shorter than 7.1 months, with a median age of 67 [60, 73], comprising 67 [72.0%] male and 26 [28.0%] female individuals. Conversely, the long PFS group consisted of 53 patients, with a median age of 69 [60, 73], and comprised 38 [71.7%] male and 15 [28.3%] female participants. Statistical descriptions of the features utilized in iPFS-SC are presented in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>, with numerical features displayed as median [IQR]. The features listed were found to be normally distributed (Kolmogorov-Smirnov test, P-value&lt; 0.0001).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>The statistical characteristics of laboratory features assessed as important for PFS prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Feature</th>
<th valign="top" align="left">Full name</th>
<th valign="top" align="left">Clinical specimen</th>
<th valign="top" align="left">Missing<break/>Percentage<break/>(%)</th>
<th valign="top" align="left">Total<break/>(median [IQR])</th>
<th valign="top" align="left">PFS&lt; 7.1<break/>months<break/>(n=93)</th>
<th valign="top" align="left">PFS &#x2265; 7.1 months<break/>(n=53)</th>
<th valign="top" align="left">P-value<break/>(Welch&#x2019;s 2-sample t-test)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MCH</td>
<td valign="top" align="left">Mean corpuscular hemoglobin<break/>(pg)</td>
<td valign="top" rowspan="2" align="left">Whole blood</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">29.8<break/>[26.7, 31.6]</td>
<td valign="top" align="left">29.3<break/>[25.78, 31.05]</td>
<td valign="top" align="left">30.9<break/>[28.8, 32.2]</td>
<td valign="top" align="left">0.001</td>
</tr>
<tr>
<td valign="top" align="left">LYMPH</td>
<td valign="top" align="left">Absolute lymphocyte count<break/>(10<sup>9</sup>/L)</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">1.2<break/>[1.0, 1.5]</td>
<td valign="top" align="left">1.2<break/>[0.98, 1.5]</td>
<td valign="top" align="left">1.3<break/>[1.1, 1.7]</td>
<td valign="top" align="left">0.031</td>
</tr>
<tr>
<td valign="top" align="left">SG-STY</td>
<td valign="top" align="left">Urinary osmolality<break/>(mOsm/kg&#xb7;H<sub>2</sub>O)</td>
<td valign="top" align="left">Urine</td>
<td valign="top" align="left">18.49</td>
<td valign="top" align="left">550.0<break/>[416.5, 664.0]</td>
<td valign="top" align="left">514.5<break/>[389.5, 615.0]</td>
<td valign="top" align="left">620<break/>[522, 746]</td>
<td valign="top" align="left">0.002</td>
</tr>
<tr>
<td valign="top" align="left">Fe</td>
<td valign="top" align="left">Fe<break/>(<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" rowspan="6" align="left">Serum</td>
<td valign="top" align="left">4.79</td>
<td valign="top" align="left">8.2<break/>[5.2, 13.18]</td>
<td valign="top" align="left">7.85<break/>[4.74, 12.74]</td>
<td valign="top" align="left">10.0<break/>[6.37, 15.58]</td>
<td valign="top" align="left">0.012</td>
</tr>
<tr>
<td valign="top" align="left">eGFR</td>
<td valign="top" align="left">Estimated glomerular filtration rate</td>
<td valign="top" align="left">2.74</td>
<td valign="top" align="left">109.97<break/>[87.57, 129.04]</td>
<td valign="top" align="left">101.21<break/>[87.57, 124.66]</td>
<td valign="top" align="left">114.14<break/>[90.45, 143.47]</td>
<td valign="top" align="left">0.021</td>
</tr>
<tr>
<td valign="top" align="left">CA125</td>
<td valign="top" align="left">Carbohydrate antigen 125 (U/mL)</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">177.1<break/>[134.6, 215.1]</td>
<td valign="top" align="left">176<break/>[141.88, 211.53]</td>
<td valign="top" align="left">184.4<break/>[127.7, 222.5]</td>
<td valign="top" align="left">0.013</td>
</tr>
<tr>
<td valign="top" align="left">A/G</td>
<td valign="top" align="left">Albumin-to-globulin ratio</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">1.36<break/>[1.19, 1.58]</td>
<td valign="top" align="left">1.36<break/>[1.22, 1.60]</td>
<td valign="top" align="left">1.38<break/>[1.13, 1.56]</td>
<td valign="top" align="left">0.016</td>
</tr>
<tr>
<td valign="top" align="left">NGAL</td>
<td valign="top" align="left">Neutrophil gelatinase-associated lipocalin<break/>(ng/mL)</td>
<td valign="top" align="left">6.85</td>
<td valign="top" align="left">128.75<break/>[88.83, 201.55]</td>
<td valign="top" align="left">134.55<break/>[94.7, 236.78]</td>
<td valign="top" align="left">106.4<break/>[80.88, 154.53]</td>
<td valign="top" align="left">0.039</td>
</tr>
<tr>
<td valign="top" align="left">CREA</td>
<td valign="top" align="left">Creatinine<break/>(<inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">69.1<break/>[56.5, 81.6]</td>
<td valign="top" align="left">72.15<break/>[58.48, 81.38]</td>
<td valign="top" align="left">63.5<break/>[53.9, 82.0]</td>
<td valign="top" align="left">0.012</td>
</tr>
<tr>
<td valign="top" align="left">TT</td>
<td valign="top" align="left">Thrombin time<break/>(s)</td>
<td valign="top" rowspan="2" align="left">Plasma</td>
<td valign="top" align="left">5.48</td>
<td valign="top" align="left">15.95<break/>[15.2, 17.2]</td>
<td valign="top" align="left">15.6<break/>[15.0, 16.8]</td>
<td valign="top" align="left">16.6<break/>[15.8, 17.6]</td>
<td valign="top" align="left">0.022</td>
</tr>
<tr>
<td valign="top" align="left">FDP</td>
<td valign="top" align="left">Fibrinogen degradation products<break/>(<inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mtext>&#x3bc;g</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>mL</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" align="left">5.48</td>
<td valign="top" align="left">4.25<break/>[2.7, 8.38]</td>
<td valign="top" align="left">4.3<break/>[2.7, 6.9]</td>
<td valign="top" align="left">4.2<break/>[2.1, 10.6]</td>
<td valign="top" align="left">0.045</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Identification of predictive laboratory features</title>
<p>To mitigate overfitting and minimize bias in the predictive model, we adopted a forward feature selection approach to determine the optimal number of features for iPFS-SC modeling. Initially, we identified the top 50 features based on their MDI values, ranked in descending order of importance. Subsequently, after addressing multicollinearity through Pearson correlation analysis, a total of 39 features remained for further evaluation. These features were then sequentially integrated into the RF classifier, and the cumulative AUC was computed. <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref> displays the features with Pearson correlation coefficients exceeding 0.70 and their respective MDI feature importance scores. The removed features can be found in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>. <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> illustrates the top-ranked 20 features alongside their respective feature importance (left <italic>y</italic>-axis) and cumulative AUC (right <italic>y</italic>-axis). By sequentially incorporating 11 laboratory features, such as urinary SG-STY, whole blood MCH and whole blood LYMPH, an AUC of 0.84 was attained. Further addition of features did not yield any improvement, as indicated by the dashed line. Consequently, 11 features derived from four clinical categories (whole blood, plasma, serum, and urine) were utilized for modeling and predicting PFS.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Feature selection within the iPFS-SC framework. The <italic>x</italic>-axis displays the selected features, arranged in descending order of importance after addressing multicollinearity. The feature importance from RF is shown on the left <italic>y</italic>-axis, while the cumulative AUC is presented on the right <italic>y</italic>-axis. The red lines indicate the final 11 features chosen for modeling.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-15-1407632-g002.tif"/>
</fig>
<p>Furthermore, the feature selection process was conducted in nine additional training/test splits. The percentages depicting the occurrence of predictive features among these splits are presented in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;2</bold>
</xref>. It is evident that all 11 features consistently demonstrated high occurrences across various splits, highlighting the robustness of the feature selection procedure.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>LightGBM outperformed other ML algorithms for PFS prediction</title>
<p>Model performance was evaluated on the unseen test set. <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> summarizes the evaluation metrics of various models, while <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> presents the optimal hyperparameters for each algorithm. Overall, LightGBM outperformed LR, SVM, and RF, with a weighted F1-score and an accuracy of 0.71 and 0.70, respectively. Furthermore, it achieved an F1-score of 0.77 in identifying patients with short PFS. Ultimately, the LightGBM model was chosen and incorporated into the iPFS-SC framework to forecast the PFS duration of advanced GC/GEJC patients.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The evaluation metrics of four ML algorithms on unseen test data.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="left">Accuracy</th>
<th valign="top" align="left">AUC</th>
<th valign="top" align="left">Category</th>
<th valign="top" align="left">Recall</th>
<th valign="top" align="left">Precision</th>
<th valign="top" align="left">F1-score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="3" align="left">LR</td>
<td valign="top" rowspan="3" align="left">0.68</td>
<td valign="top" rowspan="3" align="left">0.65</td>
<td valign="top" align="left">PFS&lt; 7.1 months</td>
<td valign="top" align="left">0.71</td>
<td valign="top" align="left">0.86</td>
<td valign="top" align="left">0.77</td>
</tr>
<tr>
<td valign="top" align="left">PFS &#x2265; 7.1 months</td>
<td valign="top" align="left">0.6</td>
<td valign="top" align="left">0.38</td>
<td valign="top" align="left">0.46</td>
</tr>
<tr>
<td valign="top" align="left">Weighted avg.</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">0.75</td>
<td valign="top" align="left">0.70</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">SVM</td>
<td valign="top" rowspan="3" align="left">0.64</td>
<td valign="top" rowspan="3" align="left">0.60</td>
<td valign="top" align="left">PFS&lt; 7.1 months</td>
<td valign="top" align="left">0.7</td>
<td valign="top" align="left">0.75</td>
<td valign="top" align="left">0.72</td>
</tr>
<tr>
<td valign="top" align="left">PFS &#x2265; 7.1 months</td>
<td valign="top" align="left">0.5</td>
<td valign="top" align="left">0.44</td>
<td valign="top" align="left">0.47</td>
</tr>
<tr>
<td valign="top" align="left">Weighted avg.</td>
<td valign="top" align="left">0.64</td>
<td valign="top" align="left">0.65</td>
<td valign="top" align="left">0.64</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">RF</td>
<td valign="top" rowspan="3" align="left">0.64</td>
<td valign="top" rowspan="3" align="left">0.60</td>
<td valign="top" align="left">PFS&lt; 7.1 months</td>
<td valign="top" align="left">0.68</td>
<td valign="top" align="left">0.82</td>
<td valign="top" align="left">0.74</td>
</tr>
<tr>
<td valign="top" align="left">PFS &#x2265; 7.1 months</td>
<td valign="top" align="left">0.5</td>
<td valign="top" align="left">0.31</td>
<td valign="top" align="left">0.38</td>
</tr>
<tr>
<td valign="top" align="left">Weighted avg.</td>
<td valign="top" align="left">0.64</td>
<td valign="top" align="left">0.71</td>
<td valign="top" align="left">0.66</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">LightGBM</td>
<td valign="top" rowspan="3" align="left">0.70</td>
<td valign="top" rowspan="3" align="left">0.68</td>
<td valign="top" align="left">PFS&lt; 7.1 months</td>
<td valign="top" align="left">0.76</td>
<td valign="top" align="left">0.79</td>
<td valign="top" align="left">0.77</td>
</tr>
<tr>
<td valign="top" align="left">PFS &#x2265; 7.1 months</td>
<td valign="top" align="left">0.6</td>
<td valign="top" align="left">0.56</td>
<td valign="top" align="left">0.58</td>
</tr>
<tr>
<td valign="top" align="left">Weighted avg.</td>
<td valign="top" align="left">0.70</td>
<td valign="top" align="left">0.71</td>
<td valign="top" align="left">0.71</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>The optimal hyperparameters of four ML algorithms.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="left">Hyperparameters</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left">C (0.14034567027876954), solver (&#x201c;lbfgs&#x201d;)</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left">C (100), kernel (&#x201c;rbf&#x201d;)</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">max_depth (16), max_features (&#x201c;sqrt&#x201d;), min_samples_leaf (5),<break/>n_estimators (10)</td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="left">num_leaves (10), learning_rate (0.3), max_depth (8), min_child_samples (5), n_estimators (50)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>iPFS-SC interpretation</title>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>SHAP global and individualized PFS duration explanation</title>
<p>XAI methodologies, such as SHAP, offer model interpretability by combining feature importance visualization with model predictions. The mean absolute SHAP values depict the overall impact of each feature on the output of LightGBM within the test set. As depicted in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>, the top-ranking features in terms of their relative importance for predicting PFS shorter than 7.1 months were urinary SG-STY, whole blood MCH, and serum CREA, respectively. Unlike the bar plot, <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref> presents a SHAP summary plot aimed at providing a deeper understanding of the actual relationships between features and the PFS following Sintilimab plus chemotherapy. The horizontal magnitude and direction indicate the predictive strength of each feature. A positive SHAP value toward the right indicates a tendency toward short PFS (&lt; 7.1 months), while a negative value toward the left suggests long PFS (&#x2265; 7.1 months). Furthermore, the gradient coloring of each feature&#x2019;s quantitative range, from red (high value) to blue (low value), offers insights into how the model&#x2019;s prediction changes with variations in feature values. These results highlighted that urinary SG-STY, whole blood MCH, serum A/G, serum eGFR, and serum CREA exerted the greatest magnitude effects on the model&#x2019;s output, indicating their critical role as predictors of combination immunotherapy response.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Interpretation of the LightGBM classifier using SHAP for predicting PFS. The bar plot of the mean absolute SHAP value of each feature <bold>(A)</bold>, the summary plot of the distribution of SHAP values and the impact of each feature on the model&#x2019;s prediction <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-15-1407632-g003.tif"/>
</fig>
<p>To delve deeper into the importance of the top-ranking features for predicting PFS, we further created partial dependence plots. These plots illustrated the relationship between each feature and its effect on PFS, highlighting crucial thresholds for predicting outcomes (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4A-F</bold>
</xref>). The SHAP value on the <italic>y</italic>-axis indicated the direction of the effect on PFS (negative: PFS &#x2265; 7.1 months; positive: PFS &lt; 7.1 months) when one feature changed within a certain range (<italic>x</italic>-axis). <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> suggests that LightGBM effectively captured both linear and complex relationships between the selected features and the model output. In our efforts to identify the triggering features associated with long PFS, we observed that patients with elevated levels of urinary SG-STY (&gt; 600 mOsm/kg&#xb7;H<sub>2</sub>O) or whole blood MCH (&gt; 30.5 pg) exhibited negative SHAP values (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4A, B</bold>
</xref>). More dependence plots of predictive features can be found in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;3A-E</bold>
</xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The SHAP dependence plots detailing feature values and contributions to predicting PFS in LightGBM. Urinary SG-STY <bold>(A)</bold>, whole blood MCH <bold>(B)</bold>, serum CREA <bold>(C)</bold>, serum A/G <bold>(D)</bold>, serum eGFR <bold>(E)</bold>, and serum Fe <bold>(F)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-15-1407632-g004.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5A, B</bold>
</xref> present individualized explanations for two patients with PFS durations longer and shorter than 7.1 months, respectively. As shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>, iPFS-SC predicted a final SHAP value of -0.02 for the individual. Feature values of serum A/G (1.04) and serum CREA (98.7 <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>), indicating a longer PFS, contrast with those of whole blood MCH (27.9 pg), serum Fe (3.53 <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>), and whole blood LYMPH (1.2 <inline-formula>
<mml:math display="inline" id="im6">
<mml:mo>&#xd7;</mml:mo>
</mml:math>
</inline-formula> 10<sup>9</sup>/L), which suggest a shorter PFS. In contrast to the scenario depicted in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>, the individual in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref> presented a SHAP value of 0.52. The figure demonstrates that the values of serum CREA (77.4 <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>), plasma TT (14.9 s), serum eGFR (112.47), and urinary SG-STY (487.0 mOsm/kg&#xb7;H<sub>2</sub>O) had the most significant impact on the final outcome of short PFS.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Individualized explanation of patients with PFS longer <bold>(A)</bold> and shorter than 7.1 months <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-15-1407632-g005.tif"/>
</fig>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Counterfactual explanations via DiCE</title>
<p>Given the explanation evidence of SHAP feature attributions to the PFS prediction, we proceeded to examine whether making minor and interpretable adjustments to a given feature could yield a contradictory outcome. To do so, we randomly chose a patient from the test set with a PFS shorter than 7.1 months and utilized the DiCE algorithm to generate counterfactual cases for LightGBM. This process aimed to identify the optimal values of predictive features that would lead to the opposite outcome (PFS &#x2265; 7.1 months).</p>
<p>
<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> displays the laboratory reference intervals of the selected features and the counterfactual cases for one query patient from the test set. For the queried patient, assuming all other features remained unchanged, if the patient&#x2019;s urinary SG-STY was 771.49 mOsm/kg&#xb7;H<sub>2</sub>O and serum A/G was 2.04, it is anticipated that the patient would have a PFS of 7.1 months or even longer. A similar counterfactual outcome was observed when whole blood MCH was 33.79 pg and serum CREA shifted to 71.85 <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>. Furthermore, a decline in plasma TT (14.26 s) and a significant rise in serum Fe (35.17 <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) also suggested a long PFS. More counterfactual cases were generated and displayed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>DiCE counterfactual explanations of one given query case from the test set.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">MCH</th>
<th valign="top" align="left">SG-STY</th>
<th valign="top" align="left">LYMPH</th>
<th valign="top" align="left">TT</th>
<th valign="top" align="left">Fe</th>
<th valign="top" align="left">NGAL</th>
<th valign="top" align="left">eGFR</th>
<th valign="top" align="left">FDP</th>
<th valign="top" align="left">CA125</th>
<th valign="top" align="left">CREA</th>
<th valign="top" align="left">A/G</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" colspan="11" align="left">Reference interval</th>
</tr>
<tr>
<td valign="top" align="left">27-34<break/>(pg)</td>
<td valign="top" align="left">600-1000<break/>(mOsm/kg&#xb7;H<sub>2</sub>O)</td>
<td valign="top" align="left">1.1-3.2<break/>(10<sup>9</sup>/L)</td>
<td valign="top" align="left">14-21<break/>(s)</td>
<td valign="top" align="left">10.6-36.7<break/>(<inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" align="left">37-180<break/>(ng/mL)</td>
<td valign="top" align="left">&#x2014;</td>
<td valign="top" align="left">0-5<break/>(<inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mtext>&#x3bc;g</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>mL</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" align="left">0-35<break/>(U/mL)</td>
<td valign="top" align="left">57-111<break/>(<inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:mtext>&#x3bc;mol</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>L</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="top" align="left">1.2-2.4</td>
</tr>
<tr>
<th valign="top" colspan="11" align="left">Original feature set (PFS&lt; 7.1 months)</th>
</tr>
<tr>
<td valign="top" align="left">32.3</td>
<td valign="top" align="left">487.0</td>
<td valign="top" align="left">1.4</td>
<td valign="top" align="left">14.9</td>
<td valign="top" align="left">8.18</td>
<td valign="top" align="left">179.4</td>
<td valign="top" align="left">112.47</td>
<td valign="top" align="left">10.6</td>
<td valign="top" align="left">41.28</td>
<td valign="top" align="left">77.4</td>
<td valign="top" align="left">1.06</td>
</tr>
<tr>
<th valign="top" colspan="11" align="left">Counterfactual set (PFS &#x2265; 7.1 months)</th>
</tr>
<tr>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">771.49</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">2.04</td>
</tr>
<tr>
<td valign="top" align="left">33.79</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">71.85</td>
<td valign="top" align="left">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">14.26</td>
<td valign="top" align="left">35.17</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">&#x2013;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The original query features of the patient and the counterfactual feature sets are shown (&#x2013; indicates no change for the given feature).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>These results corroborated the findings of the SHAP interpretable analysis, confirming the relationship between quantitative features and PFS, as well as the critical thresholds that led to contrasting PFS outcomes.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In recent years, there has been a notable evolution in immunotherapies, which are now recommended for patients diagnosed with advanced GC/GEJC (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>). This progress has been driven by the discovery of innovative diagnostic biomarkers, the development of drugs targeting novel molecules, and the emergence of combination therapies, such as immunotherapy combined with chemotherapy, antiangiogenic agents, or anti-HER-2 antibodies (<xref ref-type="bibr" rid="B26">26</xref>). Despite these advancements, a portion of GC patients do not derive benefits from immunotherapy, as evidenced by the lack of improvement in their OS and PFS compared to chemotherapy alone (<xref ref-type="bibr" rid="B28">28</xref>).</p>
<p>Hence, there is a critical need to construct predictive models aimed at identifying patients who stand to benefit from immunotherapy, particularly those likely to achieve prolonged PFS. This not only aids in identifying suitable candidates for immunotherapy but also helps clinicians comprehend individual predictors, thereby enabling the adjustment of treatment strategies accordingly.</p>
<p>The development of iPFS-SC was grounded in the findings from the multi-center ORIENT-16 trial, which demonstrated the efficacy of Sintilimab combined with chemotherapy, resulting in a median PFS of 7.1 months for patients with advanced GC/GEJC in China (<xref ref-type="bibr" rid="B3">3</xref>). Consequently, our model&#x2019;s outcome label was to predict whether a patient would experience a PFS shorter than 7.1 months. The increasing interest in utilizing ML solutions for developing predictive tools and uncovering characteristic features underscores the necessity of creating ML algorithms for GC research. To the best of our knowledge, ML approaches have not been employed to investigate tumor response to Sintilimab combination therapy. In line with this perspective, iPFS-SC integrated interpretable ML algorithms with baseline laboratory features, enabling accurate prediction of patients&#x2019; PFS to Sintilimab plus chemotherapy while systematically evaluating the relationship between predictive features and model outputs. Additionally, thresholds for features that generated counterfactual effects were scrutinized, allowing for a better understanding of the significance of each feature for modeling.</p>
<p>Based on our findings, baseline laboratory features have emerged as crucial indicators for PFS prediction. Through a three-step forward feature selection process, we pinpointed 11 predictive features from four clinical specimens (whole blood, plasma, serum, and urine). LightGBM, trained on the predictive features, surpassed LR, SVM, and RF, achieving an accuracy of 0.70 in predicting PFS of advanced GC/GEJC patients, and an F1-score of 0.77 for identifying patients with a PFS shorter than 7.1 months. During SHAP interpretable analysis, we unearthed non-linear relationships between top-ranking features and PFS, for example, whole blood MCH, serum CREA and serum A/G.</p>
<p>Prior research has correlated elevated MCH levels with improved survival in hepatocellular carcinoma patients, indicating its significance in cancer progression (<xref ref-type="bibr" rid="B29">29</xref>, <xref ref-type="bibr" rid="B30">30</xref>). Our study identified that higher whole blood MCH levels (&gt; 30.5 pg) were negatively associated with short PFS. The trigger point and the correlation identified in the SHAP analysis were corroborated by DiCE, demonstrating that an elevated whole blood MCH level could lead to an opposite PFS outcome. Urinary SG-STY, incorporating ions, glucose, and urea levels, provided a reliable indicator of urine dilution (<xref ref-type="bibr" rid="B31">31</xref>). Despite its rare evaluation in GC studies, iPFS-SC revealed a linear relationship between urinary SG-STY and PFS, identifying a threshold of 600 mOsm/kg&#xb7;H<sub>2</sub>O indicative of a PFS duration equal to or longer than 7.1 months. In summary, the predictive features identified within the iPFS-SC framework included not only well-established biomarkers in cancer progression but also less-investigated clinical features.</p>
<p>We acknowledge the limitations when interpreting our findings. Firstly, in addition to the cohort receiving Sintilimab plus chemotherapy, a control group of advanced GC/GEJC patients who have not undergone immunotherapy can be included in future studies. This would comprehensively identify individual patients who may benefit from Sintilimab plus chemotherapy based on their distinct clinical characteristics. Secondly, we employed XAI methodologies, including SHAP global feature importance, individualized model explanation and DiCE counterfactual effects, to elucidate the intricate relationships among predictive features and the PFS outcome. It&#x2019;s important to note that the thresholds generated for features and counterfactual cases may exhibit slight variations depending on the dataset utilized. Additionally, our study was conducted using data from a single center, and the cohort size was relatively small. A multi-center study recruiting heterogeneous patient cohorts could be conducted to assess the model&#x2019;s performance on a large scale.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>In conclusion, we were the first to discover predictive features from baseline laboratory tests for patients with advanced GC/GEJC receiving Sintilimab plus chemotherapy. The iPFS-SC framework was developed through feature selection, ML-based PFS prediction, and model interpretation, enabling personalized immunotherapy. Our findings demonstrated the framework&#x2019;s ability to predict PFS in patients with advanced GC/GEJC. Leveraging XAI methodologies, we revealed the contribution of features to the model output and identified thresholds for certain features to generate contrasting PFS outcomes. With a feature corpus comprising 11 laboratory features alongside the developed model, we could effectively evaluate and interpret the PFS duration of advanced GC/GEJC patients receiving Sintilimab plus chemotherapy.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the ethics committee/institutional review board of the Affiliated Hospital of Jiangnan University. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>DW: Writing &#x2013; review &amp; editing, Writing &#x2013; original draft, Visualization, Methodology, Data curation. WX: Writing &#x2013; original draft, Resources, Investigation. XC: Writing &#x2013; original draft, Resources, Investigation. LH: Writing &#x2013; review &amp; editing, Investigation, Data curation. XG: Writing &#x2013; review &amp; editing, Resources, Investigation. LL: Writing &#x2013; review &amp; editing, Supervision, Project administration, Funding acquisition, Conceptualization. XG: Writing &#x2013; original draft, Supervision, Resources, Project administration, Investigation, Funding acquisition, Conceptualization.</p>
</sec>
</body>
<back>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The study has been funded by the National Key R&amp;D Program of China (NO. 2021YFC0122701), the Scientific Research Program of Wuxi Health Commission (NO. Z202309 and NO. M202215), the Taihu Light Technology Research (Medical and Health; NO. Y20232001), and Research Project of Cutting-Edge Tumor Support Therapy (nphcf-2022-118).</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fimmu.2024.1407632/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fimmu.2024.1407632/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.pdf" id="SM1" mimetype="application/pdf"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sung</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ferlay</surname> <given-names>J</given-names>
</name>
<name>
<surname>Siegel</surname> <given-names>RL</given-names>
</name>
<name>
<surname>Laversanne</surname> <given-names>M</given-names>
</name>
<name>
<surname>Soerjomataram</surname> <given-names>I</given-names>
</name>
<name>
<surname>Jemal</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA: A Cancer J Clin</source>. (<year>2021</year>) <volume>71</volume>:<page-range>209&#x2013;49</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3322/caac.21660</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Fei</surname> <given-names>K</given-names>
</name>
<name>
<surname>Jing</surname> <given-names>H</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>W</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Durable blockade of PD-1 signaling links preclinical efficacy of sintilimab to its clinical benefit</article-title>. <source>mAbs</source>. (<year>2019</year>) <volume>11</volume>:<page-range>1443&#x2013;51</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/19420862.2019.1654303</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>J</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>H</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>K</given-names>
</name>
<name>
<surname>Cang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Han</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>Sintilimab plus chemotherapy for unresectable gastric or gastroesophageal junction cancer</article-title>. <source>JAMA</source>. (<year>2023</year>) <volume>330</volume>:<page-range>2064&#x2013;74</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jama.2023.19918</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Shamsi</surname> <given-names>HO</given-names>
</name>
<name>
<surname>Fahmawi</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Dahbour</surname> <given-names>I</given-names>
</name>
<name>
<surname>Tabash</surname> <given-names>A</given-names>
</name>
<name>
<surname>Rogers</surname> <given-names>JE</given-names>
</name>
<name>
<surname>Mares</surname> <given-names>JE</given-names>
</name>
<etal/>
</person-group>. <article-title>Continuation of trastuzumab beyond disease progression in HER2-positive metastatic gastric cancer: the MD Anderson experience</article-title>. <source>J Gastrointest Oncol</source>. (<year>2016</year>) <volume>7</volume>:<fpage>499</fpage>&#x2013;<lpage>505</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/jgo.2016.06.16</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Han</surname> <given-names>C</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Efficacy and safety for Apatinib treatment in advanced gastric cancer: a real world study</article-title>. <source>Sci Rep</source>. (<year>2017</year>) <volume>7</volume>:<fpage>13208</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-017-13192-8</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ozveren</surname> <given-names>A</given-names>
</name>
<name>
<surname>Erdogan</surname> <given-names>AP</given-names>
</name>
<name>
<surname>Ekinci</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>The inflammatory prognostic index as a potential predictor of prognosis in metastatic gastric cancer</article-title>. <source>Sci Rep</source>. (<year>2023</year>) <volume>13</volume>:<fpage>7755</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-34778-5</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Orillard</surname> <given-names>E</given-names>
</name>
<name>
<surname>Henriques</surname> <given-names>J</given-names>
</name>
<name>
<surname>Vernerey</surname> <given-names>D</given-names>
</name>
<name>
<surname>Almotlak</surname> <given-names>H</given-names>
</name>
<name>
<surname>Calcagno</surname> <given-names>F</given-names>
</name>
<name>
<surname>Fein</surname> <given-names>F</given-names>
</name>
<etal/>
</person-group>. <article-title>Interest of the addition of taxanes to standard treatment in first-line advanced HER2 positive gastroesophageal adenocarcinoma in selective patients</article-title>. <source>Front Oncol</source>. (<year>2022</year>) <volume>12</volume>:<elocation-id>763926</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2022.763926</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ohtsu</surname> <given-names>A</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>MA</given-names>
</name>
<name>
<surname>Van Cutsem</surname> <given-names>E</given-names>
</name>
<name>
<surname>Rha</surname> <given-names>SY</given-names>
</name>
<name>
<surname>Sawaki</surname> <given-names>A</given-names>
</name>
<name>
<surname>Park</surname> <given-names>SR</given-names>
</name>
<etal/>
</person-group>. <article-title>Bevacizumab in combination with chemotherapy as first-line therapy in advanced gastric cancer: A randomized, double-blind, placebo-controlled phase III study</article-title>. <source>J Clin Oncol</source>. (<year>2011</year>) <volume>29</volume>:<page-range>3968&#x2013;76</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1200/jco.2011.36.2236</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afrash</surname> <given-names>MR</given-names>
</name>
<name>
<surname>Shafiee</surname> <given-names>M</given-names>
</name>
<name>
<surname>Kazemi-Arpanahi</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Establishing machine learning models to predict the early risk of gastric cancer based on lifestyle factors</article-title>. <source>BMC Gastroenterol</source>. (<year>2023</year>) <volume>23</volume>:<elocation-id>6</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12876-022-02626-x</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taninaga</surname> <given-names>J</given-names>
</name>
<name>
<surname>Nishiyama</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Fujibayashi</surname> <given-names>K</given-names>
</name>
<name>
<surname>Gunji</surname> <given-names>T</given-names>
</name>
<name>
<surname>Sasabe</surname> <given-names>N</given-names>
</name>
<name>
<surname>Iijima</surname> <given-names>K</given-names>
</name>
<etal/>
</person-group>. <article-title>Prediction of future gastric cancer risk using a machine learning algorithm and comprehensive medical check-up data: A case-control study</article-title>. <source>Sci Rep</source>. (<year>2019</year>) <volume>9</volume>:<fpage>12384</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-48769-y</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>M-M</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>L</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y-J</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L-T</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Y-M</given-names>
</name>
</person-group>. <article-title>Application of data mining methods to improve screening for the risk of early gastric cancer</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2018</year>) <volume>18</volume>:<fpage>121</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12911-018-0689-4</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Development and validation of a prediction rule for estimating gastric cancer risk in the Chinese high-risk population: a nationwide multicentre study</article-title>. <source>Gut</source>. (<year>2019</year>) <volume>68</volume>:<fpage>1576</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/gutjnl-2018-317556</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>J</given-names>
</name>
<name>
<surname>Qian</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>J</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>H</given-names>
</name>
<etal/>
</person-group>. <article-title>Diagnosis of gastric cancer using decision tree classification of mass spectral data</article-title>. <source>Cancer Sci</source>. (<year>2007</year>) <volume>98</volume>:<fpage>37</fpage>&#x2013;<lpage>43</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1349-7006.2006.00339.x</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mortezagholi</surname> <given-names>A</given-names>
</name>
<name>
<surname>Khosravizadeh</surname> <given-names>O</given-names>
</name>
<name>
<surname>Menhaj</surname> <given-names>MB</given-names>
</name>
<name>
<surname>Shafigh</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Kalhor</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Make intelligent of gastric cancer diagnosis error in Qazvin&#x2019;s medical centers: using data mining method</article-title>. <source>Asian Pac J Cancer Prev : APJCP</source>. (<year>2019</year>) <volume>20</volume>:<page-range>2607&#x2013;10</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.31557/apjcp.2019.20.9.2607</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Application of convolutional neural network in the diagnosis of the invasion depth of gastric cancer based on conventional endoscopy</article-title>. <source>Gastrointestinal Endoscopy</source>. (<year>2019</year>) <volume>89</volume>:<page-range>806&#x2013;15</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gie.2018.11.011</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hirasawa</surname> <given-names>T</given-names>
</name>
<name>
<surname>Aoyama</surname> <given-names>K</given-names>
</name>
<name>
<surname>Tanimoto</surname> <given-names>T</given-names>
</name>
<name>
<surname>Ishihara</surname> <given-names>S</given-names>
</name>
<name>
<surname>Shichijo</surname> <given-names>S</given-names>
</name>
<name>
<surname>Ozawa</surname> <given-names>T</given-names>
</name>
<etal/>
</person-group>. <article-title>Application of artificial intelligence using a convolutional neural network for detecting gastric cancer in endoscopic images</article-title>. <source>Gastric Cancer</source>. (<year>2018</year>) <volume>21</volume>:<page-range>653&#x2013;60</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10120-018-0793-2</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Newaz</surname> <given-names>A</given-names>
</name>
<name>
<surname>Taharat</surname> <given-names>A</given-names>
</name>
<name>
<surname>Islam</surname> <given-names>MS</given-names>
</name>
<name>
<surname>Akanda</surname> <given-names>AGMFH</given-names>
</name>
</person-group>. <article-title>An explainable machine learning framework for the accurate diagnosis of ovarian cancer</article-title>. <source>arXiv</source>. (<year>2023</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arxiv.2312.08381</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rudin</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>Why black box machine learning should be avoided for high-stakes decisions, in brief</article-title>. <source>Nat Rev Methods Primers</source>. (<year>2022</year>) <volume>2</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s43586-022-00172-0</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baron</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Explainable AI and causal understanding: counterfactual approaches considered</article-title>. <source>Minds Mach</source>. (<year>2023</year>) <volume>33</volume>:<page-range>347&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11023-023-09637-x</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname> <given-names>SM</given-names>
</name>
<name>
<surname>Erion</surname> <given-names>G</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H</given-names>
</name>
<name>
<surname>DeGrave</surname> <given-names>A</given-names>
</name>
<name>
<surname>Prutkin</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Nair</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>From local explanations to global understanding with explainable AI for trees</article-title>. <source>Nat Mach Intell</source>. (<year>2020</year>) <volume>2</volume>:<fpage>56</fpage>&#x2013;<lpage>67</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s42256-019-0138-9</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname> <given-names>S</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S-I</given-names>
</name>
</person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. <source>arXiv</source>. (<year>2017</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arxiv.1705.07874</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mothilal</surname> <given-names>RK</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>A</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>Explaining machine learning classifiers through diverse counterfactual explanations</article-title>. <source>Proc 2020 Conf Fairness Account Transpar</source>. (<year>2020</year>), <page-range>607&#x2013;17</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3351095.3372850</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stekhoven</surname> <given-names>DJ</given-names>
</name>
<name>
<surname>B&#xfc;hlmann</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>MissForest&#x2014;non-parametric missing value imputation for mixed-type data</article-title>. <source>Bioinformatics</source>. (<year>2012</year>) <volume>28</volume>:<page-range>112&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btr597</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Random forests</article-title>. <source>Mach Learn</source>. (<year>2001</year>) <volume>45</volume>:<fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1023/a:1010933404324</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bergstra</surname> <given-names>J</given-names>
</name>
<name>
<surname>Bardenet</surname> <given-names>R</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y</given-names>
</name>
<name>
<surname>K&#xe9;gl</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>Algorithms for hyper-parameter optimization</article-title>. <source>NeurIPS</source>. (<year>2011</year>).</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname> <given-names>R</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>N</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>T</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Novel frontiers of treatment for advanced gastric or gastroesophageal junction cancer (GC/GEJC): will immunotherapy be a future direction?</article-title> <source>Front Oncol</source>. (<year>2020</year>) <volume>10</volume>:<elocation-id>912</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2020.00912</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leowattana</surname> <given-names>W</given-names>
</name>
<name>
<surname>Leowattana</surname> <given-names>P</given-names>
</name>
<name>
<surname>Leowattana</surname> <given-names>T</given-names>
</name>
</person-group>. <article-title>Immunotherapy for advanced gastric cancer</article-title>. <source>World J Methodol</source>. (<year>2023</year>) <volume>13</volume>:<fpage>79</fpage>&#x2013;<lpage>97</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5662/wjm.v13.i3.79</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoon</surname> <given-names>J</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>T-Y</given-names>
</name>
<name>
<surname>Oh</surname> <given-names>D-Y</given-names>
</name>
</person-group>. <article-title>Recent progress in immunotherapy for gastric cancer</article-title>. <source>J Gastric Cancer</source>. (<year>2023</year>) <volume>23</volume>:<page-range>207&#x2013;23</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.5230/jgc.2023.23.e10</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>P</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Li</surname> <given-names>B</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>R</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>Preoperative mean corpuscular hemoglobin affecting long-term outcomes of hepatectomized patients with hepatocellular carcinoma</article-title>. <source>Mol Clin Oncol</source>. (<year>2016</year>) <volume>4</volume>:<page-range>229&#x2013;36</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3892/mco.2015.705</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>X-N</given-names>
</name>
<name>
<surname>Su</surname> <given-names>D</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>F-L</given-names>
</name>
</person-group>. <article-title>Roles of the hepcidin&#x2013;ferroportin axis and iron in cancer</article-title>. <source>Eur J Cancer Prev</source>. (<year>2014</year>) <volume>23</volume>:<page-range>122&#x2013;33</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/CEJ.0b013e3283627f14</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ozdemir</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sears</surname> <given-names>CG</given-names>
</name>
<name>
<surname>Harrington</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Poulsen</surname> <given-names>AH</given-names>
</name>
<name>
<surname>Buckley</surname> <given-names>J</given-names>
</name>
<name>
<surname>Howe</surname> <given-names>CJ</given-names>
</name>
<etal/>
</person-group>. <article-title>Relationship between urine creatinine and urine osmolality in spot samples among men and women in the Danish diet cancer and health cohort</article-title>. <source>Toxics</source>. (<year>2021</year>) <volume>9</volume>:<elocation-id>282</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/toxics9110282</pub-id>
</citation>
</ref>
</ref-list>
<glossary>
<title>Glossary</title>
<table-wrap position="anchor">
<table frame="hsides">
<tbody>
<tr>
<td valign="top" align="left">GC-GEJC</td>
<td valign="top" align="left">Advanced gastric and gastroesophageal junction adenocarcinoma</td>
</tr>
<tr>
<td valign="top" align="left">PFS</td>
<td valign="top" align="left">Progression-free survival</td>
</tr>
<tr>
<td valign="top" align="left">ML</td>
<td valign="top" align="left">Machine learning</td>
</tr>
<tr>
<td valign="top" align="left">iPFS-SC</td>
<td valign="top" align="left">Interpretable machine learning models for predicting Progression-Free Survival in patients undergoing Sintilimab plus Chemotherapy</td>
</tr>
<tr>
<td valign="top" align="left">PD-1</td>
<td valign="top" align="left">Programmed cell death protein 1</td>
</tr>
<tr>
<td valign="top" align="left">OS</td>
<td valign="top" align="left">Overall survival</td>
</tr>
<tr>
<td valign="top" align="left">CPS</td>
<td valign="top" align="left">Combined positive score</td>
</tr>
<tr>
<td valign="top" align="left">GC</td>
<td valign="top" align="left">Gastric cancer</td>
</tr>
<tr>
<td valign="top" align="left">IPS</td>
<td valign="top" align="left">Inflammatory prognostic index</td>
</tr>
<tr>
<td valign="top" align="left">CRP</td>
<td valign="top" align="left">C-reactive protein</td>
</tr>
<tr>
<td valign="top" align="left">NLR</td>
<td valign="top" align="left">Neutrophil-to-lymphocyte ratio</td>
</tr>
<tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left">Logistic Regression</td>
</tr>
<tr>
<td valign="top" align="left">NB</td>
<td valign="top" align="left">Na&#xef;ve Bayes</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left">Support Vector Machine</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">k-Nearest Neighbors</td>
</tr>
<tr>
<td valign="top" align="left">DT</td>
<td valign="top" align="left">Decision Tree</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">Random Forest</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="left">eXtreme Gradient Boosting</td>
</tr>
<tr>
<td valign="top" align="left">CNN</td>
<td valign="top" align="left">Convolutional neural network</td>
</tr>
<tr>
<td valign="top" align="left">TPE</td>
<td valign="top" align="left">Tree-structured Parzen Estimator</td>
</tr>
<tr>
<td valign="top" align="left">MCH</td>
<td valign="top" align="left">Mean corpuscular hemoglobin</td>
</tr>
<tr>
<td valign="top" align="left">SG-STY</td>
<td valign="top" align="left">Urinary osmolality</td>
</tr>
<tr>
<td valign="top" align="left">LYMPH</td>
<td valign="top" align="left">Percentage of lymphocytes</td>
</tr>
<tr>
<td valign="top" align="left">XAI</td>
<td valign="top" align="left">Explainable artificial intelligence</td>
</tr>
<tr>
<td valign="top" align="left">SHAP</td>
<td valign="top" align="left">SHapley Additive exPlanations</td>
</tr>
<tr>
<td valign="top" align="left">DiCE</td>
<td valign="top" align="left">Diverse Counterfactual Explanations</td>
</tr>
<tr>
<td valign="top" align="left">RECIST</td>
<td valign="top" align="left">Response Evaluation Criteria in Solid Tumors</td>
</tr>
<tr>
<td valign="top" align="left">CT</td>
<td valign="top" align="left">Computed tomography</td>
</tr>
<tr>
<td valign="top" align="left">MRI</td>
<td valign="top" align="left">Magnetic resonance imaging</td>
</tr>
<tr>
<td valign="top" align="left">MDI</td>
<td valign="top" align="left">Mean decrease in impurity</td>
</tr>
<tr>
<td valign="top" align="left">NGAL</td>
<td valign="top" align="left">Neutrophil gelatinase-associated lipocalin</td>
</tr>
<tr>
<td valign="top" align="left">CREA</td>
<td valign="top" align="left">Creatinine</td>
</tr>
<tr>
<td valign="top" align="left">TT</td>
<td valign="top" align="left">Thrombin time</td>
</tr>
<tr>
<td valign="top" align="left">eGFR</td>
<td valign="top" align="left">Estimated glomerular filtration rate</td>
</tr>
<tr>
<td valign="top" align="left">CA125</td>
<td valign="top" align="left">Carbohydrate antigen 125</td>
</tr>
<tr>
<td valign="top" align="left">FDP</td>
<td valign="top" align="left">Fibrinogen degradation products</td>
</tr>
</tbody>
</table>
</table-wrap>
</glossary>
</back>
</article>