<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med. Technol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Medical Technology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med. Technol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-3129</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmedt.2025.1732580</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Explainable machine learning for predicting postoperative length of stay after gastrectomy: a nationwide study using XGBoost and SHAP</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Maruyama</surname><given-names>Tsunehiko</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2941648/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Ikezawa</surname><given-names>Kazuto</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author">
<name><surname>Suzuki</surname><given-names>Hideo</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author">
<name><surname>Kurokawa</surname><given-names>Tomohiro</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2243327/overview" />
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Akashi</surname><given-names>Yoshimasa</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Oda</surname><given-names>Tatsuya</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Surgery, Mito Saiseikai General Hospital</institution>, <city>Mito</city>, <country country="jp">Japan</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Gastroenterological Surgery, University of Tsukuba</institution>, <city>Tsukuba</city>, <country country="jp">Japan</country></aff>
<aff id="aff3"><label>3</label><institution>Saiseikai Research Institute of Health Care and Welfare</institution>, <city>Tokyo</city>, <country country="jp">Japan</country></aff>
<aff id="aff4"><label>4</label><institution>Department of Gastroenterology, Tsukuba Memorial Hospital</institution>, <city>Tsukuba</city>, <country country="jp">Japan</country></aff>
<aff id="aff5"><label>5</label><institution>Research and Development Center for Precision Medicine, University of Tsukuba</institution>, <city>Tsukuba</city>, <country country="jp">Japan</country></aff>
<aff id="aff6"><label>6</label><institution>Department of Medical Epigenomics Research, Fukushima Medical University</institution>, <city>Fukushima</city>, <country country="jp">Japan</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Tsunehiko Maruyama <email xlink:href="mailto:t-maru@ya2.so-net.ne.jp">t-maru@ya2.so-net.ne.jp</email></corresp>
<fn fn-type="other" id="fn001"><label>&#x2020;</label><p>ORCID Tsunehiko Maruyama <uri xlink:href="https://orcid.org/0000-0002-9701-7604">orcid.org/0000-0002-9701-7604</uri></p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-12-05"><day>05</day><month>12</month><year>2025</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1732580</elocation-id>
<history>
<date date-type="received"><day>28</day><month>10</month><year>2025</year></date>
<date date-type="rev-recd"><day>20</day><month>11</month><year>2025</year></date>
<date date-type="accepted"><day>20</day><month>11</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Maruyama, Ikezawa, Suzuki, Kurokawa, Akashi and Oda.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Maruyama, Ikezawa, Suzuki, Kurokawa, Akashi and Oda</copyright-holder><license><ali:license_ref start_date="2025-12-05">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Background</title>
<p>Gastric cancer remains a major cause of cancer-related morbidity and mortality. Despite advances in surgical and perioperative care, prolonged hospitalization continues to strain healthcare systems. Predicting postoperative length of stay (LOS) could support personalized care and efficient resource allocation. Japan&#x0027;s nationwide Diagnosis Procedure Combination (DPC) database provides real-world data for large-scale analysis, but no study has applied machine learning to predict LOS after gastrectomy.</p>
</sec><sec><title>Methods</title>
<p>This retrospective study included 26,097 patients who underwent gastrectomy between 2017 and 2022 at 472 hospitals in Japan. Using XGBoost, we developed a predictive model based on 1,433 admission-time variables extracted from the DPC database. Model performance was evaluated using Root Mean Squared Error (RMSE) and Mean Absolute Error (MAE) in a five-fold cross-validation. SHAP values were used to interpret feature importance.</p>
</sec><sec><title>Results</title>
<p>The final model achieved an RMSE of 3.74 and MAE of 2.82 days. Key predictors of LOS included surgical procedure (laparoscopic distal gastrectomy and open total gastrectomy), designated cancer hospital, hospital size, peritoneal dissemination, and admission ADL score. SHAP analysis revealed that laparoscopic distal gastrectomy and higher hospital volume were associated with shorter LOS, while open total gastrectomy was associated with longer LOS.</p>
</sec><sec><title>Conclusions</title>
<p>We developed a machine learning model that predicts postoperative length of stay with an error range of 2&#x2013;4 days using admission data. This proof-of-concept study demonstrates the feasibility of predicting length of stay from admission data, showing that explainable AI can replicate intuitive patterns in surgical oncology while simultaneously identifying unexpected insights from administrative data. These findings highlight the clinical potential of explainable AI for perioperative workflow optimization.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>DPC data</kwd>
<kwd>gastric cancer</kwd>
<kwd>explainable AI</kwd>
<kwd>length of stay</kwd>
</kwd-group><funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. This study was funded by JSPS KAKENHI Grant Number JP 22K10407. This paper was supported by a research grant from the Japan Society of Metabolism and Clinical Nutrition.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="3"/><equation-count count="0"/><ref-count count="26"/><page-count count="8"/><word-count count="58264"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Medtech Data Analytics</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>According to GLOBOCAN statistics, gastric cancer ranks fifth in both incidence and mortality worldwide (<xref ref-type="bibr" rid="B1">1</xref>). Advances in perioperative management have led to a reduction in postoperative complication rates and shorter hospital stays following gastric cancer surgery. Curative gastrectomy remains the only potentially curative treatment for patients with resectable gastric cancer. However, despite improvements in surgical techniques and perioperative care, this procedure is still associated with a high rate of complications (30&#x0025;&#x2013;44&#x0025;) and a mortality rate of 3&#x0025;&#x2013;4&#x0025; (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>). In recent years, the aging population, increase in comorbidities, and diversity of surgical risk have contributed to a consistent incidence of postoperative complications, prolonged hospital stays, increased healthcare costs, and greater burdens on healthcare providers.</p>
<p>The Diagnosis Procedure Combination (DPC) system, introduced in Japan in 2003, is the largest medical database in Japan, collecting data from approximately 1,700 acute care hospitals nationwide (<xref ref-type="bibr" rid="B4">4</xref>). It has been widely used in various clinical studies.</p>
<p>Machine learning, a subfield of artificial intelligence (AI), has evolved to flexibly capture complex nonlinear relationships, particularly when handling large-scale datasets, and is becoming increasingly common in medical research (<xref ref-type="bibr" rid="B5">5</xref>). The combination of AI with big data is highly synergistic, and analyses based on both are expected to play a key role in improving healthcare systems. Accurate prediction of postoperative length of stay (LOS) could help reduce the burden on healthcare staff and improve patient outcomes. Although several studies have analyzed short-term outcomes after surgery using Japan&#x0027;s DPC data (<xref ref-type="bibr" rid="B6">6</xref>), none have applied machine learning to these data. Therefore, the objective of this study was to identify factors associated with LOS after gastrectomy for gastric cancer using machine learning applied to the DPC database, and to develop a predictive model capable of estimating postoperative LOS at the time of admission. We emphasize the proof-of-concept nature of this work and discuss the methodological, clinical, and healthcare-system implications.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<p>This study was conducted in accordance with the Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis (TRIPOD) guidelines (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<sec id="s2a"><label>2.1</label><title>Source of data</title>
<p>We obtained anonymized DPC data from Medical Data Vision Co., Ltd. Patients were identified using the International Classification of Diseases, 10th Revision (ICD-10) code C16 (malignant neoplasm of the stomach). A total of 284,953 gastric cancer patients hospitalized at 472 institutions between August 1, 2017, and July 31, 2022, were initially identified. Among them, 26,299 surgical cases were extracted using the Japanese surgical classification code, known as the K-code. The exclusion criteria were as follows: (1) unknown height or weight; (2) age under 18 or over 100 years; (3) hospital stay shorter than 2 days; and (4) death within 7 days of admission. After applying these criteria, 26,097 patients were included in the final analysis (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>).</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Flowchart of patient selection from the DPC database. Among 284,953 gastric cancer patients, 26,097 who underwent surgery and met inclusion criteria were analyzed.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fmedt-07-1732580-g001.tif"><alt-text content-type="machine-generated">Flowchart detailing the selection process from a DPC database of 284,953 gastric cancer patients. Out of these, 258,654 without surgery were excluded. From 26,299 who underwent gastric surgery, 202 more were excluded due to incomplete data, age constraints, early discharge, or death. A total of 26,097 eligible patients remained.</alt-text>
</graphic>
</fig>
<p>In Japan, healthcare staff are required to select appropriate DPC codes when admitting patients (<xref ref-type="bibr" rid="B8">8</xref>). The DPC database includes a wide range of clinical information such as patient background characteristics, comorbidities, admission and discharge details, diagnoses, surgeries, and medications. To prepare the dataset for analysis, all statistical information related to each hospitalization was reviewed. The original DPC dataset consisted of multiple CSV files containing mixed data types, including numerical variables (e.g., age, Barthel Index), categorical variables (e.g., surgical codes, hospital type), and free-text diagnosis fields converted to ICD-10 codes automatically standardized by the DPC system. All available data were thoroughly examined, including variable contents, formats, patterns, missing values, and invalid entries, to ensure standardization. The data files were then merged for analysis, cleaned, and filtered. Columns or rows with unreadable content due to numerical or format errors were removed. Missing values were encoded as &#x201C;NA&#x201D; and processed according to XGBoost&#x0027;s built-in handling of missingness. To ensure dataset consistency prior to model training, all variables were converted to standardized numerical values using one-hot encoding. A total of 1,433 variables were extracted as features for analysis.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Data analysis</title>
<p>To build the machine learning model and identify factors with high predictive importance, we employed eXtreme Gradient Boosting (XGBoost), a representative ensemble learning method. XGBoost is a tree-based machine learning algorithm widely used for classification and regression tasks, and is known for its high performance and computational efficiency as an advanced implementation of gradient boosting (an ensemble learning method) (<xref ref-type="bibr" rid="B9">9</xref>). The analytical procedure using XGBoost was as follows. We used the Python XGBoost library to calculate feature importance and constructed a predictive model with postoperative LOS in gastric cancer patients as the target variable. The model&#x0027;s predictive accuracy was evaluated using Root Mean Squared Error (RMSE) and Mean Absolute Error (MAE). Five-fold cross-validation was performed by splitting the data into five subsets, using one subset for testing and the remaining four for training. The evaluation metric was the average of performance across all five folds (folds 0&#x2013;4). XGBoost was selected because it generally outperforms random forests when handling large tabular datasets containing mixed data types, missing values, and complex nonlinear interactions. Furthermore, gradient boosting decision tree models have repeatedly demonstrated high predictive performance for clinical outcomes in tabular medical datasets, often surpassing traditional methods and other machine learning models such as random forests (<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>To interpret the model&#x0027;s predictions, we used SHapley Additive exPlanations (SHAP), a model-agnostic interpretability method that helps explain the outputs of machine learning models and is widely applicable across various machine learning models (<xref ref-type="bibr" rid="B12">12</xref>). SHAP is based on cooperative game theory, where each feature is treated as a &#x201C;player&#x201D; contributing to the prediction. A feature&#x0027;s SHAP value represents its average marginal contribution across all possible combinations of features. Formally, SHAP calculates this value by evaluating the difference in model output when the feature is included vs. excluded. This allows for a mathematically consistent measurement of feature contribution within complex models like XGBoost (<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B14">14</xref>). SHAP enables the quantification and interpretation of the contribution of each explanatory variable to the model&#x0027;s predictions, even in complex models with multiple features. A positive SHAP value indicates that the variable contributes to a longer postoperative LOS, while a negative SHAP value indicates that the variable contributes to a shorter LOS.</p>
<p>The data supporting the findings of this study are available from the University of Tsukuba. However, due to licensing agreements, the data are not publicly accessible. Data may be obtained from the corresponding author upon reasonable request and with permission from the University of Tsukuba.</p>
<p>This study was conducted in accordance with the Declaration of Helsinki and was approved by the Ethics Committee for Saiseikai Research Institute of Health Care and Welfare (Approval No. R03-01-03). Because this study was based on secondary analysis of administrative data, individual patients could not be identified. Informed consent was waived due to the anonymized nature of the dataset. Clinical trial number: not applicable.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Patient characteristics and surgical outcomes</title>
<p>The clinicopathological characteristics of the patients are summarized in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>. Of the 26,097 patients, 17,864 (68.5&#x0025;) were male and 8,233 (31.5&#x0025;) were female. The median age was 72 years (range, 19&#x2013;98 years), and the median body mass index (BMI) was 22.4&#x2005;kg/m<sup>2</sup> (range, 11.1&#x2013;42.3). The median total hospital stay was 13 days (range, 2&#x2013;290 days), and the median postoperative LOS was 11 days (range, 0&#x2013;30 days). A total of 114 patients (0.4&#x0025;) died within 28 days after surgery.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Baseline characteristics of patients undergoing gastrectomy for gastric cancer.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Factor</th>
<th valign="top" align="left" rowspan="2">Value</th>
<th valign="top" align="center"><italic>N</italic>&#x2009;&#x003D;&#x2009;26,097</th>
<th valign="top" align="center" rowspan="2">&#x0025;</th>
</tr>
<tr>
<th valign="top" align="center">Number of cases</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Gender, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">Male</td>
<td valign="top" align="center">17,864</td>
<td valign="top" align="center">68.5</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Female</td>
<td valign="top" align="center">8,233</td>
<td valign="top" align="center">31.5</td>
</tr>
<tr>
<td valign="top" align="left">Age (years)</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">72.0 (19.0&#x2013;98.0)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Height (cm)</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">162.0 (105.0&#x2013;193.0)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Weight (kg)</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">58.2 (30.1&#x2013;129.5)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">BMI</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">22.4 (11.1&#x2013;42.3)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Length of hospital stay (day)</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">13.0 (2.0&#x2013;290.0)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Length of hospital stay after surgery (day)</td>
<td valign="top" align="left">median (range)</td>
<td valign="top" align="center">11.0 (0.0&#x2013;30.0)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left" rowspan="7">UICC T, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">25</td>
<td valign="top" align="center">&#x003C;0.1</td>
</tr>
<tr>
<td valign="top" align="left">is</td>
<td valign="top" align="center">30</td>
<td valign="top" align="center">0.1</td>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">10,648</td>
<td valign="top" align="center">40.8</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">3,851</td>
<td valign="top" align="center">15.2</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">5,043</td>
<td valign="top" align="center">20</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">4,659</td>
<td valign="top" align="center">18.4</td>
</tr>
<tr>
<td valign="top" align="left">X</td>
<td valign="top" align="center">1,018</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="6">UICC N, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">15,890</td>
<td valign="top" align="center">62.9</td>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">3,768</td>
<td valign="top" align="center">14.9</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">2,613</td>
<td valign="top" align="center">10.3</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">1,906</td>
<td valign="top" align="center">7.5</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">&#x003C;0.1</td>
</tr>
<tr>
<td valign="top" align="left">X</td>
<td valign="top" align="center">1,099</td>
<td valign="top" align="center">4.3</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">UICC M, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">22,994</td>
<td valign="top" align="center">91</td>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">1,177</td>
<td valign="top" align="center">4.7</td>
</tr>
<tr>
<td valign="top" align="left">X</td>
<td valign="top" align="center">1,109</td>
<td valign="top" align="center">4.4</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">Number of hospital beds, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">&#x2264;199</td>
<td valign="top" align="center">916</td>
<td valign="top" align="center">3.5</td>
</tr>
<tr>
<td valign="top" align="left">200&#x2013;499</td>
<td valign="top" align="center">13,660</td>
<td valign="top" align="center">52.3</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;500</td>
<td valign="top" align="center">11,521</td>
<td valign="top" align="center">44.1</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Designated cancer hospitals, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">&#x002B;</td>
<td valign="top" align="center">21,222</td>
<td valign="top" align="center">81.3</td>
</tr>
<tr>
<td valign="top" align="left">-</td>
<td valign="top" align="center">4,875</td>
<td valign="top" align="center">18.7</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Death within 28 days after surgery, <italic>n</italic> (&#x0025;)</td>
<td valign="top" align="left">&#x002B;</td>
<td valign="top" align="center">114</td>
<td valign="top" align="center">0.4</td>
</tr>
<tr>
<td valign="top" align="left">-</td>
<td valign="top" align="center">25,983</td>
<td valign="top" align="center">99.6</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Regarding the type of surgery, laparoscopic distal gastrectomy was the most common procedure, performed in 11,509 patients (44.1&#x0025;), followed by open distal gastrectomy in 6,946 patients (26.6&#x0025;) (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>).</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Distribution of surgical procedures in the study cohort.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Surgical codes in Japan</th>
<th valign="top" align="left" rowspan="2">Surgical procedure</th>
<th valign="top" align="center"><italic>N</italic>&#x2009;&#x003D;&#x2009;26,097</th>
<th valign="top" align="center" rowspan="2">&#x0025;</th>
</tr>
<tr>
<th valign="top" align="center">Number of cases</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">K6552</td>
<td valign="top" align="left">Open distal gastrectomy</td>
<td valign="top" align="center">6,946</td>
<td valign="top" align="center">26.6</td>
</tr>
<tr>
<td valign="top" align="left">K655-22</td>
<td valign="top" align="left">Laparoscopic distal gastrectomy</td>
<td valign="top" align="center">11,509</td>
<td valign="top" align="center">44.1</td>
</tr>
<tr>
<td valign="top" align="left">K655-42</td>
<td valign="top" align="left">Open proximal gastrectomy</td>
<td valign="top" align="center">417</td>
<td valign="top" align="center">1.6</td>
</tr>
<tr>
<td valign="top" align="left">K655-52</td>
<td valign="top" align="left">Laparoscopic proximal gastrectomy</td>
<td valign="top" align="center">987</td>
<td valign="top" align="center">3.8</td>
</tr>
<tr>
<td valign="top" align="left">K6572</td>
<td valign="top" align="left">Open total gastrectomy</td>
<td valign="top" align="center">4,436</td>
<td valign="top" align="center">17.0</td>
</tr>
<tr>
<td valign="top" align="left">K657-22</td>
<td valign="top" align="left">Laparoscopic total gastrectomy</td>
<td valign="top" align="center">1,802</td>
<td valign="top" align="center">6.9</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3b"><label>3.2</label><title>Feature importance and model performance (RMSE and MAE)</title>
<p>The relative importance of each predictive feature was assessed using the feature importance metrics provided by the XGBoost model. The most important predictors included laparoscopic distal gastrectomy, designated cancer hospital, presence of peritoneal metastasis, open total gastrectomy, hospital size, and the Activities of Daily Living (ADL) score at admission (<xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>). The model achieved an RMSE of 3.74 (standard error 0.03) and an MAE of 2.82 (standard error 0.01), indicating that postoperative LOS could be predicted with an error of approximately 2&#x2013;4 days (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>).</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Top features contributing to postoperative length of stay predicted by XGBoost. Features such as laparoscopic distal gastrectomy, designation as a cancer hospital, presence of peritoneal dissemination, open total gastrectomy, hospital size, and the ADL (Activities of Daily Living) score at admission had the greatest impact on predicted length of stay.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fmedt-07-1732580-g002.tif"><alt-text content-type="machine-generated">Bar chart displaying the relative frequencies of various medical conditions and hospital factors. "Laparoscopic distal gastrectomy" has the highest value, followed by "designated cancer hospital" and "peritoneal dissemination." Error bars are shown for each category.</alt-text>
</graphic>
</fig>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Performance metrics of the machine learning model for predicting postoperative length of stay.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">Mean</th>
<th valign="top" align="center">SD</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">RMSE</td>
<td valign="top" align="center">3.74</td>
<td valign="top" align="center">0.03</td>
</tr>
<tr>
<td valign="top" align="left">MAE</td>
<td valign="top" align="center">2.82</td>
<td valign="top" align="center">0.01</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3c"><label>3.3</label><title>SHAP-based interpretation of predictive features</title>
<p><xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref> shows the SHAP comprehensive plot generated from the model. In this plot, explanatory variables contributing most to the model&#x0027;s prediction are ranked from top to bottom. Each row corresponds to a variable, and each dot represents a single patient. Red dots indicate higher values of the corresponding feature, while blue dots indicate lower values. The horizontal axis shows the SHAP value, which corresponds to the log-odds in logistic regression. Red dots on the left side suggest that higher feature values are associated with shorter postoperative LOS, whereas blue dots on the right side indicate that lower values are associated with longer LOS.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>SHAP summary plot for feature interpretability. SHAP (SHapley Additive exPlanations) values visualize the influence of each feature on the model&#x0027;s output across all patients. Each dot represents an individual patient. Red dots indicate higher feature values, while blue dots indicate lower values. Features are ranked by overall impact. Laparoscopic surgery and large hospital size were associated with shorter length of stay, while open total gastrectomy contributed to prolonged hospitalization.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fmedt-07-1732580-g003.tif"><alt-text content-type="machine-generated">SHAP summary plot illustrating the impact on model output for various medical and health features. Features are listed on the left, with corresponding SHAP values along the x-axis indicating impact from low to high. Colors range from red (high) to blue (low), showing contribution strength.</alt-text>
</graphic>
</fig>
<p>Laparoscopic distal gastrectomy was associated with shorter postoperative LOS, while open total gastrectomy was associated with longer stays. Undergoing surgery at a designated cancer hospital or at a larger hospital tended to shorten LOS. Interestingly, the presence of peritoneal metastasis also showed a trend toward shorter LOS, while the impact of ADL score at admission was inconclusive.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>Accurately predicting LOS is essential for hospital bed management and staffing decisions (<xref ref-type="bibr" rid="B15">15</xref>). However, LOS is influenced by numerous factors, making it difficult to forecast with high precision. Machine learning enables computers to uncover patterns in data without explicitly programmed rules (<xref ref-type="bibr" rid="B16">16</xref>). Given the data-driven nature of machine learning, which does not require assumptions, we hypothesized that it could help predict LOS in hospitalized patients. Using Japan&#x0027;s DPC database and machine learning techniques, we developed a model capable of predicting postoperative LOS after gastric cancer surgery with an error margin of approximately 2&#x2013;4 days based on RMSE and MAE.</p>
<p>In recent years, an increasing number of studies have developed clinical prediction models using machine learning. Pera et al. developed a 90-day postoperative mortality prediction model for gastric cancer using a development cohort of 3,182 cases and a validation cohort of 266 cases, achieving an Area Under the Curve (AUC) of 0.844. They identified six major contributing variables: age, hospital size, preoperative albumin and hemoglobin levels, type of gastrectomy, and a history of chronic obstructive pulmonary disease (<xref ref-type="bibr" rid="B17">17</xref>). Similarly, Zhang et al. analyzed 1,481 cases of thoracoscopic surgery for lung cancer and developed a model to predict postoperative LOS, reporting an AUC of 0.72&#x2013;0.80. Key predictors included operation time, age, and serum creatinine (<xref ref-type="bibr" rid="B18">18</xref>). Additionally, Lu et al. predicted postoperative complications (<xref ref-type="bibr" rid="B19">19</xref>), while Jo et al. and Cho et al. predicted length of stay using XGBoost across the broad surgical dataset (<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B21">21</xref>). However, no studies exist that utilize nationwide administrative data specifically focused on gastrectomy. Shi et al. applied SHAP interpretation to gastrointestinal surgery, but their study targeted a small, single-institution prospective cohort (<xref ref-type="bibr" rid="B22">22</xref>).</p>
<p>In our model for predicting postoperative LOS in gastric cancer, key contributing factors included the type of gastrectomy, designation as a cancer hospital, hospital size, and the patient&#x0027;s ADL score. Laparoscopic distal gastrectomy emerged as the most influential factor in shortening LOS, consistent with previous analyses using Japan&#x0027;s National Clinical Database (NCD) (<xref ref-type="bibr" rid="B23">23</xref>). Conversely, open total gastrectomy was associated with longer LOS, which is a clinically intuitive finding for surgeons. While the impact of hospital size on postoperative outcomes has been previously debated (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>), our results suggest that hospital size plays a more significant role than patient-related factors in predicting LOS. Similarly, surgeries performed at designated cancer hospitals in Japan were associated with shorter LOS. The presence of peritoneal metastasis was identified as a high-impact variable, and SHAP analysis further indicated that it contributed to a shorter postoperative LOS. This finding diverges from typical clinical expectations, highlighting a potential discrepancy between clinical intuition and machine learning&#x2013;based predictions. One possible explanation for the association between peritoneal dissemination and shorter postoperative LOS is that patients with peritoneal metastasis often undergo diagnostic or palliative rather than curative surgery, leading to earlier discharge or transfer to other facilities postoperatively. Such inconsistencies underscore an important challenge for the future application of machine learning in clinical practice. Importantly, these relationships are correlations based on data, not causal relationships.
Since machine learning identifies statistical patterns without prior assumptions, observed relationships should not be interpreted as direct causation.</p>
<p>This study has several limitations. Unlike the prospective randomized dataset analyzed by Shi et al. (<xref ref-type="bibr" rid="B22">22</xref>), our study utilized retrospective administrative data. While our cohort is significantly larger and multi-center, prospective data may offer higher internal validity. Specifically, this retrospective analysis is based on secondary use of Japan&#x0027;s DPC claims-based administrative database, which lacks detailed clinical information such as tumor staging, postoperative complication classification, and preoperative laboratory values. Additionally, variability in coding practices and differences in data entry accuracy across institutions may have influenced the performance of the machine learning model. Nonetheless, the XGBoost algorithm is robust to such noise and missing data. During training, the algorithm learns the optimal split direction for missing values, allowing it to handle incomplete data without requiring imputation or feature removal (<xref ref-type="bibr" rid="B26">26</xref>). Furthermore, the study period overlaps with the COVID-19 pandemic, which influenced discharge policies. External or temporal validation is needed to assess generalizability.</p>
<p>A major strength of this study is that, to our knowledge, it is the first to develop a machine learning-based model using Japan&#x0027;s DPC data to predict postoperative LOS following gastric cancer surgery. Furthermore, the model achieved a high degree of accuracy, with an error margin of only 2&#x2013;4 days. Being able to predict LOS at the time of admission and identify the relative importance and directionality (positive or negative) of contributing factors may facilitate effective perioperative interventions. Despite limitations, the model has potential clinical applications. First, it may aid in bed management by providing early LOS estimates. Second, it may enable earlier multidisciplinary interventions for patients predicted to have prolonged stays. Third, it may support communication with patients and families by offering realistic LOS expectations. These applications require prospective validation and careful integration into clinical workflows.</p>
<p>Future research should include external validation on other datasets, temporal validation across different time periods, and adaptation to international healthcare systems. Integrating additional clinical information, including tumor stage and postoperative complications, will further enhance predictive performance and clinical utility.</p>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusions</title>
<p>We developed and interpreted an XGBoost-based model to predict LOS after gastric cancer surgery using Japanese DPC data. This model demonstrated reasonable accuracy and identified both intuitive and non-intuitive predictors. This proof-of-concept study highlighted the feasibility of predictions based on administrative data, while emphasizing the need for external validation, and demonstrating the potential of explainable AI for perioperative decision support in oncology.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee for Saiseikai Research Institute of Health Care and Welfare (Approval No. R03-01-03). The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x0027; legal guardians/next of kin.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>TM: Conceptualization, Investigation, Methodology, Visualization, Writing &#x2013; original draft. KI: Conceptualization, Methodology, Writing &#x2013; original draft. HS: Conceptualization, Methodology, Writing &#x2013; original draft. TK: Conceptualization, Methodology, Writing &#x2013; original draft. YA: Validation, Writing &#x2013; review &#x0026; editing. TO: Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>We would like to express our sincere gratitude to Associate Professor Yasunori Futamura and Professor Tetsuya Sakurai of the Center for Artificial Intelligence Research, University of Tsukuba, for their invaluable contributions to the machine learning analyses conducted in this study.</p>
</ack>
<sec id="s10" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bray</surname> <given-names>F</given-names></name> <name><surname>Laversanne</surname> <given-names>M</given-names></name> <name><surname>Sung</surname> <given-names>H</given-names></name> <name><surname>Ferlay</surname> <given-names>J</given-names></name> <name><surname>Siegel</surname> <given-names>RL</given-names></name> <name><surname>Soerjomataram</surname> <given-names>I</given-names></name><etal/></person-group> <article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA Cancer J Clin</source>. (<year>2024</year>) <volume>74</volume>:<fpage>229</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21834</pub-id><pub-id pub-id-type="pmid">38572751</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baiocchi</surname> <given-names>GL</given-names></name> <name><surname>Giacopuzzi</surname> <given-names>S</given-names></name> <name><surname>Reim</surname> <given-names>D</given-names></name> <name><surname>Piessen</surname> <given-names>G</given-names></name> <name><surname>Costa</surname> <given-names>PMD</given-names></name> <name><surname>Reynolds</surname> <given-names>JV</given-names></name><etal/></person-group> <article-title>Incidence and grading of complications after gastrectomy for cancer using the GASTRODATA registry: a European retrospective observational study</article-title>. <source>Ann Surg</source>. (<year>2020</year>) <volume>272</volume>:<fpage>807</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1097/sla.0000000000004341</pub-id><pub-id pub-id-type="pmid">32925254</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Voeten</surname> <given-names>DM</given-names></name> <name><surname>Busweiler</surname> <given-names>LAD</given-names></name> <name><surname>van der Werf</surname> <given-names>LR</given-names></name> <name><surname>Wijnhoven</surname> <given-names>BPL</given-names></name> <name><surname>Verhoeven</surname> <given-names>RHA</given-names></name> <name><surname>van Sandick</surname> <given-names>JW</given-names></name><etal/></person-group> <article-title>Outcomes of esophagogastric cancer surgery during eight years of surgical auditing by the Dutch upper gastrointestinal cancer audit (DUCA)</article-title>. <source>Ann Surg</source>. (<year>2021</year>) <volume>274</volume>:<fpage>866</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1097/sla.0000000000005116</pub-id><pub-id pub-id-type="pmid">34334633</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hayashida</surname> <given-names>K</given-names></name> <name><surname>Murakami</surname> <given-names>G</given-names></name> <name><surname>Matsuda</surname> <given-names>S</given-names></name> <name><surname>Fushimi</surname> <given-names>K</given-names></name></person-group>. <article-title>History and profile of diagnosis procedure combination (DPC): development of a real data collection system for acute inpatient care in Japan</article-title>. <source>J Epidemiol</source>. (<year>2021</year>) <volume>31</volume>:<fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.2188/jea.JE20200288</pub-id><pub-id pub-id-type="pmid">33012777</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Collins</surname> <given-names>GS</given-names></name> <name><surname>Dhiman</surname> <given-names>P</given-names></name> <name><surname>Andaur Navarro</surname> <given-names>CL</given-names></name> <name><surname>Ma</surname> <given-names>J</given-names></name> <name><surname>Hooft</surname> <given-names>L</given-names></name> <name><surname>Reitsma</surname> <given-names>JB</given-names></name><etal/></person-group> <article-title>Protocol for development of a reporting guideline (TRIPOD-AI) and risk of bias tool (PROBAST-AI) for diagnostic and prognostic prediction model studies based on artificial intelligence</article-title>. <source>BMJ Open</source>. (<year>2021</year>) <volume>11</volume>:<fpage>e048008</fpage>. <pub-id pub-id-type="doi">10.1136/bmjopen-2020-048008</pub-id><pub-id pub-id-type="pmid">34244270</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tajima</surname> <given-names>T</given-names></name> <name><surname>Nagata</surname> <given-names>J</given-names></name> <name><surname>Akiyama</surname> <given-names>Y</given-names></name> <name><surname>Torigoe</surname> <given-names>T</given-names></name> <name><surname>Fujimoto</surname> <given-names>K</given-names></name> <name><surname>Sato</surname> <given-names>N</given-names></name><etal/></person-group> <article-title>Open colectomy vs. laparoscopic colectomy in Japan: a retrospective study using real-world data from the diagnosis procedure combination database</article-title>. <source>Surg Today</source>. (<year>2020</year>) <volume>50</volume>:<fpage>1255</fpage>&#x2013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.1007/s00595-020-02006-6</pub-id><pub-id pub-id-type="pmid">32335714</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Collins</surname> <given-names>GS</given-names></name> <name><surname>Moons</surname> <given-names>KGM</given-names></name> <name><surname>Dhiman</surname> <given-names>P</given-names></name> <name><surname>Riley</surname> <given-names>RD</given-names></name> <name><surname>Beam</surname> <given-names>AL</given-names></name> <name><surname>Van Calster</surname> <given-names>B</given-names></name><etal/></person-group> <article-title>TRIPOD&#x2009;&#x002B;&#x2009;AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title>. <source>Br Med J</source>. (<year>2024</year>) <volume>385</volume>:<fpage>e078378</fpage>. <pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Okamoto</surname> <given-names>K</given-names></name> <name><surname>Uchiyama</surname> <given-names>T</given-names></name> <name><surname>Takemura</surname> <given-names>T</given-names></name> <name><surname>Kume</surname> <given-names>N</given-names></name> <name><surname>Adachi</surname> <given-names>T</given-names></name> <name><surname>Kuroda</surname> <given-names>T</given-names></name><etal/></person-group> <article-title>Qualitative evaluation of the supporting system for diagnosis procedure combination code selection</article-title>. <source>Stud Health Technol Inform</source>. (<year>2013</year>) <volume>192</volume>:<fpage>1031</fpage>.<pub-id pub-id-type="pmid">23920805</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hsiao</surname> <given-names>YW</given-names></name> <name><surname>Tao</surname> <given-names>CL</given-names></name> <name><surname>Chuang</surname> <given-names>EY</given-names></name> <name><surname>Lu</surname> <given-names>TP</given-names></name></person-group>. <article-title>A risk prediction model of gene signatures in ovarian cancer through bagging of GA-XGBoost models</article-title>. <source>J Adv Res</source>. (<year>2021</year>) <volume>30</volume>:<fpage>113</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1016/j.jare.2020.11.006</pub-id><pub-id pub-id-type="pmid">34026291</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname> <given-names>N</given-names></name> <name><surname>Li</surname> <given-names>M</given-names></name> <name><surname>He</surname> <given-names>L</given-names></name> <name><surname>Xie</surname> <given-names>B</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Zhang</surname> <given-names>R</given-names></name><etal/></person-group> <article-title>Predicting 30-days mortality for MIMIC-III patients with sepsis-3: a machine learning approach using XGboost</article-title>. <source>J Transl Med</source>. (<year>2020</year>) <volume>18</volume>:<fpage>462</fpage>. <pub-id pub-id-type="doi">10.1186/s12967-020-02620-5</pub-id><pub-id pub-id-type="pmid">33287854</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z</given-names></name> <name><surname>Wu</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>Y</given-names></name> <name><surname>Yao</surname> <given-names>R</given-names></name> <name><surname>Zhu</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>Extreme gradient boosting-based explainable machine learning model for predicting significant fibrosis in autoimmune hepatitis</article-title>. <source>QJM</source>. (<year>2025</year>):<fpage>hcaf215</fpage>. <pub-id pub-id-type="doi">10.1093/qjmed/hcaf215</pub-id><pub-id pub-id-type="pmid">40973145</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>SM</given-names></name> <name><surname>Lee</surname> <given-names>SI</given-names></name></person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. <source>Adv Neural Inf Process Syst</source>. (<year>2017</year>) <volume>30</volume>:<fpage>4768</fpage>&#x2013;<lpage>77</lpage>.</mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>SM</given-names></name> <name><surname>Nair</surname> <given-names>B</given-names></name> <name><surname>Vavilala</surname> <given-names>MS</given-names></name> <name><surname>Horibe</surname> <given-names>M</given-names></name> <name><surname>Eisses</surname> <given-names>MJ</given-names></name> <name><surname>Adams</surname> <given-names>T</given-names></name><etal/></person-group> <article-title>Explainable machine-learning predictions for the prevention of hypoxaemia during surgery</article-title>. <source>Nat Biomed Eng</source>. (<year>2018</year>) <volume>2</volume>:<fpage>749</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-018-0304-0</pub-id><pub-id pub-id-type="pmid">31001455</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ponce-Bobadilla</surname> <given-names>AV</given-names></name> <name><surname>Schmitt</surname> <given-names>V</given-names></name> <name><surname>Maier</surname> <given-names>CS</given-names></name> <name><surname>Mensing</surname> <given-names>S</given-names></name> <name><surname>Stodtmann</surname> <given-names>S</given-names></name></person-group>. <article-title>Practical guide to SHAP analysis: explaining supervised machine learning model predictions in drug development</article-title>. <source>Clin Transl Sci</source>. (<year>2024</year>) <volume>17</volume>:<fpage>e70056</fpage>. <pub-id pub-id-type="doi">10.1111/cts.70056</pub-id><pub-id pub-id-type="pmid">39463176</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>GH</given-names></name> <name><surname>Davis</surname> <given-names>LE</given-names></name> <name><surname>Leifer</surname> <given-names>RP</given-names></name></person-group>. <article-title>Prediction of hospital length of stay</article-title>. <source>Health Serv Res</source>. (<year>1966</year>) <volume>1</volume>:<fpage>287</fpage>&#x2013;<lpage>300</lpage>.<pub-id pub-id-type="pmid">5971638</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Handelman</surname> <given-names>GS</given-names></name> <name><surname>Kok</surname> <given-names>HK</given-names></name> <name><surname>Chandra</surname> <given-names>RV</given-names></name> <name><surname>Razavi</surname> <given-names>AH</given-names></name> <name><surname>Lee</surname> <given-names>MJ</given-names></name> <name><surname>Asadi</surname> <given-names>H</given-names></name></person-group>. <article-title>eDoctor: machine learning and the future of medicine</article-title>. <source>J Intern Med</source>. (<year>2018</year>) <volume>284</volume>:<fpage>603</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1111/joim.12822</pub-id><pub-id pub-id-type="pmid">30102808</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pera</surname> <given-names>M</given-names></name> <name><surname>Gibert</surname> <given-names>J</given-names></name> <name><surname>Gimeno</surname> <given-names>M</given-names></name> <name><surname>Garsot</surname> <given-names>E</given-names></name> <name><surname>Eizaguirre</surname> <given-names>E</given-names></name> <name><surname>Mir&#x00F3;</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Machine learning risk prediction model of 90-day mortality after gastrectomy for cancer</article-title>. <source>Ann Surg</source>. (<year>2022</year>) <volume>276</volume>:<fpage>776</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1097/sla.0000000000005616</pub-id><pub-id pub-id-type="pmid">35866643</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>G</given-names></name> <name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Hu</surname> <given-names>Y</given-names></name> <name><surname>Luo</surname> <given-names>Q</given-names></name> <name><surname>Ruan</surname> <given-names>L</given-names></name> <name><surname>Xie</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Development and comparison of machine-learning models for predicting prolonged postoperative length of stay in lung cancer patients following video-assisted thoracoscopic surgery</article-title>. <source>Asia Pac J Oncol Nurs</source>. (<year>2024</year>) <volume>11</volume>:<fpage>100493</fpage>. <pub-id pub-id-type="doi">10.1016/j.apjon.2024.100493</pub-id><pub-id pub-id-type="pmid">38808011</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>S</given-names></name> <name><surname>Yan</surname> <given-names>M</given-names></name> <name><surname>Li</surname> <given-names>C</given-names></name> <name><surname>Yan</surname> <given-names>C</given-names></name> <name><surname>Zhu</surname> <given-names>Z</given-names></name> <name><surname>Lu</surname> <given-names>W</given-names></name></person-group>. <article-title>Machine-learning-assisted prediction of surgical outcomes in patients undergoing gastrectomy</article-title>. <source>Chin J Cancer Res</source>. (<year>2019</year>) <volume>31</volume>:<fpage>797</fpage>&#x2013;<lpage>805</lpage>. <pub-id pub-id-type="doi">10.21147/j.issn.1000-9604.2019.05.09</pub-id><pub-id pub-id-type="pmid">31814683</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jo</surname> <given-names>YY</given-names></name> <name><surname>Han</surname> <given-names>J</given-names></name> <name><surname>Park</surname> <given-names>HW</given-names></name> <name><surname>Jung</surname> <given-names>H</given-names></name> <name><surname>Lee</surname> <given-names>JD</given-names></name> <name><surname>Jung</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Prediction of prolonged length of hospital stay after cancer surgery using machine learning on electronic health records: retrospective cross-sectional study</article-title>. <source>JMIR Med Inform</source>. (<year>2021</year>) <volume>9</volume>:<fpage>e23147</fpage>. <pub-id pub-id-type="doi">10.2196/23147</pub-id><pub-id pub-id-type="pmid">33616544</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cho</surname> <given-names>HN</given-names></name> <name><surname>Ahn</surname> <given-names>I</given-names></name> <name><surname>Gwon</surname> <given-names>H</given-names></name> <name><surname>Kang</surname> <given-names>HJ</given-names></name> <name><surname>Kim</surname> <given-names>Y</given-names></name> <name><surname>Seo</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Explainable predictions of a machine learning model to forecast the postoperative length of stay for severe patients: machine learning model development and evaluation</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2024</year>) <volume>24</volume>:<fpage>350</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-024-02755-1</pub-id><pub-id pub-id-type="pmid">39563368</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>J</given-names></name> <name><surname>Huang</surname> <given-names>Y</given-names></name> <name><surname>Han</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>S</given-names></name> <name><surname>Cheng</surname> <given-names>X</given-names></name> <name><surname>Luo</surname> <given-names>T</given-names></name></person-group>. <article-title>Development and validation of a machine learning-based prediction model for prolonged length of stay after laparoscopic gastrointestinal surgery: a secondary analysis of the FDP-PONV trial</article-title>. <source>BMC Gastroenterol</source>. (<year>2025</year>) <volume>25</volume>:<fpage>703</fpage>. <pub-id pub-id-type="doi">10.1186/s12876-025-04330-y</pub-id><pub-id pub-id-type="pmid">41062989</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yoshida</surname> <given-names>K</given-names></name> <name><surname>Honda</surname> <given-names>M</given-names></name> <name><surname>Kumamaru</surname> <given-names>H</given-names></name> <name><surname>Kodera</surname> <given-names>Y</given-names></name> <name><surname>Kakeji</surname> <given-names>Y</given-names></name> <name><surname>Hiki</surname> <given-names>N</given-names></name><etal/></person-group> <article-title>Surgical outcomes of laparoscopic distal gastrectomy compared to open distal gastrectomy: a retrospective cohort study based on a nationwide registry database in Japan</article-title>. <source>Ann Gastroenterol Surg</source>. (<year>2018</year>) <volume>2</volume>:<fpage>55</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1002/ags3.12054</pub-id><pub-id pub-id-type="pmid">29863131</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>EY</given-names></name> <name><surname>Song</surname> <given-names>KY</given-names></name> <name><surname>Lee</surname> <given-names>J</given-names></name></person-group>. <article-title>Does hospital volume really affect the surgical and oncological outcomes of gastric cancer in Korea?</article-title> <source>J Gastric Cancer</source>. (<year>2017</year>) <volume>17</volume>:<fpage>246</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.5230/jgc.2017.17.e31</pub-id><pub-id pub-id-type="pmid">28970955</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Iwatsuki</surname> <given-names>M</given-names></name> <name><surname>Yamamoto</surname> <given-names>H</given-names></name> <name><surname>Miyata</surname> <given-names>H</given-names></name> <name><surname>Kakeji</surname> <given-names>Y</given-names></name> <name><surname>Yoshida</surname> <given-names>K</given-names></name> <name><surname>Konno</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Association of surgeon and hospital volume with postoperative mortality after total gastrectomy for gastric cancer: data from 71,307 Japanese patients collected from a nationwide web-based data entry system</article-title>. <source>Gastric Cancer</source>. (<year>2021</year>) <volume>24</volume>:<fpage>526</fpage>&#x2013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1007/s10120-020-01127-8</pub-id><pub-id pub-id-type="pmid">33037492</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McCaffrey</surname> <given-names>DF</given-names></name> <name><surname>Ridgeway</surname> <given-names>G</given-names></name> <name><surname>Morral</surname> <given-names>AR</given-names></name></person-group>. <article-title>Propensity score estimation with boosted regression for evaluating causal effects in observational studies</article-title>. <source>Psychol Methods</source>. (<year>2004</year>) <volume>9</volume>:<fpage>403</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1037/1082-989x.9.4.403</pub-id><pub-id pub-id-type="pmid">15598095</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/996909/overview">Kunal Pal</ext-link>, National Institute of Technology Rourkela, India</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2142881/overview">Yeganeh Pasebani</ext-link>, Johns Hopkins University, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3282933/overview">D. Lakshmi</ext-link>, VIT Bhopal University, India</p></fn>
<fn fn-type="abbr" id="abbrev1"><label>Abbreviations:</label><p>DPC, diagnosis procedure combination; AI, artificial intelligence; LOS, length of stay; TRIPOD, transparent reporting of a multivariable prediction model for individual prognosis or diagnosis; ICD-10, international classification of diseases, 10th revision; XGBoost, eXtreme gradient boosting; RMSE, root mean squared error; MAE, mean absolute error; SHAP, SHapley additive exPlanations; BMI, body mass index; ADL, activities of daily living; AUC, area under the curve; NCD, national clinical database.</p></fn>
</fn-group>
</back>
</article>