<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2023.1092478</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Oncology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Development and validation of machine learning models to predict survival of patients with resected stage-III NSCLC</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Jin</surname>
<given-names>Long</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2082645"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Qifan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1990976"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fu</surname>
<given-names>Shenbo</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2082656"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cao</surname>
<given-names>Fei</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2082662"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hou</surname>
<given-names>Bin</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2082665"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ma</surname>
<given-names>Jia</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2082669"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Radiation Oncology, Shaanxi Provincial People&#x2019;s Hospital</institution>, <addr-line>Xi&#x2019;an</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of Material Science &amp; Engineering, Huazhong University of Science and Technology</institution>, <addr-line>Wuhan</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Radiation Oncology, Shaanxi Provincial Cancer Hospital</institution>, <addr-line>Xi&#x2019;an</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Oncology, Shaanxi Provincial People&#x2019;s Hospital</institution>, <addr-line>Xi&#x2019;an</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Thoracic Surgery, Shaanxi Provincial People&#x2019;s Hospital</institution>, <addr-line>Xi&#x2019;an</addr-line>, <country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Shaanxi Provincial People&#x2019;s Hospital</institution>, <addr-line>Xi&#x2019;an</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Lal Hussain, University of Azad Jammu and Kashmir, Pakistan</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Peng-Chan Lin, National Cheng Kung University, Taiwan; Adeel Ahmed Abbasi, Central South University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Jia Ma, <email xlink:href="mailto:majia1110@sina.com">majia1110@sina.com</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Thoracic Oncology, a section of the journal Frontiers in Oncology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>03</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>13</volume>
<elocation-id>1092478</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>13</day>
<month>02</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Jin, Zhao, Fu, Cao, Hou and Ma</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Jin, Zhao, Fu, Cao, Hou and Ma</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Objective</title>
<p>To compare the performance of three machine learning algorithms with the tumor, node, and metastasis (TNM) staging system in survival prediction and validate the individual adjuvant treatment recommendations plan based on the optimal model.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, we trained and validated three machine learning survival models (a deep learning neural network, a random forest, and a Cox proportional hazard model) using the data of patients with stage-III NSCLC who received resection surgery, drawn from the National Cancer Institute Surveillance, Epidemiology, and End Results (SEER) database from 2012 to 2017. The performance of survival prediction of all machine learning models was assessed using the concordance index (C-index), and the averaged C-index was used for cross-validation. The optimal model was externally validated in an independent cohort from Shaanxi Provincial People&#x2019;s Hospital. We then compared the performance of the optimal model with that of the TNM staging system. Finally, we developed a cloud-based recommendation system for adjuvant therapy that visualizes the survival curve of each treatment plan and deployed it on the internet.</p>
</sec>
<sec>
<title>Results</title>
<p>A total of 4617 patients were included in this study. The deep learning network performed more stably and accurately in predicting the survival of patients with resected stage-III NSCLC than the random survival forest and the Cox proportional hazard model on the internal test dataset (C-index=0.834 vs. 0.678 vs. 0.640), and better than the TNM staging system (C-index=0.820 vs. 0.650) in the external validation. Patients who followed the recommendation of the system had superior survival compared with those who did not. The predicted 5-year survival curve for each adjuvant treatment plan can be accessed in the recommender system <italic>via</italic> the browser.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The deep learning model has several advantages over the linear model and the random forest model in prognostic prediction and treatment recommendation. This novel analytical approach may provide accurate prediction of individual survival and treatment recommendations for patients with resected stage-III NSCLC.</p>
</sec>
</abstract>
<kwd-group>
<kwd>non-small cell lung cancer (NSCLC)</kwd>
<kwd>stage-III</kwd>
<kwd>machine learning</kwd>
<kwd>survival predication</kwd>
<kwd>treatment recommendation</kwd>
<kwd>adjuvant therapy</kwd>
</kwd-group>
<counts>
<fig-count count="6"/>
<table-count count="2"/>
<equation-count count="1"/>
<ref-count count="34"/>
<page-count count="11"/>
<word-count count="4979"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Stage-III non-small cell lung cancer (NSCLC) accounts for about 1/4 to 1/3 of all lung cancers and is a very heterogeneous disease with a discouraging clinical prognosis; the 5-year survival rate of NSCLC is only 15%-40% (<xref ref-type="bibr" rid="B1">1</xref>). For operable stage-III lung cancer patients, surgery-based comprehensive treatment is recommended. However, even after radical tumor resection, there is still a high risk of recurrence and metastasis, so adjuvant therapy after surgery is required to improve long-term survival probability. Postoperative adjuvant therapy mainly includes adjuvant chemotherapy, radiotherapy and targeted therapy. Among them, adjuvant targeted therapy is mainly aimed at the EGFR-amplified non-small cell lung cancer patients. Targeted therapy can improve their prognosis, but the proportion of this population is relatively low, only 9% of all non-small cell lung cancer patients (<xref ref-type="bibr" rid="B2">2</xref>). For the vast majority of patients with EGFR-negative stage-III lung cancer, studies have shown that postoperative chemotherapy (POCT) can improve the 5-year survival rate by 5% (<xref ref-type="bibr" rid="B3">3</xref>). Other studies confirm the value of postoperative radiotherapy for high-risk subgroups (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B6">6</xref>), while the results of a meta-analysis in 1998 determined that postoperative adjuvant radiotherapy is not recommended for patients with stage I-IIIB (N0-N1) (<xref ref-type="bibr" rid="B7">7</xref>). In addition, the 2020 Lung ART study suggests that adjuvant radiotherapy is not recommended for patients with N2 disease after lung cancer surgery (<xref ref-type="bibr" rid="B8">8</xref>). Therefore, whether postoperative radiotherapy has a beneficial effect on overall survival (OS) is controversial.
In current clinical practice, the formulation and implementation of adjuvant chemotherapy and radiotherapy treatment plans are mainly based on the TNM staging system. This approach has two drawbacks. First, only three clinical indicators (T, N, and M) are considered to guide clinical treatment, while other important patient characteristics are ignored, such as physiological characteristics (age, gender) and other important clinical characteristics (surgical method, primary tumor location, tumor grade, number of positive lymph nodes (LNs), number of LNs examined, and adjuvant therapy methods). Second, the TNM staging system is used for risk stratification of the population and cannot work as a tool to provide prognosis prediction for individual patients. Therefore, it cannot meet the need to improve patient prognosis. Today, with increasingly mature electronic medical record systems, deep learning has been widely used in the medical field to predict the survival rate of cancer patients, and it performs better than the traditional Cox regression method (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B17">17</xref>). In this study, we trained a deep learning model based on a large amount of clinical data and developed a patient-oriented assistant utilizing this model. The recommendation system for radiotherapy and chemotherapy can be accessed through the Internet to provide patients with reference opinions for postoperative radiotherapy and chemotherapy regimens (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Diagram of the training and recommendation procedure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g001.tif"/>
</fig>
</sec>
<sec id="s2">
<label>2</label>
<title>Method</title>
<sec id="s2_1">
<label>2.1</label>
<title>Eligibility criteria and patient information</title>
<p>Regarding the training cohort, we selected 4517 medical cases from the database Incidence - SEER Research Plus Data, 18 Registries, Nov 2019 Sub (2000&#x2013;2017) - Linked To County Attributes - Total U.S., 1969-2018 Counties, National Cancer Institute, DCCPS, Surveillance Research Program, released April 2021, based on the November 2019 submission. We included data records if they met the following criteria: (1) patients pathologically diagnosed between January 2012 and December 2017 with primary stage-III non-small cell lung cancer (NSCLC) and (2) the existence of one malignant lesion. Conversely, we excluded clinical cases according to the following criterion: (1) patients for whom the regional lymph node procedure performed during the initial work-up or first course of therapy is unknown or missing. Then we chose the features relevant to the OS (overall survival) of NSCLC, including demographic information (age and sex), NSCLC-related characteristics (TNM stage, histology type, primary site, tumor size, number of regional nodes examined, number of positive regional nodes, and laterality of the tumor), and treatment details (surgery of primary site, radiation, and chemotherapy). The outcome is the patient survival time and death indicator. As for the cohort for external validation of the model, the inclusion and exclusion criteria are consistent with those of the training group. We randomly collected 100 stage-III non-small cell lung cancer patients who underwent surgery (Lobectomy WITH mediastinal lymph node dissection and Pneumonectomy) from January 2012 to December 2017 in Shaanxi Provincial People&#x2019;s Hospital, China.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data preprocessing and feature engineering</title>
<p>The training data and the testing data are stored in CSV files. Both datasets contain two types of covariates: numerical variables and categorical variables. In the dataset, we have 3 numerical variables (number of positive regional nodes, number of regional nodes examined, and tumor size) as well as 10 categorical variables. To avoid the evaluation problems caused by label encoding of categorical variables, we convert the 10 categorical features using one-hot encoding, which identifies the different categorical values of a feature in a binary fashion. For example, before conversion, the feature surgery on the primary site contains two values encoded for two surgery types (Lobectomy WITH mediastinal lymph node dissection, Pneumonectomy WITH mediastinal lymph node dissection). After transformation, this field is replaced by two features, one per surgery type, whose values can only be 0 or 1 to identify the specific surgery type. In addition, for the feature tumor size, the unit is millimeter in the training set but centimeter in the testing dataset, so we divide the values in the training set by 10 to make the units the same. Finally, we perform normalization in order to accelerate the training process.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Machine learning survival model design</title>
<p>In this section, we created three machine learning models to perform the survival analysis to select the optimal one.</p>
<p>We developed a deep learning model based on DeepSurv to predict the personal hazard rate according to the patient&#x2019;s current clinical condition. The patient&#x2019;s baseline data is the input to the neural network, followed by fully-connected hidden layers of nodes with a dropout layer after each hidden layer. The output of the network is the hazard rate. Regarding the activation function of each node, in order to overcome the problem of vanishing gradients, we select ReLU to add nonlinearity to the model, which helps the model learn the complex relationship between covariates and the hazard rate. As for the loss function, we train the model to minimize the average negative log partial likelihood with regularization:</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>&#x3b8;</italic> is the weight of every node in the network, <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of dead patients and <italic>&#x3bb;</italic> is the <italic>l</italic>
<sub>2</sub> regularization parameter, <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted hazard rate. We use Adam as the gradient descent algorithm to update the parameters of the model over many epochs, because Adam is more efficient when working with problems involving high-dimensional data and requires less memory for the optimization process compared with the SGD method (<xref ref-type="bibr" rid="B18">18</xref>). We utilize Random Search to optimize the hyper-parameters because, compared to Grid Search, Random Search can try more cases for important hyper-parameters. In the experiment, we search the learning rate on a log scale in [0.00001, 0.1], the dropout rate in [0.2, 0.5], the number of hidden layers in [1, 7] and the number of nodes in each hidden layer in [5, 90].</p>
<p>We also trained a random forest model; this model is reliable because it forces each split to consider only a subset of the predictors. In this study, Random Search is again used to tune the number of trees in [100, 300], the minimum number of samples required to split an internal node in [2, 50], and the minimum number of samples required to be at a leaf node in [1, 20].</p>
<p>Lastly, we trained the penalized Cox proportional hazard model with the same loss function as the deep learning model. We tuned the hyperparameters using the Random Search method, specifically the penalizer in [0.001, 1] and the learning rate in [0.001, 1].</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Model training and evaluation</title>
<p>The concordance index (C-index) is used to measure the performance of the model. The C-index is the ratio of pairs of patients ordered correctly to all pairs. Thus, the higher the C-index, the better the performance of the model. In the study, the 4517 SEER data records were divided into two groups: 3534 (80%) records were used for training, while 883 (20%) records were treated as the validation set. Five-fold cross-validation was performed to tune the hyper-parameters of each model and select the best model for survival prediction. Additionally, external validation was performed on the selected optimal model and the TNM staging system to compare the generalizability of the two models. Finally, we performed attribution analysis for the deep learning survival model with the integrated gradients method (<xref ref-type="bibr" rid="B19">19</xref>) based on the testing dataset to rank the importance of clinical features.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Cloud-based adjuvant therapy recommender system deployment</title>
<p>The deep learning algorithm could recommend treatment for patients according to their current clinical conditions (<xref ref-type="bibr" rid="B20">20</xref>). We can load the model and set the input according to the patient&#x2019;s demographic features (age and gender), surgery type (lobectomy and pneumonectomy), type (histology type and laterality) and the stage information of NSCLC (TNM, the number of regional nodes examined, the number of positive regional nodes and the tumor size). As for adjuvant therapy, we predict the hazard rate under four adjuvant therapy treatments (with radiation and chemotherapy, with radiation and without chemotherapy, without radiation and with chemotherapy, and without radiation and chemotherapy). We then obtain the cumulative hazard function under each of the four adjuvant therapy treatments and finally derive the four 5-year survival functions by negating and exponentiating the cumulative hazard functions. In this application, we developed the backend code to calculate the four 5-year adjuvant therapy survival functions and implemented the UI code to display the predicted survival functions in a line race chart.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Computation software</title>
<p>The three models are trained with Python v 3.9, PyTorch v 1.11.0 is used to train the deep learning algorithm and PySurvival v 0.1.2 is utilized to train the random survival forest and penalized cox proportional hazard model. The Front UI of the adjuvant therapy recommender system is developed with Vue.js javascript framework and a Material Design component framework called Vuetify. The backend code of the web application is implemented by the Django REST framework. The recommender system is deployed on Tencent Cloud, which could be accessed through a web browser.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Patient baseline characteristics</title>
<p>Based on the inclusion criteria, we include 4617 stage-iii NSCLC patients who received Surgeries (Lobectomy and Pneumonectomy with mediastinal lymph node dissection) in this study. The 4517 patients out of 4617 are extracted from the SEER database and used as a training set while the other 100 patients are from China Database for model testing. The baseline medical characteristics of the two cohorts are shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. From the AJCC TNM staging system&#x2019;s perspective, all patients in the training set and the testing set are stage-iii NSCLC patients. In the SEER cohort, most patients&#x2019; histology type is Adenocarcinoma, which takes 44.28%. The next one is Squamous cell carcinoma, which takes 23.27%. Regarding the Received surgeries, 85.51% of patients received Lobectomy WITH mediastinal lymph node dissection while the rest (14.48%) accepted Pneumonectomy WITH mediastinal lymph node dissection for treatment. Concerning Adjuvant treatment, 74.12% of patients accepted chemotherapy and about 41.88% received beam radiation. On the contrary, in the test cohort, most patients received Lobectomy WITH mediastinal lymph node dissection, the two leading histology types are Squamous cell carcinoma and Adenocarcinoma, respectively 46% and 43% of the population. As for Adjuvant treatment, 1/3 received beam radiation and almost everyone received chemotherapy.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Main Baseline Clinical Characteristics of Patients.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Characteristic</th>
<th valign="top" colspan="2" align="center">&#x2003;Data set, No. (%)</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" colspan="3" align="left">Age</th>
</tr>
<tr>
<td valign="top" align="left">85+ years</td>
<td valign="top" align="center">60 (1.35)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">80-84 years</td>
<td valign="top" align="center">242 (5.47)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">75-79 years</td>
<td valign="top" align="center">530 (11.99)</td>
<td valign="top" align="center">2 (2.00)</td>
</tr>
<tr>
<td valign="top" align="left">70-74 years</td>
<td valign="top" align="center">731 (16.54)</td>
<td valign="top" align="center">10 (10.00)</td>
</tr>
<tr>
<td valign="top" align="left">65-69 years</td>
<td valign="top" align="center">891 (20.17)</td>
<td valign="top" align="center">14 (14.00)</td>
</tr>
<tr>
<td valign="top" align="left">60-64 years</td>
<td valign="top" align="center">713 (16.14)</td>
<td valign="top" align="center">22 (22.00)</td>
</tr>
<tr>
<td valign="top" align="left">55-59 years</td>
<td valign="top" align="center">573 (12.97)</td>
<td valign="top" align="center">22 (22.00)</td>
</tr>
<tr>
<td valign="top" align="left">50-54 years</td>
<td valign="top" align="center">395 (8.94)</td>
<td valign="top" align="center">20 (20.00)</td>
</tr>
<tr>
<td valign="top" align="left">45-49 years</td>
<td valign="top" align="center">171 (3.87)</td>
<td valign="top" align="center">6 (6.00)</td>
</tr>
<tr>
<td valign="top" align="left">40-44 years</td>
<td valign="top" align="center">63 (1.42)</td>
<td valign="top" align="center">3 (3.00)</td>
</tr>
<tr>
<td valign="top" align="left">35-39 years</td>
<td valign="top" align="center">25 (0.56)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">30-34 years</td>
<td valign="top" align="center">13 (0.31)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">25-29 years</td>
<td valign="top" align="center">7 (0.57)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">20-24 years</td>
<td valign="top" align="center">0 (0)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">15-19 years</td>
<td valign="top" align="center">3 (0.07)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Histologic type</th>
</tr>
<tr>
<td valign="top" align="left">Neoplasm, malignant</td>
<td valign="top" align="center">6 (0.13)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Carcinoma, NOS</td>
<td valign="top" align="center">11 (0.24)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Large cell carcinoma, NOS</td>
<td valign="top" align="center">45 (1.01)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">Large cell neuroendocrine carcinoma</td>
<td valign="top" align="center">40 (0.90)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Large cell carcinoma with rhabdoid phenotype</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Pleomorphic carcinoma</td>
<td valign="top" align="center">20 (0.45)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Giant cell carcinoma</td>
<td valign="top" align="center">6 (0.13)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Spindle cell carcinoma, NOS</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Pseudosarcomatous carcinoma</td>
<td valign="top" align="center">13 (0.29)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Combined small cell carcinoma</td>
<td valign="top" align="center">16 (0.36)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Non-small cell carcinoma</td>
<td valign="top" align="center">114 (2.58)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Papillary carcinoma, NOS</td>
<td valign="top" align="center">3 (0.06)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Papillary squamous cell carcinoma</td>
<td valign="top" align="center">2 (0.04)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Squamous cell carcinoma, NOS</td>
<td valign="top" align="center">1028 (23.27)</td>
<td valign="top" align="center">46 (46.00)</td>
</tr>
<tr>
<td valign="top" align="left">Squamous cell carcinoma, keratinizing, NOS</td>
<td valign="top" align="center">76 (1.72)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">Squamous cell carcinoma, large cell, nonkeratinizing, NOS</td>
<td valign="top" align="center">26 (0.58)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Squamous cell carcinoma, spindle cell</td>
<td valign="top" align="center">2 (0.04)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Lymphoepithelial carcinoma</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Basaloid squamous cell carcinoma</td>
<td valign="top" align="center">7 (0.15)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Squamous cell carcinoma, clear cell type</td>
<td valign="top" align="center">3 (0.07)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Basaloid carcinoma</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Adenocarcinoma, NOS</td>
<td valign="top" align="center">1956 (44.28)</td>
<td valign="top" align="center">43 (43.00)</td>
</tr>
<tr>
<td valign="top" align="left">Adenoid cystic carcinoma</td>
<td valign="top" align="center">6 (0.13)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Solid carcinoma, NOS</td>
<td valign="top" align="center">20 (0.45)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Carcinoid tumor, NOS</td>
<td valign="top" align="center">66 (1.49)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Neuroendocrine carcinoma, NOS</td>
<td valign="top" align="center">31 (0.70)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Atypical carcinoid tumor</td>
<td valign="top" align="center">31 (0.70)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Bronchiolo-alveolar adenocarcinoma, NOS</td>
<td valign="top" align="center">57 (1.29)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Alveolar adenocarcinoma</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Bronchiolo-alveolar carcinoma, non-mucinous</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Adenocarcinoma with mixed subtypes</td>
<td valign="top" align="center">277 (6.27)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">Papillary adenocarcinoma, NOS</td>
<td valign="top" align="center">78 (1.76)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">Clear cell adenocarcinoma, NOS</td>
<td valign="top" align="center">11 (0.24)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Mixed cell adenocarcinoma</td>
<td valign="top" align="center">11 (0.24)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Papillary microcarcinoma</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Mucoepidermoid carcinoma</td>
<td valign="top" align="center">2 (0.04)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Mucinous adenocarcinoma</td>
<td valign="top" align="center">97 (2.19)</td>
<td valign="top" align="center">2 (2.00)</td>
</tr>
<tr>
<td valign="top" align="left">Mucin-producing adenocarcinoma</td>
<td valign="top" align="center">19 (0.43)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Signet ring cell carcinoma</td>
<td valign="top" align="center">5 (0.11)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Ductal carcinoma, micropapillary</td>
<td valign="top" align="center">2 (0.04)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Acinar cell carcinoma</td>
<td valign="top" align="center">162 (3.66)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Adenosquamous carcinoma</td>
<td valign="top" align="center">129 (2.92)</td>
<td valign="top" align="center">4 (4.00)</td>
</tr>
<tr>
<td valign="top" align="left">Adenocarcinoma with neuroendocrine differentiation</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">Carcinosarcoma, NOS</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Bronchiolo-alveolar carcinoma, mucinous</td>
<td valign="top" align="center">7 (0.16)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Bronchiolo-alveolar carcinoma, mixed mucinous and non-mucinous</td>
<td valign="top" align="center">4 (0.09)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">T stage</th>
</tr>
<tr>
<td valign="top" align="left">T1</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">2 (2.00)</td>
</tr>
<tr>
<td valign="top" align="left">T1NOS</td>
<td valign="top" align="center">2 (0.05)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">T1a</td>
<td valign="top" align="center">371 (8.40)</td>
<td valign="top" align="center">1 (1.00)</td>
</tr>
<tr>
<td valign="top" align="left">T1b</td>
<td valign="top" align="center">390 (8.83)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">T2NOS</td>
<td valign="top" align="center">25 (0.56)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">T2a</td>
<td valign="top" align="center">1162 (26.31)</td>
<td valign="top" align="center">35 (35.00)</td>
</tr>
<tr>
<td valign="top" align="left">T2b</td>
<td valign="top" align="center">353 (7.99)</td>
<td valign="top" align="center">15 (15.00)</td>
</tr>
<tr>
<td valign="top" align="left">T3</td>
<td valign="top" align="center">1285 (29.09)</td>
<td valign="top" align="center">27 (27.00)</td>
</tr>
<tr>
<td valign="top" align="left">T4</td>
<td valign="top" align="center">828 (18.74)</td>
<td valign="top" align="center">20 (20.00)</td>
</tr>
<tr>
<td valign="top" align="left">TX</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">N stage</th>
</tr>
<tr>
<td valign="top" align="left">N0</td>
<td valign="top" align="center">404 (9.14)</td>
<td valign="top" align="center">5 (5.00)</td>
</tr>
<tr>
<td valign="top" align="left">N1</td>
<td valign="top" align="center">866 (19.61)</td>
<td valign="top" align="center">11 (11.00)</td>
</tr>
<tr>
<td valign="top" align="left">N2</td>
<td valign="top" align="center">3087 (69.88)</td>
<td valign="top" align="center">84 (84.00)</td>
</tr>
<tr>
<td valign="top" align="left">N3</td>
<td valign="top" align="center">60 (1.36)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">M stage</th>
</tr>
<tr>
<td valign="top" align="left">M0</td>
<td valign="top" align="center">4417 (100.00)</td>
<td valign="top" align="center">100 (100.00)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Sex</th>
</tr>
<tr>
<td valign="top" align="left">Female</td>
<td valign="top" align="center">2141 (48.47)</td>
<td valign="top" align="center">24 (24.00)</td>
</tr>
<tr>
<td valign="top" align="left">Male</td>
<td valign="top" align="center">2276 (51.52)</td>
<td valign="top" align="center">76 (76.00)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Radiation</th>
</tr>
<tr>
<td valign="top" align="left">Beam radiation</td>
<td valign="top" align="center">1850 (41.88)</td>
<td valign="top" align="center">34 (34.00)</td>
</tr>
<tr>
<td valign="top" align="left">Combination of beam with implants or isotopes</td>
<td valign="top" align="center">2 (0.05)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">None</td>
<td valign="top" align="center">2412 (54.60)</td>
<td valign="top" align="center">66 (66.00)</td>
</tr>
<tr>
<td valign="top" align="left">Radiation, NOS method or source not specified</td>
<td valign="top" align="center">14 (0.31)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Recommended, unknown if administered</td>
<td valign="top" align="center">88 (1.99)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Refused</td>
<td valign="top" align="center">49 (1.11)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Radioactive implants (includes brachytherapy)</td>
<td valign="top" align="center">2 (0.05)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Chemotherapy</th>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="center">3274 (74.12)</td>
<td valign="top" align="center">95 (95.00)</td>
</tr>
<tr>
<td valign="top" align="left">No/Unknown</td>
<td valign="top" align="center">1143 (25.87)</td>
<td valign="top" align="center">5 (5.00)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Surgery to primary site</th>
</tr>
<tr>
<td valign="top" align="left">Lobectomy WITH mediastinal lymph node dissection</td>
<td valign="top" align="center">3777 (85.51)</td>
<td valign="top" align="center">81 (81.00)</td>
</tr>
<tr>
<td valign="top" align="left">Pneumonectomy WITH mediastinal lymph node dissection</td>
<td valign="top" align="center">640 (14.48)</td>
<td valign="top" align="center">19 (19.00)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Laterality</th>
</tr>
<tr>
<td valign="top" align="left">Left - origin of primary</td>
<td valign="top" align="center">1923 (43.53)</td>
<td valign="top" align="center">43 (43.00)</td>
</tr>
<tr>
<td valign="top" align="left">Only one side - side unspecified</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Paired site, but no information concerning laterality</td>
<td valign="top" align="center">1 (0.02)</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">Right - origin of primary</td>
<td valign="top" align="center">2492 (56.41)</td>
<td valign="top" align="center">57 (57.00)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Training curve and model performance</title>
<p>After the random search, we settled on the hyperparameters of the deep learning model: the model consists of 2 hidden layers with, from input to output, 60 and 43 neurons respectively, and a dropout unit between each layer. We improve neural network generalization by setting the learning rate to 0.001 and the dropout rate to 0.5 to avoid overfitting. <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> shows the training loss curves of the survival network. At the beginning of the training process, the loss of the validation and training set decreases continually. After 331 epochs of parameter optimization, the validation loss, which begins at 3.6936, stops decreasing at 3.1753, while the training loss, which started at 3.8446, continues to decrease from 3.3844. We then terminate the optimization to avoid overfitting and save the model for testing.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Diagram of the training loss and the validation loss in the optimization procedure. The x-axis represents the number of epochs, and the y-axis represents the value of the loss function. The orange line is the validation loss function and the blue one represents the training loss function.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g002.tif"/>
</fig>
<p>In the random survival forest, we set the number of the estimating trees to 959, the minimum number of samples required to split an internal node to 10 and the minimum number of samples required to be at a leaf node to 15. In the Penalized Cox Proportional hazard model, we configure the penalizer to 0.005 and the learning rate to 0.01.</p>
<p>Then we perform 5-fold cross-validation to select the optimal model for survival prediction. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> displays the exact value and the line chart of each model in every fold validation. The deep learning model shows a more stable and exceptional performance on the concordance index compared to the other two models. The mean of the concordance index of the deep learning algorithm is 0.834, which is much higher than the random forest (0.678) and cox proportional hazard model (0.640) (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). Based on the result of cross-validation, deep learning is selected to compare the TNM staging system on external validation. The performance of the deep learning model is better (0.82 vs 0.65).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The concordance index of three models for 5 fold cross validation. The x-axis represents the number of fold, and the y-axis represents value of concordance index for each model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g003.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Performance of the survival models in predicting the hazard rate of stage-III NSCLC patients who received resection surgery.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">MODEL</th>
<th valign="top" align="center">Cross Validation</th>
<th valign="top" align="center">External Validation</th>
</tr>
<tr>
<th valign="top" align="center">Concordance Index Mean</th>
<th valign="top" align="center">Concordance Index</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Deep Learning</td>
<td valign="top" align="center">0.834</td>
<td valign="top" align="center">0.820</td>
</tr>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center">0.678</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Cox Proportional</td>
<td valign="top" align="center">0.640</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">TNM Staging</td>
<td valign="top" align="center"/>
<td valign="top" align="center">0.650</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As for the feature importance for the network, from the <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> we can observe four of the top important features: regional positive nodes (0.6634), regional examined nodes (-0.7648), tumor size (-0.5633) and Age (-0.4633). In terms of least important features, we observe that the surgery on the primary site (0.0632) is voted to be least significant based on attribution algorithm. The absolute value for attribution scores of other features is greater than 0.1 and less than 0.5.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The attribution score of all input features in the deep learning model. The x-axis represents the name of the input features, and the y-axis represents value of attribution score for each feature.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g004.tif"/>
</fig>
<p>Then we perform 5-fold cross-validation to select the optimal model for survival prediction. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> displays the exact value and the line chart of each model in every fold validation. The deep learning model shows a more stable and exceptional performance on the concordance index compared to the other two models. The mean of the concordance index of the deep learning algorithm is 0.834, which is much higher than the random forest (0.678) and cox proportional hazard model (0.640) (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). Based on the result of cross-validation, deep learning is selected to compare the TNM staging system on external validation. The performance of the deep learning model is better (0.82 vs 0.65).</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>The adjuvant therapy recommender system</title>
<p>Since the deep learning model has better performance than the TNM staging system, we could not only predict the survival function of the current patient but also offer an adjuvant therapy reference to the oncology doctor based on prediction over different therapy treatment plans. Thus we deployed the recommender system to the Internet, which could be accessed with a browser in [<ext-link ext-link-type="uri" xlink:href="http://1.15.80.136/nsclc/">http://1.15.80.136/nsclc/</ext-link>], input the current clinical status, including Demographic, surgery type, cancer type and stage information, of one patient, and click the submit button (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The input page of the recommender system.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g005.tif"/>
</fig>
<p>Then the browser will redirect to the result page (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>), and we could see four 5-year predicted survival curves for each treatment plan. Based on the plot, the predicted optimal treatment plan is only receiving beam radiation for adjuvant treatment, whose survival probability is highest in the next 60 months.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>The output page of the recommender system.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-13-1092478-g006.tif"/>
</fig>
<p>Thus, the specialist could get the reference for adjuvant treatment plan decision-making. Code related to this application can be found at <ext-link ext-link-type="uri" xlink:href="https://github.com/snowflake-Zhao/nsclc">https://github.com/snowflake-Zhao/nsclc</ext-link>.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>This study provides a model that is more accurate than the TNM staging system in predicting the 5-year prognosis of patients with resected stage-III NSCLC. Additionally, the deep learning survival model is more precise and stable than the random survival forest and Cox proportional hazard model in predicting the hazard rate of stage-III resectable NSCLC patients. This achieves our first goal of demonstrating that the deep learning approach is more reliable than TNM in predicting the hazard rate. Driven by the desire to resolve the controversy on devising adjuvant treatment plans for patients with resected stage-III NSCLC, we addressed this problem by developing a recommender system based on the externally validated deep learning model. To the best of our knowledge, this is the first recommender system to provide adjuvant treatment plan references for stage-III NSCLC patients who received resection.</p>
<p>As reported, Adeoye J, et&#xa0;al. have trained DeepSurv and RSF (random survival forest) models for predicting the malignant transformation probability of oral leukoplakia and lichenoid lesions with (N=716) patients (<xref ref-type="bibr" rid="B21">21</xref>). Their exceptional results suggest a considerable improvement of accuracy for hazard prediction using the deep learning model when it is compared with the Cox proportional hazard model(C-index=0.95 vs 0.83), and RSF&#x2019;s performance is much better and more stable than that of Cox proportional hazard model(C-index=0.91 vs 0.83) in this task. Our outcome of the experiment is consistent with their conclusion. In another study, Huang C, et&#xa0;al. developed software to select adjuvant radiotherapy and chemotherapy treatment plan according to the corresponding output hazard rate. Our software has two major points different from their product (<xref ref-type="bibr" rid="B22">22</xref>). One is the output page for oncology specialists. Their output is just one hazard rate, which is difficult for specialists and patients to understand. On the contrary, we plot the four adjuvant treatments predicted survival curves in 60 months, which is more straightforward for patients and doctors because people could understand their probability of survival for each adjuvant treatment plan in the 5 years. The other point is our software could be accessed directly through the web browser either on mobile phones, iPad or personal computers instead of installed on the personal computer for seeking recommendation guidance, which is not convenient for doctors to use.</p>
<p>In our study, the random survival forest did not perform as well as Lin J, et&#xa0;al&#x2019;s (C-index= 0.678 vs 0.723) (<xref ref-type="bibr" rid="B23">23</xref>). We think this is mainly because the two features in the dataset after one hot encoding, the Histologic type and Radiation, generate lots of sparse variables, including Radioactive implants, Signet ring cell carcinoma and so on, which eventually cause harm to the formation of different estimator trees. The result that the deep learning model&#x2019;s C-index is higher than the Cox Proportional hazard model (C-index= 0.834 vs 0.640) meets our expectations, mainly because deep learning could formulate the complex relationships between clinical baseline characteristics and the patient&#x2019;s hazard rate, which is more accurate than the linear relationship assumption of the Cox proportional hazard model. Additionally, that the deep learning model has superior performance to the TNM staging system (C-index= 0.82 vs 0.65) is expected, because the neural network takes in more clinical features related to the prognosis of the patients, including Histologic type, age, sex, tumor size and many others, than the TNM staging system. The most important features of the network are regional positive nodes, regional examined nodes, tumor size and age, which is slightly different from the TNM staging system: even though the T stage value comes from the tumor size and the N stage value comes from the regional nodes, the exact tumor size and the exact number of regional positive nodes could help the model to predict the prognosis more precisely than the general stage value. Besides, the trained model could perform personal prognosis prediction while the TNM staging system could only predict the cohort prognosis. Thus, the deep learning model could possibly substitute the TNM staging system in the future if more medical records could be utilized for training.</p>
<p>In the current medical practice, there is a lack of consensus regarding the principles of adjuvant therapy for stage-III NSCLC patients. For instance, according to the latest version of NCCN Guidelines for NSCLC (Version 5.2022), one major controversy is inconsistent results among different randomized controlled trials of stage-III NSCLC (<xref ref-type="bibr" rid="B23">23</xref>&#x2013;<xref ref-type="bibr" rid="B26">26</xref>). One reason for the inconsistent results among different randomized controlled trials is that RCTs lack external validity (<xref ref-type="bibr" rid="B27">27</xref>), which means there might be neglected features that are effective for the prognosis. Because the externally validated deep learning model could include lots of features that might be related to the prognosis and be sensitive to the different inputs, the model could output the hazard risk of the different treatment plans, and then the optimal plan could be obtained by comparing the output of different treatments. In our adjuvant recommendation system, we could obtain the reliable and accurate hazard rate for 4 adjuvant treatment plans from the developed externally validated model. To visualize the outcome, after mathematical transformation, the predicted survival curves for 4 treatment plans are displayed on the Web User Interface. Because of the significant prognostic benefit of following the treatment recommendation, which clearly outweighs that of not following it, the recommendation system is promising to serve as a dependable tool for decision-making on the adjuvant treatment plan for each stage-III NSCLC patient.</p>
<p>From the results of our experiment, the deep learning model performs well in the survival analysis task. However, the model is lacking in explainability owing to the high complexity inside the neural network, which makes it unrealistic to explain the process to humans. If we want to extensively apply the deep learning algorithm in the decision-making of the NSCLC, we definitely need to improve the explainability of the model (<xref ref-type="bibr" rid="B28">28</xref>&#x2013;<xref ref-type="bibr" rid="B30">30</xref>). We could incorporate the causal inference ideas in designing inherently interpretable models by adding sample reweighting technique into the loss function to compare the performance with our deep learning result in the future (<xref ref-type="bibr" rid="B31">31</xref>&#x2013;<xref ref-type="bibr" rid="B34">34</xref>). Even though the SEER database has numerous NSCLC patients&#x2019; medical records, the database could record more detailed attributes in three aspects, including 1) resection information in detail, like resection status (R0/R1/R2); 2) detailed information related to beam radiation, for instance, total dose and dose per fraction; 3) further information relevant to chemotherapy on drugs and dosage.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>To our best knowledge, this study is the first to research the performance of a deep learning network and random forest in resected Stage-III NSCLC and obtain satisfactory results in survival prediction. In addition, the recommendation system for adjuvant therapy based on the deep learning model will be likely applied to offer recommendation reference to the specialist in the clinical practice.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM2">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>LJ and QZ designed the research. LJ collected the training and testing dataset. QZ trained the models and developed the web-application. LJ and QZ wrote the manuscript. JM, BH, SF and FC edited and critically revised the manuscript in regard to important intellectual content. All authors read and approved the manuscript.</p>
</sec>
</body>
<back>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>This research was funded by a grant from the Science and Technology Foundation of Shaanxi Province (2022JQ-934 and 2022JQ-862) and the Shaanxi Provincial People&#x2019;s Hospital (2021JY-07).</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fonc.2023.1092478/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fonc.2023.1092478/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.csv" id="SM1" mimetype="text/csv"/>
<supplementary-material xlink:href="DataSheet_2.csv" id="SM2" mimetype="text/csv"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Putora</surname> <given-names>PM</given-names>
</name>
<name>
<surname>Leskow</surname> <given-names>P</given-names>
</name>
<name>
<surname>McDonald</surname> <given-names>F</given-names>
</name>
<name>
<surname>Batchelor</surname> <given-names>T</given-names>
</name>
<name>
<surname>Evison</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>International guidelines on stage iii n2 nonsmall cell lung cancer: surgery or radiotherapy</article-title>? <source>ERJ Open Res</source> (<year>2020</year>) <volume>6</volume>. doi: <pub-id pub-id-type="doi">10.1183/23120541.00159-2019</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kato</surname> <given-names>S</given-names>
</name>
<name>
<surname>Okamura</surname> <given-names>R</given-names>
</name>
<name>
<surname>Mareboina</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S</given-names>
</name>
<name>
<surname>Goodman</surname> <given-names>A</given-names>
</name>
<name>
<surname>Patel</surname> <given-names>SP</given-names>
</name>
<etal/>
</person-group>. <article-title>Revisiting epidermal growth factor receptor (egfr) amplification as a target for anti-egfr therapy: analysis of cell-free circulating tumor dna in patients with advanced malignancies</article-title>. <source>JCO Precis Oncol</source> (<year>2019</year>) <volume>3</volume>:<fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1200/PO.18.00180</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Felip</surname> <given-names>E</given-names>
</name>
<name>
<surname>Altorki</surname> <given-names>N</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>C</given-names>
</name>
<name>
<surname>Csoszi</surname> <given-names>T</given-names>
</name>
<name>
<surname>Vynnychenko</surname> <given-names>I</given-names>
</name>
<name>
<surname>Goloborodko</surname> <given-names>O</given-names>
</name>
<etal/>
</person-group>. <article-title>Adjuvant atezolizumab after adjuvant chemotherapy in resected stage ib&#x2013;iiia non-small-cell lung cancer (impower010): a randomised, multicentre, open-label, phase 3 trial</article-title>. <source>Lancet</source> (<year>2021</year>) <volume>398</volume>:<page-range>1344&#x2013;57</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S0140-6736(21)02098-5</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname> <given-names>W</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Survival patterns for patients with resected n2 non&#x2013;small cell lung cancer and postoperative radiotherapy: a prognostic scoring model and heat map approach</article-title>. <source>J Thorac Oncol</source> (<year>2018</year>) <volume>13</volume>:<page-range>1968&#x2013;74</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.jtho.2018.08.2021</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>S</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>M</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>J</given-names>
</name>
<name>
<surname>Song</surname> <given-names>X</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>B</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Propensity score-matching analysis of postoperative radiotherapy for stage iiia-n2 non-small cell lung cancer using the surveillance, epidemiology, and end results database</article-title>. <source>Radiat Oncol</source> (<year>2017</year>) <volume>12</volume>:<fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13014-017-0836-6</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>F</given-names>
</name>
<name>
<surname>Li</surname> <given-names>N</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Evaluation of postoperative radiotherapy effect on survival of resected stage iii-n2 non-small cell lung cancer patients</article-title>. <source>Front Oncol</source> (<year>2020</year>) <volume>10</volume>:<elocation-id>1135</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2020.01135</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>EH</given-names>
</name>
<name>
<surname>Corso</surname> <given-names>CD</given-names>
</name>
<name>
<surname>Park</surname> <given-names>HS</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>AB</given-names>
</name>
<name>
<surname>Wilson</surname> <given-names>LD</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>AW</given-names>
</name>
<etal/>
</person-group>. <article-title>Association between radiation dose and outcomes with postoperative radiotherapy for n0-n1 non&#x2013;small cell lung cancer</article-title>. <source>Am J Clin Oncol</source> (<year>2018</year>) <volume>41</volume>:<page-range>152&#x2013;8</page-range>. doi: <pub-id pub-id-type="doi">10.1097/COC.0000000000000245</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Le Pechoux</surname> <given-names>C</given-names>
</name>
<name>
<surname>Pourel</surname> <given-names>N</given-names>
</name>
<name>
<surname>Barlesi</surname> <given-names>F</given-names>
</name>
<name>
<surname>Lerouge</surname> <given-names>D</given-names>
</name>
<name>
<surname>Antoni</surname> <given-names>D</given-names>
</name>
<name>
<surname>Lamezec</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Postoperative radiotherapy versus no postoperative radiotherapy in patients with completely resected non-small-cell lung cancer and proven mediastinal n2 involvement (lung art): an open-label, randomised, phase 3 trial</article-title>. <source>Lancet Oncology</source> (<year>2022</year>) <volume>23</volume>:<page-range>104&#x2013;14</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S1470-2045(21)00606-9</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>C</given-names>
</name>
<name>
<surname>Light</surname> <given-names>A</given-names>
</name>
<name>
<surname>Alaa</surname> <given-names>A</given-names>
</name>
<name>
<surname>Thurtle</surname> <given-names>D</given-names>
</name>
<name>
<surname>van der Schaar</surname> <given-names>M</given-names>
</name>
<name>
<surname>Gnanapragasam</surname> <given-names>VJ</given-names>
</name>
</person-group>. <article-title>Application of a novel machine learning framework for predicting non-metastatic prostate cancer-specific mortality in men using the surveillance, epidemiology, and end results (seer) database</article-title>. <source>Lancet Digital Health</source> (<year>2021</year>) <volume>3</volume>:<page-range>e158&#x2013;65</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30314-9</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jones</surname> <given-names>O</given-names>
</name>
<name>
<surname>Matin</surname> <given-names>R</given-names>
</name>
<name>
<surname>van der Schaar</surname> <given-names>M</given-names>
</name>
<name>
<surname>Bhayankaram</surname> <given-names>KP</given-names>
</name>
<name>
<surname>Ranmuthu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Islam</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Artificial intelligence and machine learning algorithms for early detection of skin cancer in community and primary care settings: a systematic review</article-title>. <source>Lancet Digital Health</source> (<year>2022</year>) <volume>4</volume>:<page-range>e466&#x2013;76</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S2589-7500(22)00023-1</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>C</given-names>
</name>
<name>
<surname>Light</surname> <given-names>A</given-names>
</name>
<name>
<surname>Saveliev</surname> <given-names>ES</given-names>
</name>
<name>
<surname>van der Schaar</surname> <given-names>M</given-names>
</name>
<name>
<surname>Gnanapragasam</surname> <given-names>VJ</given-names>
</name>
</person-group>. <article-title>Developing machine learning algorithms for dynamic estimation of progression during active surveillance for prostate cancer</article-title>. <source>NPJ digital Med</source> (<year>2022</year>) <volume>5</volume>:<fpage>1</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41746-022-00659-w</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>H</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>C</given-names>
</name>
<name>
<surname>Lipton</surname> <given-names>ZC</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>GH</given-names>
</name>
<name>
<surname>Weiss</surname> <given-names>JC</given-names>
</name>
</person-group>. <article-title>Predicting mortality risk in viral and unspecified pneumonia to assist clinicians with covid-19 ecmo planning</article-title>. <source>arXiv preprint arXiv:2006.01898</source> (<year>2020</year>). doi: <pub-id pub-id-type="doi">10.48550/arXiv.2006.01898</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>S</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>GH</given-names>
</name>
</person-group>. <article-title>Distributionally robust survival analysis: A novel fairness loss without demographics</article-title>. <source>Mach Learn Health (PMLR)</source> (<year>2022</year>), <fpage>62</fpage>&#x2013;<lpage>87</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2211.10508</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiappetta</surname> <given-names>M</given-names>
</name>
<name>
<surname>Tabacco</surname> <given-names>D</given-names>
</name>
<name>
<surname>Iaffaldano</surname> <given-names>AG</given-names>
</name>
<name>
<surname>Evangelista</surname> <given-names>J</given-names>
</name>
<name>
<surname>Congedo</surname> <given-names>MT</given-names>
</name>
<name>
<surname>Sassorossi</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>Clinical stage iii nsclc patients treated with neoadjuvant therapy and surgery: The prognostic role of nodal characteristics</article-title>. <source>Life</source> (<year>2022</year>) <volume>12</volume>:<fpage>1753</fpage>. doi: <pub-id pub-id-type="doi">10.3390/life12111753</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jagjampi</surname> <given-names>A</given-names>
</name>
<name>
<surname>Khadirnaikar</surname> <given-names>S</given-names>
</name>
<name>
<surname>Malik</surname> <given-names>PS</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>D</given-names>
</name>
<name>
<surname>N</surname> <given-names>MB</given-names>
</name>
<name>
<surname>Shukla</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Deepmps: Development and validation of a deep learning model for whole slide image base prognostic prediction of low grade lung adenocarcinoma patients</article-title>. <source>bioRxiv</source> (<year>2022</year>) <volume>2022</volume>:<fpage>12</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2022.12.27.522072</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lococo</surname> <given-names>F</given-names>
</name>
<name>
<surname>Chiappetta</surname> <given-names>M</given-names>
</name>
<name>
<surname>Evangelista</surname> <given-names>J</given-names>
</name>
<name>
<surname>Sperduti</surname> <given-names>I</given-names>
</name>
<name>
<surname>Nachira</surname> <given-names>D</given-names>
</name>
<name>
<surname>Porziella</surname> <given-names>V</given-names>
</name>
<etal/>
</person-group>. <article-title>Role of peripheral blood markers for detecting response and predicting prognosis in patients with non-small-cell lung cancer undergoing neoadjuvant therapy and surgery</article-title>. <source>Lung</source> (<year>2022</year>) <volume>200</volume>:<fpage>393</fpage>&#x2013;<lpage>400</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00408-022-00541-2</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>X</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>W</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>B</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>A novel deep learning prognostic system improves survival predictions for stage iii non-small cell lung cancer</article-title>. <source>Cancer Med</source> (<year>2022</year>) <volume>11</volume>:<page-range>4246&#x2013;55</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cam4.4782</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname> <given-names>DP</given-names>
</name>
<name>
<surname>Ba</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Adam: A method for stochastic optimization</article-title>. <source>arXiv preprint arXiv</source> (<year>2014</year>) <volume>1412</volume>:<fpage>6980</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sundararajan</surname> <given-names>M</given-names>
</name>
<name>
<surname>Taly</surname> <given-names>A</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Q</given-names>
</name>
</person-group>. (<year>2017</year>). <article-title>Axiomatic attribution for deep networks</article-title>, in: <conf-name>Proceedings of the 34th International Conference on Machine Learning</conf-name>, <volume>70</volume> ICML&#x2019;17:<page-range>3319&#x2013;28</page-range>. doi: <pub-id pub-id-type="doi">10.5555/3305890.3306024</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katzman</surname> <given-names>JL</given-names>
</name>
<name>
<surname>Shaham</surname> <given-names>U</given-names>
</name>
<name>
<surname>Cloninger</surname> <given-names>A</given-names>
</name>
<name>
<surname>Bates</surname> <given-names>J</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>T</given-names>
</name>
<name>
<surname>Kluger</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>Deepsurv: personalized treatment recommender system using a cox proportional hazards deep neural network</article-title>. <source>BMC Med Res Method</source> (<year>2018</year>) <volume>18</volume>:<fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12874-018-0482-1</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adeoye</surname> <given-names>J</given-names>
</name>
<name>
<surname>Koohi-Moghadam</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lo</surname> <given-names>AWI</given-names>
</name>
<name>
<surname>Tsang</surname> <given-names>RKY</given-names>
</name>
<name>
<surname>Chow</surname> <given-names>VLY</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>LW</given-names>
</name>
<etal/>
</person-group>. <article-title>Deep learning predicts the malignant-transformation-free survival of oral potentially malignant disorders</article-title>. <source>Cancers</source> (<year>2021</year>) <volume>13</volume>:<fpage>6054</fpage>. doi: <pub-id pub-id-type="doi">10.3390/cancers13236054</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>C</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>Development and validation of a deep learning model to predict survival of patients with esophageal cancer</article-title>. <source>Front Oncol</source> (<year>2022</year>) <volume>12</volume>:<elocation-id>971190</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2022.971190</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>J</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>M</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>The development of a prediction model based on random survival forest for the postoperative prognosis of pancreatic cancer: A seer-based study</article-title>. <source>Cancers</source> (<year>2022</year>) <volume>14</volume>:<fpage>4667</fpage>. doi: <pub-id pub-id-type="doi">10.3390/cancers14194667</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schaake-Koning</surname> <given-names>C</given-names>
</name>
<name>
<surname>Van den Bogaert</surname> <given-names>W</given-names>
</name>
<name>
<surname>Dalesio</surname> <given-names>O</given-names>
</name>
<name>
<surname>Festen</surname> <given-names>J</given-names>
</name>
<name>
<surname>Hoogenhout</surname> <given-names>J</given-names>
</name>
<name>
<surname>van Houtte</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>Effects of concomitant cisplatin and radiotherapy on inoperable non-small-cell lung cancer</article-title>. <source>New Engl J Med</source> (<year>1992</year>) <volume>326</volume>:<page-range>524&#x2013;30</page-range>. doi: <pub-id pub-id-type="doi">10.1056/NEJM199202203260805</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dillman</surname> <given-names>RO</given-names>
</name>
<name>
<surname>Seagren</surname> <given-names>SL</given-names>
</name>
<name>
<surname>Propert</surname> <given-names>KJ</given-names>
</name>
<name>
<surname>Guerra</surname> <given-names>J</given-names>
</name>
<name>
<surname>Eaton</surname> <given-names>WL</given-names>
</name>
<name>
<surname>Perry</surname> <given-names>MC</given-names>
</name>
<etal/>
</person-group>. <article-title>A randomized trial of induction chemotherapy plus high-dose radiation versus radiation alone in stage iii non-small-cell lung cancer</article-title>. <source>New Engl J Med</source> (<year>1990</year>) <volume>323</volume>:<page-range>940&#x2013;5</page-range>. doi: <pub-id pub-id-type="doi">10.1056/NEJM199010043231403</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dillman</surname> <given-names>RO</given-names>
</name>
<name>
<surname>Herndon</surname> <given-names>J</given-names>
</name>
<name>
<surname>Seagren</surname> <given-names>SL</given-names>
</name>
<name>
<surname>Eaton</surname> <given-names>WL</given-names>
<suffix>Jr.</suffix>
</name>
<name>
<surname>Green</surname> <given-names>MR</given-names>
</name>
</person-group>. <article-title>Improved survival in stage iii non-small-cell lung cancer: seven-year follow-up of cancer and leukemia group b (calgb) 8433 trial</article-title>. <source>JNCI: J Natl Cancer Institute</source> (<year>1996</year>) <volume>88</volume>:<page-range>1210&#x2013;5</page-range>. doi: <pub-id pub-id-type="doi">10.1093/jnci/88.17.1210</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mulder</surname> <given-names>R</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>AB</given-names>
</name>
<name>
<surname>Hamilton</surname> <given-names>A</given-names>
</name>
<name>
<surname>Das</surname> <given-names>P</given-names>
</name>
<name>
<surname>Outhred</surname> <given-names>T</given-names>
</name>
<name>
<surname>Morris</surname> <given-names>G</given-names>
</name>
<etal/>
</person-group>. <article-title>The limitations of using randomised controlled trials as a basis for developing treatment guidelines</article-title>. <source>Evidence-Based Ment Health</source> (<year>2018</year>) <volume>21</volume>:<fpage>4</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1136/eb-2017-102701</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pearl</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Theoretical impediments to machine learning with seven sparks from the causal revolution</article-title>. <source>arXiv preprint arXiv</source> (<year>2018</year>) <volume>1801</volume>:<fpage>04016</fpage>. doi: <pub-id pub-id-type="doi">10.1145/3159652.3176182</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heinze-Deml</surname> <given-names>C</given-names>
</name>
<name>
<surname>Meinshausen</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>Conditional variance penalties and domain shift robustness</article-title>. <source>arXiv preprint arXiv</source> (<year>2017</year>) <volume>1710</volume>:<fpage>11469</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1710.11469</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Athey</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Bryan</surname> <given-names>KA</given-names>
</name>
<name>
<surname>Gans</surname> <given-names>JS</given-names>
</name>
</person-group>. (<year>2020</year>). <article-title>The allocation of decision authority to human and artificial intelligence</article-title>, in: <conf-name>AEA Papers and Proceedings</conf-name>, <conf-loc>American Economic Association 2014 Broadway, Suite 305, Nashville, TN 37203</conf-loc>, Vol. <volume>110</volume>. pp. <page-range>80&#x2013;4</page-range>. Available at: <uri xlink:href="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3517287">https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3517287</uri>.</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname> <given-names>P</given-names>
</name>
<name>
<surname>Athey</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Stable learning establishes some common ground between causal inference and machine learning</article-title>. <source>Nat Mach Intell</source> (<year>2022</year>) <volume>4</volume>:<page-range>110&#x2013;5</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s42256-022-00445-z</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>R</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>P</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T</given-names>
</name>
</person-group>. <article-title>Why stable learning works? a theory of covariate shift generalization</article-title>. <source>arXiv preprint arXiv:2111.02355</source> (<year>2021</year>) <volume>2</volume>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2111.02355</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kuang</surname> <given-names>K</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>P</given-names>
</name>
<name>
<surname>Athey</surname> <given-names>S</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>R</given-names>
</name>
<name>
<surname>Li</surname> <given-names>B</given-names>
</name>
</person-group>. (<year>2018</year>). <article-title>Stable prediction across unknown environments</article-title>, in: <conf-name>Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery &amp; data mining</conf-name>, pp. <page-range>1617&#x2013;26</page-range>. doi: <pub-id pub-id-type="doi">10.1145/3219819.3220082</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J</given-names>
</name>
<name>
<surname>He</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>R</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>H</given-names>
</name>
<etal/>
</person-group>. <article-title>Towards out-of-distribution generalization: A survey</article-title>. <source>arXiv preprint arXiv:2108.13624</source> (<year>2021</year>). doi: <pub-id pub-id-type="doi">10.48550/arXiv.2108.13624</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>