<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2024.1368217</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Public Health</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Application value of the automated machine learning model based on modified CT index combined with serological indices in the early prediction of lung cancer</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Meng</surname> <given-names>Leyuan</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Zhu</surname> <given-names>Ping</given-names></name><xref ref-type="aff" rid="aff2"><sup>2</sup></xref><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2626206/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Xia</surname> <given-names>Kaijian</given-names></name><xref ref-type="aff" rid="aff2"><sup>2</sup></xref><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1046594/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Respiratory and Critical Care Medicine, Affiliated Hospital of Nantong University, Medical School of Nantong University</institution>, <addr-line>Jiangsu, Nantong</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Scientific Research, The Changshu Affiliated Hospital of Soochow University</institution>, <addr-line>Jiangsu, Suzhou</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Changshu Key Laboratory of Medical Artificial Intelligence and Big Data</institution>, <addr-line>Jiangsu, Suzhou</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0002">
<p>Edited by: Yi Zhu, Yangzhou University, China</p>
</fn>
<fn fn-type="edited-by" id="fn0003">
<p>Reviewed by: Rao L. Divi, National Institutes of Health (NIH), United States</p>
<p>Xiangmin Meng, Zhejiang Wanli University, China</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Ping Zhu, <email>zhuping5262@163.com</email>; Kaijian Xia, <email>kjxia@suda.edu.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>04</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1368217</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>03</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Meng, Zhu and Xia.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Meng, Zhu and Xia</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Background and objective</title>
<p>Accurately predicting the extent of lung tumor infiltration is crucial for improving patient survival and cure rates. This study aims to evaluate the application value of an improved CT index combined with serum biomarkers, obtained through an artificial intelligence recognition system analyzing CT features of pulmonary nodules, in early prediction of lung cancer infiltration using machine learning models.</p>
</sec>
<sec id="sec2">
<title>Patients and methods</title>
<p>A retrospective analysis was conducted on clinical data of 803 patients hospitalized for lung cancer treatment from January 2020 to December 2023 at two hospitals: Hospital 1 (Affiliated Changshu Hospital of Soochow University) and Hospital 2 (Nantong Eighth People&#x2019;s Hospital). Data from Hospital 1 were used for internal training, while data from Hospital 2 were used for external validation. Five algorithms, including traditional logistic regression (LR) and machine learning techniques (generalized linear models [GLM], random forest [RF], gradient boosting machine [GBM], deep neural network [DL], and naive Bayes [NB]), were employed to construct models predicting early lung cancer infiltration and were analyzed. The models were comprehensively evaluated through receiver operating characteristic curve (AUC) analysis based on LR, calibration curves, decision curve analysis (DCA), as well as global and individual interpretative analyses using variable feature importance and SHapley additive explanations (SHAP) plots.</p>
</sec>
<sec id="sec3">
<title>Results</title>
<p>A total of 560 patients were used for model development in the training dataset, while a dataset comprising 243 patients was used for external validation. The GBM model exhibited the best performance among the five algorithms, with AUCs of 0.931 and 0.99 in the validation and test sets, respectively, and accuracies of 0.857 and 0.955 in the validation and test groups, respectively, outperforming other models. Additionally, the study found that nodule diameter and average CT value were the most significant features for predicting lung cancer infiltration using machine learning models.</p>
</sec>
<sec id="sec4">
<title>Conclusion</title>
<p>The GBM model established in this study can effectively predict the risk of infiltration in early-stage lung cancer patients, thereby improving the accuracy of lung cancer screening and facilitating timely intervention for infiltrative lung cancer patients by clinicians, leading to early diagnosis and treatment of lung cancer, and ultimately reducing lung cancer-related mortality.</p>
</sec>
</abstract>
<kwd-group>
<kwd>automated machine learning</kwd>
<kwd>predictive models</kwd>
<kwd>infiltrative lung cancer</kwd>
<kwd>medical image artificial intelligence recognition system (MIARS)</kwd>
<kwd>7-TAABs</kwd>
</kwd-group>
<contract-num rid="cn1">SZFCXK202147</contract-num>
<contract-num rid="cn2">CS202015</contract-num>
<contract-num rid="cn2">CS202246</contract-num>
<contract-num rid="cn3">CYZ202301</contract-num>
<contract-num rid="cn3">CS202314</contract-num>
<contract-sponsor id="cn1">Suzhou Key Supporting Subjects</contract-sponsor>
<contract-sponsor id="cn2">Changshu Science and Technology Program</contract-sponsor>
<contract-sponsor id="cn3">Changshu Key Laboratory of Medical Artificial Intelligence and Big Data</contract-sponsor>
<counts>
<fig-count count="10"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="37"/>
<page-count count="11"/>
<word-count count="6196"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<label>1</label>
<title>Introduction</title>
<p>Lung cancer is globally recognized as one of the malignancies with the highest incidence and mortality rates. According to the 2022 global cancer statistics survey, an average of approximately 350 individuals die from lung cancer every day, surpassing the combined total of breast, prostate, and pancreatic cancers. In China, lung cancer deaths account for 23.8% of the total cancer-related deaths, with the incidence and mortality rates ranking highest globally (<xref ref-type="bibr" rid="ref1">1</xref>). Due to factors such as existing medical conditions and awareness of check-ups, many patients are diagnosed with late-stage lung cancer during their initial medical visits. Effective treatment options for late-stage lung cancer are limited, with a 5-year cumulative survival rate of only 19% (<xref ref-type="bibr" rid="ref2">2</xref>). Early screening significantly improves the prognosis and survival of lung cancer patients (<xref ref-type="bibr" rid="ref3">3</xref>), so early screening and diagnosis are key to reducing lung cancer mortality and improving survival rates.</p>
<p>Currently, there is a lack of effective early screening methods, with emphasis placed on low-dose spiral computed tomography (LDCT) scans, biological tumor markers, and tumor autoantibody screening (<xref ref-type="bibr" rid="ref4">4</xref>). However, these methods suffer from drawbacks such as high false positive rates, inadequate sensitivity, and suboptimal accuracy. Therefore, we attempt to accurately predict tumor malignancy and infiltration depth using an improved CT index obtained through artificial intelligence recognition technology combined with serum biomarkers consisting of lung cancer autoantibodies and tumor markers. This approach aims to assist clinicians in making more informed treatment decisions and improving patient survival benefits.</p>
<p>Machine learning, as a subset of artificial intelligence, has shown remarkable prospects in various fields such as economics, finance, business management, and bioinformatics. In the healthcare sector, it demonstrates outstanding applications in analyzing disease-related factors, predicting risks, and computer-aided diagnosis (<xref ref-type="bibr" rid="ref5 ref6 ref7">5&#x2013;7</xref>). Automated machine learning (AutoML) automates the application of machine learning to data by iteratively transforming data, selecting machine learning algorithms, and optimizing hyperparameters to choose the best model.</p>
<p>The aim of this study is to evaluate the predictive value of an improved CT index combined with serum biomarkers using a GBM model for early diagnosis of lung cancer. Clinical data from lung cancer patients from two hospitals were collected, and training, validation, and testing were conducted using the H2OAutoML platform. The performance of the GBM model was compared with traditional logistic regression (LR) to assess its efficacy.</p>
</sec>
<sec sec-type="materials|methods" id="sec6">
<label>2</label>
<title>Materials and methods</title>
<sec id="sec7">
<label>2.1</label>
<title>Inclusion and exclusion criteria</title>
<p>We retrospectively collected and analyzed data from patients who underwent lung cancer surgery at the Affiliated Changshu Hospital of Soochow University and Nantong Eighth People&#x2019;s Hospital from January 2020 to December 2023. Patients enrolled from January 2020 to December 2023 at the Affiliated Changshu Hospital of Soochow University were used as the training set, while patients enrolled from October 2022 to December 2023 at Nantong Eighth People&#x2019;s Hospital were used as the testing set.</p>
<p>The diagnostic criteria for lung cancer were referenced from the 2021 Fifth Edition of the WHO Classification of Thoracic Tumors (<xref ref-type="bibr" rid="ref8">8</xref>). Diagnosis of lung cancer required meeting the following criteria: (1) Confirmation of lung nodules by chest CT without any clinical or drug intervention; (2) Definitive pathological results confirming benign or malignant nodules after chest CT; (3) Age&#x2009;&#x2265;&#x2009;18&#x2009;years; (4) Preoperative testing for 7 lung cancer autoantibodies and tumor markers; (5) Absence of significant dysfunction in other major organs; (6) Absence of other primary malignant tumors; and (7) Lung nodule diameter&#x2009;&#x2264;&#x2009;3&#x2009;cm. Exclusion criteria included: absence of pathological examination despite confirmed lung nodules on chest CT; failure to undergo testing for the 7 lung cancer autoantibodies and tumor markers; clinical or drug intervention prior to blood sampling; presence of rheumatic immunological diseases; lung metastasis from other tumors; lung nodule diameter&#x2009;&#x003E;&#x2009;3&#x2009;cm. This study was approved by the hospital ethics committee.</p>
</sec>
<sec id="sec8">
<label>2.2</label>
<title>Data collection</title>
<p>Demographic features, clinical information, and comorbidities were extracted from electronic medical records. Chest plain scans were performed using a 64-slice spiral CT scanner to obtain conventional CT imaging features, including air bronchogram sign, spiculated sign, lobulation sign, vascular penetration, pleural retraction, bronchial inflation sign, nodule diameter, and solid proportion. The patients&#x2019; CT data were then imported into the DeepRay medical image AI recognition system, which extracted quantitative features from the medical images in high throughput and combined them with convolutional neural networks to train deep learning models on the nodule&#x2019;s size, density, and proportion of solidity, yielding the improved CT indices: the pulmonary nodule&#x2019;s malignancy probability value and average CT value. Serum biomarkers primarily included 7 tumor-associated autoantibodies (TAABs) and commonly used tumor markers recommended by the American Clinical Biochemistry Committee and the European Tumor Marker Expert Group. TAABs detection involved collecting fasting peripheral venous blood (<xref ref-type="bibr" rid="ref9 ref10 ref11 ref12">9&#x2013;12</xref>) from patients preoperatively.
After centrifugation to separate serum, the levels of 7 lung cancer autoantibodies were measured using enzyme-linked immunosorbent assay (ELISA) (<xref ref-type="bibr" rid="ref13">13</xref>), including tumor suppressor gene P53 (normal reference range: P53&#x2009;&#x003C;&#x2009;13.09&#x2009;U/mL), protein gene product PGP 9.5 (normal reference range: PGP9.5&#x2009;&#x003C;&#x2009;11.1&#x2009;U/mL), SRY-box containing gene 2 (normal reference range: SOX2&#x2009;&#x003C;&#x2009;10.26&#x2009;U/mL), G antigen 7 (GAGE7) (normal reference range: GAGE7&#x2009;&#x003C;&#x2009;14.36&#x2009;U/mL), RNA helicase autoantibody 4&#x2013;5 (GBU4-5) (normal reference range: GBU4-5&#x2009;&#x003C;&#x2009;6.99&#x2009;U/mL), melanoma antigen A1 (MAGEA1) (normal reference range: MAGEA1&#x2009;&#x003C;&#x2009;11.92&#x2009;U/mL), and tumor-associated gene CAGE (normal reference range: CAGE &#x003C;7.23&#x2009;U/mL). TAABs detection results were considered positive if any of the indicators exceeded the normal reference range. Tumor markers were collected from blood tests and included primary lung cancer markers such as vascular endothelial growth factor (VEGF), carcinoembryonic antigen (CEA), neuron-specific enolase (NSE), cytokeratin fragment 19 (CYFRA21-1), pro-gastrin-releasing peptide (ProGRP), and squamous cell carcinoma antigen (SCC) (<xref ref-type="bibr" rid="ref14">14</xref>).</p>
</sec>
<sec id="sec9">
<label>2.3</label>
<title>Automated machine learning</title>
<p>Through the AI platform<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref>, the H2O package is installed in the R language to implement AutoML analysis. Autonomy and automation are achieved through three aspects: feature selection, model construction, and hyperparameter optimization. The integrated algorithms include Generalized Linear Models (GLM), Random Forests (RF), Gradient Boosting Machines (GBM), Deep Neural Networks (DL), and Naive Bayes (NB), among others. The training set is split into development and validation sets in a 6:4 ratio, and blind verification is conducted with the testing set to evaluate the average accuracy and stability of the models. A confusion matrix consisting of true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) is established (<xref ref-type="bibr" rid="ref15">15</xref>). Performance metrics including sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), positive likelihood ratio (LR+), negative likelihood ratio (LR&#x2212;), accuracy, area under the receiver operating characteristic curve (AUC), and the F1-Measure are calculated. Formulas for calculation are as follows: Accuracy&#x2009;=&#x2009;(TP&#x2009;+&#x2009;TN)/(TP&#x2009;+&#x2009;FP&#x2009;+&#x2009;FN&#x2009;+&#x2009;TN); PPV&#x2009;=&#x2009;TP/(TP&#x2009;+&#x2009;FP); NPV&#x2009;=&#x2009;TN/(TN&#x2009;+&#x2009;FN); LR+&#x2009;=&#x2009;Sensitivity/(1&#x2212;Specificity); LR&#x2212;&#x2009;=&#x2009;(1&#x2212;Sensitivity)/Specificity; F1-Measure&#x2009;=&#x2009;(2&#x002A;precision&#x002A;recall)/(precision+recall). Through SHAP analysis (Shapley Additive Explanations), an additive explanatory model is constructed to determine significant factors influencing model predictions and their contributions to model performance.</p>
</sec>
<sec id="sec10">
<label>2.4</label>
<title>Statistical analysis</title>
<p>For continuous data, the Shapiro&#x2013;Wilk normality test and a homogeneity of variance test were first performed. For normally distributed and homoscedastic continuous data, independent samples t-tests were employed, and results were presented as mean&#x2009;&#x00B1;&#x2009;standard deviation. For non-normally distributed or heteroscedastic continuous data, the Wilcoxon rank-sum test was used, and results were presented as median (M25, M75). Categorical data were expressed as frequencies and percentages, and inter-group differences were assessed using the chi-square test or Fisher&#x2019;s exact test. To prevent multicollinearity among variables, feature selection was conducted using the Least Absolute Shrinkage and Selection Operator (LASSO) regression model. Based on the selected variables, a binary logistic regression model was fitted. The predictive performance of the obtained model was evaluated using the area under the receiver operating characteristic curve (AUC), calibration curve, and decision curve analysis (DCA), and a nomogram was constructed. The statistical significance level was set at <italic>p</italic>&#x2009;&#x003C;&#x2009;0.05. All statistical analyses were performed using R 4.3.3 software.</p>
</sec>
</sec>
<sec sec-type="results" id="sec11">
<label>3</label>
<title>Results</title>
<sec id="sec12">
<label>3.1</label>
<title>Baseline characteristics</title>
<p>A total of 803 lung cancer patients were included in this study, with 376 cases (46.8%) exhibiting infiltrative lesions. The study protocol is detailed in <xref ref-type="fig" rid="fig1">Figure 1</xref>. Among them, 560 patients from the Affiliated Changshu Hospital of Soochow University (Hospital 1) were included in the training set. Nantong Eighth People&#x2019;s Hospital (Hospital 2) contributed 243 patients as the testing set. In the training set, 64.3% (360/560) were male and 35.7% (200/560) were female, with a median age of 55&#x2009;years. In the testing set, females were more common in the infiltrative group, and the age range of 40&#x2013;60&#x2009;years was the peak incidence, consistent with previous reports (<xref ref-type="bibr" rid="ref16">16</xref>). There were no statistically significant differences between the infiltrative and non-infiltrative groups in terms of age, CYFRA21-1 (CY211), NSE, and lobulation sign (Leafing) (<italic>p</italic>&#x2009;&#x003E;&#x2009;0.05). Details are shown in <xref ref-type="table" rid="tab1">Table 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Roadmap for the research program.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g001.tif"/>
</fig>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Baseline characteristics of patients in training and test groups.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Variable</th>
<th align="left" valign="top" rowspan="2">Group</th>
<th align="center" valign="top" colspan="2">Training data set (<italic>n</italic> =&#x2009;560)</th>
<th align="center" valign="top" rowspan="2">Z/&#x03C7;<sup>2</sup></th>
<th align="center" valign="top" rowspan="2">
<italic>p</italic>
</th>
<th align="center" valign="top" colspan="2">Test data set (<italic>n</italic> =&#x2009;243)</th>
<th align="center" valign="top" rowspan="2">Z/&#x03C7;<sup>2</sup></th>
<th align="center" valign="top" rowspan="2">
<italic>p</italic>
</th>
</tr>
<tr>
<th align="center" valign="top">Non-infiltration (<italic>n</italic> =&#x2009;297)</th>
<th align="center" valign="top">Infiltration (<italic>n</italic> =&#x2009;263)</th>
<th align="center" valign="top">Non-infiltration (<italic>n</italic> =&#x2009;130)</th>
<th align="center" valign="top">Infiltration (<italic>n</italic> =&#x2009;113)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Age</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">53.8 (46.4, 64.57)</td>
<td align="center" valign="middle">54.4 (46.32, 62.45)</td>
<td align="center" valign="middle">&#x2212;0.593</td>
<td align="center" valign="middle">0.553</td>
<td align="center" valign="middle">54.75 (43.25, 64.24)</td>
<td align="center" valign="middle">55.09 (47.86, 62.49)</td>
<td align="center" valign="middle">&#x2212;0.478</td>
<td align="center" valign="middle">0.633</td>
</tr>
<tr>
<td align="left" valign="middle">Gender</td>
<td align="left" valign="middle">Male</td>
<td align="center" valign="middle">174 (58.6%)</td>
<td align="center" valign="middle">186 (70.7%)</td>
<td align="center" valign="middle">8.949</td>
<td align="center" valign="middle">0.003</td>
<td align="center" valign="middle">55 (42.3%)</td>
<td align="center" valign="middle">38 (33.6%)</td>
<td align="center" valign="middle">1.928</td>
<td align="center" valign="middle">0.165</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">Female</td>
<td align="center" valign="middle">123 (41.4%)</td>
<td align="center" valign="middle">77 (29.3%)</td>
<td/>
<td/>
<td align="center" valign="middle">75 (57.7%)</td>
<td align="center" valign="middle">75 (66.4%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">VEGF</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">128.53 (82.68, 172.74)</td>
<td align="center" valign="middle">152.29 (92.31, 214.55)</td>
<td align="center" valign="middle">&#x2212;3.583</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">117.47 (70.11, 159.96)</td>
<td align="center" valign="middle">153.05 (106.53, 210.52)</td>
<td align="center" valign="middle">&#x2212;3.833</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">CEA</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">2.15 (1.62, 2.73)</td>
<td align="center" valign="middle">2.56 (1.79, 3.54)</td>
<td align="center" valign="middle">&#x2212;4.926</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">2.79 (1.98, 3.61)</td>
<td align="center" valign="middle">2.00 (1.46, 2.70)</td>
<td align="center" valign="middle">&#x2212;4.979</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">CY211</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">2.02 (1.48, 2.67)</td>
<td align="center" valign="middle">1.90 (1.48, 2.34)</td>
<td align="center" valign="middle">&#x2212;1.896</td>
<td align="center" valign="middle">0.058</td>
<td align="center" valign="middle">1.86 (1.53, 2.46)</td>
<td align="center" valign="middle">2.09 (1.51, 2.56)</td>
<td align="center" valign="middle">&#x2212;1.447</td>
<td align="center" valign="middle">0.148</td>
</tr>
<tr>
<td align="left" valign="middle">NSE</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">13.12 (11.56, 14.61)</td>
<td align="center" valign="middle">13.37 (11.39, 15.49)</td>
<td align="center" valign="middle">&#x2212;1.029</td>
<td align="center" valign="middle">0.303</td>
<td align="center" valign="middle">13.21 (11.03, 15.55)</td>
<td align="center" valign="middle">13.44 (12.05, 14.98)</td>
<td align="center" valign="middle">&#x2212;0.540</td>
<td align="center" valign="middle">0.589</td>
</tr>
<tr>
<td align="left" valign="middle">SCC</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">0.71 (0.53, 0.87)</td>
<td align="center" valign="middle">0.87 (0.56, 1.20)</td>
<td align="center" valign="middle">&#x2212;5.389</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">0.90 (0.61, 1.25)</td>
<td align="center" valign="middle">13.44 (12.05, 14.98)</td>
<td align="center" valign="middle">&#x2212;4.263</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">proGRP</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">41.91 (30.01, 53.17)</td>
<td align="center" valign="middle">37.06 (29.09, 44.52)</td>
<td align="center" valign="middle">&#x2212;3.686</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">38.22 (30.20, 46.68)</td>
<td align="center" valign="middle">43.94 (29.60, 55.10)</td>
<td align="center" valign="middle">&#x2212;2.362</td>
<td align="center" valign="middle">0.018</td>
</tr>
<tr>
<td align="left" valign="middle">Malignant.probability</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">50.36 (34.13, 68.29)</td>
<td align="center" valign="middle">57.73 (41.70, 72.57)</td>
<td align="center" valign="middle">&#x2212;5.325</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">45.5 (30.75, 70.0)</td>
<td align="center" valign="middle">61.0 (44.5, 74.5)</td>
<td align="center" valign="middle">&#x2212;3.165</td>
<td align="center" valign="middle">0.002</td>
</tr>
<tr>
<td align="left" valign="middle">Mean.CT.value</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">&#x2212;255.48 (&#x2212;395.77, &#x2212;99.97)</td>
<td align="center" valign="middle">&#x2212;462.50 (&#x2212;571.97, &#x2212;361.17)</td>
<td align="center" valign="middle">&#x2212;12.247</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">&#x2212;255.09 (&#x2212;392.49, &#x2212;72.21)</td>
<td align="center" valign="middle">&#x2212;460.62 (&#x2212;579.04, &#x2212;345.02)</td>
<td align="center" valign="middle">&#x2212;7.952</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">Nodule.diameter</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">9.0 (6.0, 12.0)</td>
<td align="center" valign="middle">21.0 (13.0, 25.0)</td>
<td align="center" valign="middle">&#x2212;13.443</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">9.0 (6.0, 12.0)</td>
<td align="center" valign="middle">20.0 (14.0, 23.0)</td>
<td align="center" valign="middle">&#x2212;9.887</td>
<td align="center" valign="middle">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="middle">Proportion.of.solidity</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="center" valign="middle">0.5 (0.3, 0.7)</td>
<td align="center" valign="middle">0.6 (0.4, 0.8)</td>
<td align="center" valign="middle">&#x2212;4.649</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">0.495 (0.29, 0.68)</td>
<td align="center" valign="middle">0.61 (0.41, 0.77)</td>
<td align="center" valign="middle">&#x2212;2.839</td>
<td align="center" valign="middle">0.005</td>
</tr>
<tr>
<td align="left" valign="middle">TAABs</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">266 (89.6%)</td>
<td align="center" valign="middle">193 (73.4%)</td>
<td align="center" valign="middle">24.696</td>
<td align="center" valign="middle">&#x003C;0.001</td>
<td align="center" valign="middle">111 (85.4%)</td>
<td align="center" valign="middle">88 (77.9%)</td>
<td align="center" valign="middle">2.298</td>
<td align="center" valign="middle">0.130</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">31 (10.4%)</td>
<td align="center" valign="middle">70 (26.6%)</td>
<td/>
<td/>
<td align="center" valign="middle">19 (14.6%)</td>
<td align="center" valign="middle">25 (22.1%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">Vacuolar</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">246 (82.8%)</td>
<td align="center" valign="middle">218 (82.9%)</td>
<td align="center" valign="middle">0.000</td>
<td align="center" valign="middle">0.985</td>
<td align="center" valign="middle">105 (80.8%)</td>
<td align="center" valign="middle">74 (65.5%)</td>
<td align="center" valign="middle">7.278</td>
<td align="center" valign="middle">0.007</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">51 (17.2%)</td>
<td align="center" valign="middle">45 (17.1%)</td>
<td/>
<td/>
<td align="center" valign="middle">25 (19.2%)</td>
<td align="center" valign="middle">39 (34.5%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">Burr</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">180 (60.6%)</td>
<td align="center" valign="middle">173 (65.8%)</td>
<td align="center" valign="middle">1.602</td>
<td align="center" valign="middle">0.206</td>
<td align="center" valign="middle">77 (59.2%)</td>
<td align="center" valign="middle">82 (72.6%)</td>
<td align="center" valign="middle">4.753</td>
<td align="center" valign="middle">0.029</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">117 (39.4%)</td>
<td align="center" valign="middle">90 (34.2%)</td>
<td/>
<td/>
<td align="center" valign="middle">53 (40.8%)</td>
<td align="center" valign="middle">31 (27.4%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">Leafing</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">264 (88.9%)</td>
<td align="center" valign="middle">238 (90.5%)</td>
<td align="center" valign="middle">0.387</td>
<td align="center" valign="middle">0.534</td>
<td align="center" valign="middle">123 (94.6%)</td>
<td align="center" valign="middle">109 (96.5%)</td>
<td align="center" valign="middle">0.476</td>
<td align="center" valign="middle">0.490</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">33 (11.1%)</td>
<td align="center" valign="middle">25 (9.5%)</td>
<td/>
<td/>
<td align="center" valign="middle">7 (5.4%)</td>
<td align="center" valign="middle">4 (3.5%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">BV</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">108 (36.4%)</td>
<td align="center" valign="middle">98 (37.3%)</td>
<td align="center" valign="middle">0.048</td>
<td align="center" valign="middle">0.826</td>
<td align="center" valign="middle">53 (40.8%)</td>
<td align="center" valign="middle">30 (26.5%)</td>
<td align="center" valign="middle">5.436</td>
<td align="center" valign="middle">0.020</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">189 (63.6%)</td>
<td align="center" valign="middle">165 (62.7%)</td>
<td/>
<td/>
<td align="center" valign="middle">77 (59.2%)</td>
<td align="center" valign="middle">83 (73.5%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="middle">PI</td>
<td align="left" valign="middle">NO</td>
<td align="center" valign="middle">189 (63.6%)</td>
<td align="center" valign="middle">140 (53.2%)</td>
<td align="center" valign="middle">6.231</td>
<td align="center" valign="middle">0.013</td>
<td align="center" valign="middle">79 (60.8%)</td>
<td align="center" valign="middle">77 (68.1%)</td>
<td align="center" valign="middle">1.430</td>
<td align="center" valign="middle">0.232</td>
</tr>
<tr>
<td/>
<td align="left" valign="middle">YES</td>
<td align="center" valign="middle">108 (36.4%)</td>
<td align="center" valign="middle">123 (46.8%)</td>
<td/>
<td/>
<td align="center" valign="top">51 (39.2%)</td>
<td align="center" valign="top">36 (31.9%)</td>
<td/>
<td/>
</tr>
<tr>
<td align="left" valign="top">AB</td>
<td align="left" valign="top">NO</td>
<td align="center" valign="top">271 (91.2%)</td>
<td align="center" valign="top">242 (92.0%)</td>
<td align="center" valign="top">0.107</td>
<td align="center" valign="top">0.743</td>
<td align="center" valign="top">116 (89.2%)</td>
<td align="center" valign="top">110 (97.3%)</td>
<td align="center" valign="top">6.118</td>
<td align="center" valign="top">0.013</td>
</tr>
<tr>
<td/>
<td align="left" valign="top">YES</td>
<td align="center" valign="top">26 (8.8%)</td>
<td align="center" valign="top">21 (8.0%)</td>
<td/>
<td/>
<td align="center" valign="top">14 (10.8%)</td>
<td align="center" valign="top">3 (2.7%)</td>
<td/>
<td/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>VEGF indicates vascular endothelial growth factor; CEA, carcinoembryonic antigen; CY211, cytokeratin fragment 19; NSE, neuron specific enolase; SCC, squamous cell carcinoma antigen; ProGRP, progastrin releasing peptide; 7-TAAB, seven tumor-associated autoantibodies; Vacuolar, vacuole sign; Burr, spicule sign; Leafing, lobulation; BV, pulmonary nodular vascular passage; PI, pleural indentation; AB, air bronchogram.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec13">
<label>3.2</label>
<title>Model construction and predictive performance comparison</title>
<sec id="sec14">
<label>3.2.1</label>
<title>LASSO regression feature screening and LR model construction</title>
<p>Considering the potential issue of multicollinearity among variables, we employed the LASSO regression model with the introduction of the L1 regularization coefficient. Through 10-fold cross-validation, we obtained the minimum standard lambda and selected 8 variables as independent risk factors from 19 variables. These variables included VEGF, TAABs, malignancy probability, average CT value, nodule diameter, solid proportion, gender, and pleural retraction, as shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Lasso regression variable screening.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g002.tif"/>
</fig>
<p>The selected features were fitted to construct a serum-modified CT index model, and a Nomogram plot was generated to score the features (see <xref ref-type="fig" rid="fig3">Figure 3</xref>). The total score obtained by summing the scores of each feature allows estimation of the probability of developing infiltrative lesions in lung cancer. The study showed that when the total score of the Nomogram for lung cancer infiltrative lesions exceeds 180, the risk of lesions is over 90%.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Nomogram (column line graph).</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g003.tif"/>
</fig>
<p>To further analyze the stability and clinical utility of the serum-modified CT index model, we compared the serum-modified CT index model with conventional imaging omics models and serum-imaging omics models in both the training and testing sets using ROC curve analysis, clinical calibration curve, and clinical decision curve analysis (DCA). The conventional imaging omics model consisted of nodule diameter, solid proportion, gender, and pleural retraction. The serum-imaging omics model included VEGF, TAABs, nodule diameter, solid proportion, gender, and pleural retraction. The serum-modified CT index model comprised VEGF, TAABs, malignancy probability, average CT value, nodule diameter, solid proportion, gender, and pleural retraction. In the training set, the ROC curve analysis revealed that the areas under the curve (AUC) for the conventional imaging omics model, serum-imaging omics model, and serum-modified CT index model were 0.861, 0.87, and 0.930, respectively (see <xref ref-type="fig" rid="fig4">Figure 4A</xref>). In the testing set, the AUC values were 0.901, 0.91, and 0.942 for the conventional imaging omics model, serum-imaging omics model, and serum-modified CT index model, respectively (see <xref ref-type="fig" rid="fig4">Figure 4B</xref>). The calibration curves for the training and testing sets (see <xref ref-type="fig" rid="fig5">Figures 5A</xref>,<xref ref-type="fig" rid="fig5">B</xref>) demonstrated that the estimated risks of the serum-modified CT index model were very close to the actual risks, indicating high reliability. 
The clinical decision curve analysis (DCA) showed that, across most threshold ranges, the net benefit of the serum-modified CT index model was greater than that of the conventional imaging omics model and serum-imaging omics model in both the training and testing sets, with the serum-imaging omics model outperforming the conventional imaging omics model (see <xref ref-type="fig" rid="fig6">Figures 6A</xref>,<xref ref-type="fig" rid="fig6">B</xref>).</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Three model ROCs; <bold>(A)</bold> Training set; <bold>(B)</bold> Testing set.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g004.tif"/>
</fig>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Three model calibration curves; <bold>(A)</bold> Training set; <bold>(B)</bold> Test set.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g005.tif"/>
</fig>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>DCA curves for the three models; <bold>(A)</bold> Training set; <bold>(B)</bold> Test set.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g006.tif"/>
</fig>
</sec>
<sec id="sec15">
<label>3.2.2</label>
<title>Machine learning model construction and performance comparison</title>
<p>Using the H2OAutoML platform, automatic training and adjustment of models were conducted within a 5&#x2009;min time limit, resulting in the construction of 75 models. However, due to limited interpretability and the presence of stacked ensemble models, these models were simplified, and the main algorithms involved were extracted, including Generalized Linear Model (GLM), Random Forest (RF), Gradient Boosting Machine (GBM), Deep Neural Network (DL), and Naive Bayes (NB). Among these models, the GBM model outperformed others, achieving the highest values for AUC, accuracy, and F1-Measure on both validation and testing sets, and hence was considered the optimal model. As shown in <xref ref-type="table" rid="tab2">Table 2</xref>, on the validation and testing sets, the AUC values obtained by the GBM algorithm were higher than those obtained by GLM, RF, DL, and NB algorithms, with values of (0.931, 0.99) compared to (0.917, 0.942), (0.918, 0.986), (0.901, 0.948), and (0.908, 0.944), respectively. Furthermore, compared to GLM, RF, DL, and NB algorithms, the GBM algorithm also achieved the highest accuracy, with values of (0.857, 0.955) compared to (0.854, 0.864), (0.838, 0.947), (0.819, 0.877), and (0.844, 0.889), respectively. Among these models, the RF model exhibited the highest sensitivity in both the validation and testing sets, with values of 0.914 and 0.991, respectively. Both RF and GLM models demonstrated good performance in terms of AUC, sensitivity, specificity, and accuracy.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparison of AutoML model performance in predicting lung cancer infiltration in the test cohort.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Targets</th>
<th align="center" valign="top" colspan="2">GLM</th>
<th align="center" valign="top" colspan="2">RF</th>
<th align="center" valign="top" colspan="2">GBM</th>
<th align="center" valign="top" colspan="2">DL</th>
<th align="center" valign="top" colspan="2">NB</th>
</tr>
<tr>
<th align="center" valign="top">Validation</th>
<th align="center" valign="top">Test set</th>
<th align="center" valign="top">Validation</th>
<th align="center" valign="top">Test set</th>
<th align="center" valign="top">Validation</th>
<th align="center" valign="top">Test set</th>
<th align="center" valign="top">Validation</th>
<th align="center" valign="top">Test set</th>
<th align="center" valign="top">Validation</th>
<th align="center" valign="top">Test set</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Accuracy</td>
<td align="center" valign="middle">0.854</td>
<td align="center" valign="middle">0.864</td>
<td align="center" valign="middle">0.838</td>
<td align="center" valign="middle">0.947</td>
<td align="center" valign="middle">0.857</td>
<td align="center" valign="middle">0.955</td>
<td align="center" valign="middle">0.819</td>
<td align="center" valign="middle">0.877</td>
<td align="center" valign="middle">0.844</td>
<td align="center" valign="middle">0.889</td>
</tr>
<tr>
<td align="left" valign="middle">AUC</td>
<td align="center" valign="middle">0.917</td>
<td align="center" valign="middle">0.942</td>
<td align="center" valign="middle">0.918</td>
<td align="center" valign="middle">0.986</td>
<td align="center" valign="middle">0.931</td>
<td align="center" valign="middle">0.99</td>
<td align="center" valign="middle">0.901</td>
<td align="center" valign="middle">0.948</td>
<td align="center" valign="middle">0.908</td>
<td align="center" valign="middle">0.944</td>
</tr>
<tr>
<td align="left" valign="middle">Sensitivity</td>
<td align="center" valign="middle">0.771</td>
<td align="center" valign="middle">0.903</td>
<td align="center" valign="middle">0.914</td>
<td align="center" valign="middle">0.991</td>
<td align="center" valign="middle">0.893</td>
<td align="center" valign="middle">0.982</td>
<td align="center" valign="middle">0.800</td>
<td align="center" valign="middle">0.885</td>
<td align="center" valign="middle">0.843</td>
<td align="center" valign="middle">0.885</td>
</tr>
<tr>
<td align="left" valign="middle">Specificity</td>
<td align="center" valign="middle">0.917</td>
<td align="center" valign="middle">0.831</td>
<td align="center" valign="middle">0.779</td>
<td align="center" valign="middle">0.908</td>
<td align="center" valign="middle">0.829</td>
<td align="center" valign="middle">0.931</td>
<td align="center" valign="middle">0.834</td>
<td align="center" valign="middle">0.869</td>
<td align="center" valign="middle">0.845</td>
<td align="center" valign="middle">0.892</td>
</tr>
<tr>
<td align="left" valign="middle">PPV</td>
<td align="center" valign="middle">0.878</td>
<td align="center" valign="middle">0.823</td>
<td align="center" valign="middle">0.762</td>
<td align="center" valign="middle">0.903</td>
<td align="center" valign="middle">0.801</td>
<td align="center" valign="middle">0.925</td>
<td align="center" valign="middle">0.789</td>
<td align="center" valign="middle">0.855</td>
<td align="center" valign="middle">0.808</td>
<td align="center" valign="middle">0.877</td>
</tr>
<tr>
<td align="left" valign="middle">NPV</td>
<td align="center" valign="middle">0.838</td>
<td align="center" valign="middle">0.908</td>
<td align="center" valign="middle">0.922</td>
<td align="center" valign="middle">0.992</td>
<td align="center" valign="middle">0.909</td>
<td align="center" valign="middle">0.984</td>
<td align="center" valign="middle">0.844</td>
<td align="center" valign="middle">0.897</td>
<td align="center" valign="middle">0.874</td>
<td align="center" valign="middle">0.899</td>
</tr>
<tr>
<td align="left" valign="middle">LR+</td>
<td align="center" valign="middle">9.309</td>
<td align="center" valign="middle">5.334</td>
<td align="center" valign="middle">4.137</td>
<td align="center" valign="middle">10.737</td>
<td align="center" valign="middle">5.213</td>
<td align="center" valign="middle">14.189</td>
<td align="center" valign="middle">4.827</td>
<td align="center" valign="middle">6.767</td>
<td align="center" valign="middle">5.448</td>
<td align="center" valign="middle">8.217</td>
</tr>
<tr>
<td align="left" valign="middle">LR&#x2212;</td>
<td align="center" valign="middle">0.249</td>
<td align="center" valign="middle">0.117</td>
<td align="center" valign="middle">0.110</td>
<td align="center" valign="middle">0.010</td>
<td align="center" valign="middle">0.129</td>
<td align="center" valign="middle">0.019</td>
<td align="center" valign="middle">0.240</td>
<td align="center" valign="middle">0.132</td>
<td align="center" valign="middle">0.186</td>
<td align="center" valign="middle">0.129</td>
</tr>
<tr>
<td align="left" valign="middle">F1-Measure</td>
<td align="center" valign="middle">0.821</td>
<td align="center" valign="middle">0.861</td>
<td align="center" valign="middle">0.831</td>
<td align="center" valign="middle">0.945</td>
<td align="center" valign="middle">0.845</td>
<td align="center" valign="middle">0.953</td>
<td align="center" valign="middle">0.794</td>
<td align="center" valign="middle">0.870</td>
<td align="center" valign="middle">0.825</td>
<td align="center" valign="middle">0.881</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>AUC indicates area under the curve; PPV, positive predictive value; NPV, negative predictive value; LR&#x2212;, negative likelihood ratio; LR+, positive likelihood ratio; GLM, Generalized linear model; RF, Random forest; GBM, gradient boosting machine; DL, deep neural net; NB, Naive Bayes.</p>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="sec16">
<label>3.3</label>
<title>Overall feature interpretability analysis</title>
<p><xref ref-type="fig" rid="fig7">Figure 7</xref> shows that nodule diameter size is the most important feature, followed by average CT value, solid proportion, NSE, VEGF, CYFRA21-1, SCC, malignancy probability, CEA, and proGRP. Additionally, nodule diameter size, average CT value, malignancy probability, solid proportion, and VEGF were identified as important feature variables shared by both the GBM and logistic regression models.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Plot of the importance ranking of the GBM model variables in the test set.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g007.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig8">Figure 8</xref>, the SHAP summary plot, displays the impact of all features on the predictive performance of the GBM model in the testing set. The x-axis represents the SHAP values, indicating the contribution of features to the overall prediction. A SHAP value greater than 0 indicates a positive contribution, meaning that as the variable&#x2019;s value approaches 1, the likelihood of infiltration in patients increases. For example, on the SHAP plot corresponding to nodule diameter, red points are mainly located to the right of the zero axis, while blue points are more on the left, suggesting that as the nodule diameter increases, the likelihood of infiltrative lesions in lung nodules also increases.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Summary plot of GBM model SHAP in the test set.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g008.tif"/>
</fig>
</sec>
<sec id="sec17">
<label>3.4</label>
<title>Individual feature interpretability analysis</title>
<p>As shown in <xref ref-type="fig" rid="fig9">Figure 9</xref>, partial dependence plots illustrate the impact of individual features on the final discrimination of the GBM model and their distribution in the dataset. Nodule diameter size, malignancy probability, and VEGF are positively correlated with the likelihood of infiltrative lesions. Nodule diameter is mainly distributed below 15&#x2009;mm, but for lung cancer patients falling between 15 and 18 mm, there is a higher likelihood of infiltrative lesions, necessitating regular follow-up. As the average CT value gradually increases, it tends to indicate non-invasive lung cancer, particularly in patients with values above &#x2212;200, essentially ruling out the possibility of infiltrative lung cancer.</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>Partial dependence plots.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g009.tif"/>
</fig>
<p>The SHAP explanation illustrates the feature contributions for specific instances. As depicted in <xref ref-type="fig" rid="fig10">Figure 10</xref>, for instance 72, with a nodule diameter of 22&#x2009;mm, average CT value of &#x2212;525&#x2009;HU, and malignancy probability of 86%, these factors significantly contribute to the model&#x2019;s final determination of infiltrative lung cancer. Conversely, in instance 98, although the nodule diameter is below 15&#x2009;mm, predictions of infiltrative lung cancer are made based on factors such as average CT value, NSE value, and malignancy probability.</p>
<fig position="float" id="fig10">
<label>Figure 10</label>
<caption>
<p>SHAP interpretation diagram.</p>
</caption>
<graphic xlink:href="fpubh-12-1368217-g010.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec18">
<label>4</label>
<title>Discussion</title>
<p>Lung cancer ranks among the most prevalent and fatal malignancies globally, with adenocarcinoma being the most common histological subtype. Accurate differentiation between non-invasive and invasive lung cancer significantly impacts patient prognosis and survival. Therefore, constructing early lung cancer infiltration risk prediction models is crucial. In recent years, many researchers have built clinical risk prediction models for early lung cancer patients using multivariable logistic regression and selected feature variables such as low-dose CT (LDCT), seven autoantibodies, and other biomarkers (<xref ref-type="bibr" rid="ref17 ref18 ref19 ref20 ref21">17&#x2013;21</xref>). Unlike many previous studies, this research incorporates AI-improved malignancy probability and average CT value into the category of risk factors and compares models constructed by traditional LR regression with those built by AutoML algorithms to assess their efficacy and accuracy.</p>
<p>Feature interpretability analysis results show that the most crucial feature of the GBM model is nodule diameter size, consistent with the results of the logistic regression model in this study and the risk factors for lung nodule benignity/malignancy reported in related studies (<xref ref-type="bibr" rid="ref22">22</xref>, <xref ref-type="bibr" rid="ref23">23</xref>). Other researchers have pointed out that as nodule diameter increases, the likelihood of malignancy also increases. For instance, nodules below 5&#x2009;mm have a malignancy rate of only 1%, while those between 5 and 10 mm have a malignancy rate of 25% (<xref ref-type="bibr" rid="ref24">24</xref>). In this study, we found that nodules larger than 15&#x2009;mm have a higher malignancy probability, particularly between 15 and 18 mm, where infiltration is more likely to occur. Therefore, patients should have shorter follow-up intervals, and clinicians should pay close attention to patients with nodules larger than 15&#x2009;mm, increasing the frequency of follow-up visits. This finding is consistent with other research (<xref ref-type="bibr" rid="ref25">25</xref>, <xref ref-type="bibr" rid="ref26">26</xref>).</p>
<p>With the development and application of artificial intelligence technology, AI-based medical imaging has been widely used in clinical diagnosis and treatment, particularly in lung cancer early screening, significantly improving lung nodule detection rates and reducing the rate of missed small lesions. This study demonstrates that AI-enhanced CT indices significantly contribute to the discrimination of infiltrative lung cancer, enhancing lesion identification accuracy. However, there are limitations. According to previous studies, although CT AI has higher positive predictive values and sensitivity, its specificity is not ideal, ranging from 70 to 80% (<xref ref-type="bibr" rid="ref27 ref28 ref29 ref30">27&#x2013;30</xref>). Therefore, relying solely on radiological imaging to differentiate between benign and malignant lung nodules is too one-sided. This study established a predictive model combining AI with other laboratory indicators to improve the specificity and accuracy of lung nodule detection.</p>
<p>In recent years, laboratory indicators for lung cancer have mainly focused on primary lung cancer biomarkers and seven lung cancer autoantibodies. In contrast to artificial intelligence CT, these indicators have high specificity but low sensitivity when used alone. Therefore, they are typically used in combination for early lung cancer screening. Vascular endothelial growth factor (VEGF) levels serve as an independent risk factor for lung cancer infiltration, as evidenced by significant expression in both LR and GBM models. Studies have shown that VEGF can increase vascular permeability (<xref ref-type="bibr" rid="ref31 ref32 ref33">31&#x2013;33</xref>), thereby promoting tumor metastasis, and its overexpression indicates poor prognosis in lung cancer. Therefore, patients with abnormal VEGF levels should be closely monitored, and further diagnostic and clinical intervention measures should be implemented. Detection of serum lung cancer autoantibodies has a certain clinical decision-making value for lung cancer diagnosis (<xref ref-type="bibr" rid="ref34 ref35 ref36">34&#x2013;36</xref>). In this study, however, there was a statistically significant difference between the non-infiltrating group and the infiltrating group in the training set but no statistically significant difference between the two groups in the test set, which indicates that the 7-item serum lung cancer autoantibody test is not suitable to be applied alone in discriminating non-infiltrating versus infiltrating early stage lung cancer, and that it needs to be combined with other indicators for prediction.</p>
<p>In addition, we used five different ML algorithms to construct a high-precision prediction model. The GBM model showed optimal prediction efficacy on both the test and validation sets and achieved higher AUC and accuracy than the LDCT+7-TAABs model constructed by Zhong et al. (<xref ref-type="bibr" rid="ref37">37</xref>), which fully demonstrated that the CT metrics modified by AI are more accurate, and can provide more comprehensive and high-quality information for clinically assisted diagnosis and treatment. By accurately predicting the invasiveness of early lung nodules, this study can help patients receive earlier treatment, thereby improving survival rates and prognosis. The blind validation using a validation set and external dataset with larger sample sizes and higher external validity mitigated potential biases arising from unique circumstances at a single research center. However, our study also has some limitations. Firstly, it only studied benign and infiltrative lung cancer categories, necessitating the expansion of case numbers to further classify lung cancer. Additionally, this study is retrospective, which introduces selection bias, highlighting the need for more prospective studies for external validation.</p>
</sec>
<sec sec-type="conclusions" id="sec19">
<label>5</label>
<title>Conclusion</title>
<p>A predictive early-stage lung cancer infiltrative machine learning model was constructed and compared by combining improved CT indices with serological markers, using SHAP to elucidate the clinical significance of each risk factor in predicting infiltrative lesions in early-stage lung cancer patients. The CT indices improved by artificial intelligence are closely associated with lung cancer infiltrative features, holding significant application value in future clinical research. This combination can assist clinicians in implementing early clinical interventions, providing more comprehensive information for self-screening and disease management of early-stage lung cancer patients, thereby preventing and reducing the risk of infiltration.</p>
</sec>
<sec sec-type="data-availability" id="sec20">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="sec21">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Ethics Management Committee of Changshu No.1 People&#x2019;s Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="sec22">
<title>Author contributions</title>
<p>LM: Writing &#x2013; original draft. PZ: Data curation, Visualization, Writing &#x2013; review &#x0026; editing. KX: Methodology, Writing &#x2013; review &#x0026; editing.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec23">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was supported in part by the Suzhou Key Supporting Subjects [Health Informatics (No. SZFCXK202147)], in part by the Changshu Science and Technology Program [No. CS202015 and CS202246], in part by Changshu Key Laboratory of Medical Artificial Intelligence and Big Data [No. CYZ202301 and CS202314], and in part by the &#x201C;333 High Level Personnel Training Project of Jiangsu Province.&#x201D;</p>
</sec>
<sec sec-type="COI-statement" id="sec24">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001">
<p>
<sup>1</sup>
<ext-link xlink:href="http://www.h2o.ai" ext-link-type="uri">www.h2o.ai</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>C</given-names></name> <name><surname>Dong</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>H</given-names></name> <name><surname>Cao</surname> <given-names>M</given-names></name> <name><surname>Sun</surname> <given-names>D</given-names></name> <name><surname>He</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Cancer statistics in China and United States, 2022: profiles, trends, and determinants</article-title>. <source>Chin Med J (Engl).</source> (<year>2022</year>) <volume>135</volume>:<fpage>584</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1097/CM9.0000000000002108</pub-id></citation>
</ref>
<ref id="ref2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Siegel</surname> <given-names>RL</given-names></name> <name><surname>Miller</surname> <given-names>KD</given-names></name> <name><surname>Fuchs</surname> <given-names>HE</given-names></name> <name><surname>Jemal</surname> <given-names>A</given-names></name></person-group>. <article-title>Cancer statistics, 2022</article-title>. <source>CA Cancer J Clin.</source> (<year>2022</year>) <volume>72</volume>:<fpage>7</fpage>&#x2013;<lpage>33</lpage>. doi: <pub-id pub-id-type="doi">10.3322/caac.21708</pub-id></citation>
</ref>
<ref id="ref3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bassiri</surname> <given-names>A</given-names></name> <name><surname>Badrinathan</surname> <given-names>A</given-names></name> <name><surname>Alvarado</surname> <given-names>CE</given-names></name> <name><surname>Kwak</surname> <given-names>M</given-names></name> <name><surname>Sinopoli</surname> <given-names>J</given-names></name> <name><surname>Tapias Vargas</surname> <given-names>L</given-names></name> <etal/></person-group>. <article-title>Evaluating the Optimal Time Between Diagnosis and Surgical Intervention for Early-Stage Lung Cancer</article-title>. <source>J Surg Res.</source> (<year>2023</year>) <volume>292</volume>:<fpage>297</fpage>&#x2013;<lpage>306</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jss.2023.08.003</pub-id></citation>
</ref>
<ref id="ref4">
<label>4.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>L</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Su</surname> <given-names>Y</given-names></name></person-group>. <article-title>Low-dose Spiral Computed Tomography in Lung Cancer Screening</article-title>. <source>Zhongguo Fei Ai Za Zhi</source>. (<year>2022</year>) <volume>25</volume>:<fpage>678</fpage>&#x2013;<lpage>683</lpage>. Chinese. doi: <pub-id pub-id-type="doi">10.3779/j.issn.1009-3419.2022.101.40</pub-id></citation>
</ref>
<ref id="ref5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Albahra</surname> <given-names>S</given-names></name> <name><surname>Gorbett</surname> <given-names>T</given-names></name> <name><surname>Robertson</surname> <given-names>S</given-names></name> <name><surname>D&#x2019;Aleo</surname> <given-names>G</given-names></name> <name><surname>Kumar</surname> <given-names>SVS</given-names></name> <name><surname>Ockunzzi</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Artificial intelligence and machine learning overview in pathology &#x0026; laboratory medicine: A general review of data preprocessing and basic supervised concepts</article-title>. <source>Semin Diagn Pathol.</source> (<year>2023</year>) <volume>40</volume>:<fpage>71</fpage>&#x2013;<lpage>87</lpage>. doi: <pub-id pub-id-type="doi">10.1053/j.semdp.2023.02.002</pub-id></citation>
</ref>
<ref id="ref6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>MacEachern</surname> <given-names>SJ</given-names></name> <name><surname>Forkert</surname> <given-names>ND</given-names></name></person-group>. <article-title>Machine learning for precision medicine</article-title>. <source>Genome.</source> (<year>2021</year>) <volume>64</volume>:<fpage>416</fpage>&#x2013;<lpage>25</lpage>. doi: <pub-id pub-id-type="doi">10.1139/gen-2020-0131</pub-id></citation>
</ref>
<ref id="ref7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rauschert</surname> <given-names>S</given-names></name> <name><surname>Raubenheimer</surname> <given-names>K</given-names></name> <name><surname>Melton</surname> <given-names>PE</given-names></name> <name><surname>Huang</surname> <given-names>RC</given-names></name></person-group>. <article-title>Machine learning and clinical epigenetics: a review of challenges for diagnosis and classification</article-title>. <source>Clin Epigenetics.</source> (<year>2020</year>) <volume>12</volume>:<fpage>51</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13148-020-00842-4</pub-id></citation>
</ref>
<ref id="ref8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname> <given-names>LK</given-names></name> <name><surname>Zhang</surname> <given-names>LP</given-names></name> <name><surname>Huang</surname> <given-names>Y</given-names></name> <name><surname>Dong</surname> <given-names>ZW</given-names></name> <name><surname>Xie</surname> <given-names>HK</given-names></name> <name><surname>Zhang</surname> <given-names>W</given-names></name> <etal/></person-group>. <article-title>Application of the WHO Classification of Thoracic Tumors (2021) grading system in invasive pulmonary adenocarcinoma and its correlation with the targeted genes&#x2019; variations</article-title>. <source>Zhonghua Bing Li Xue Za Zhi</source>. (<year>2023</year>) <volume>52</volume>:<fpage>129</fpage>&#x2013;<lpage>135</lpage>. Chinese. doi: <pub-id pub-id-type="doi">10.3760/cma.j.cn112151-20220927-00814</pub-id></citation>
</ref>
<ref id="ref9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x0160;uti&#x0107;</surname> <given-names>M</given-names></name> <name><surname>Vuki&#x0107;</surname> <given-names>A</given-names></name> <name><surname>Barana&#x0161;i&#x0107;</surname> <given-names>J</given-names></name> <name><surname>F&#x00F6;rsti</surname> <given-names>A</given-names></name> <name><surname>D&#x017E;ubur</surname> <given-names>F</given-names></name> <name><surname>Samar&#x017E;ija</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Diagnostic, Predictive, and Prognostic Biomarkers in Non-Small Cell Lung Cancer (NSCLC) Management</article-title>. <source>J Pers Med.</source> (<year>2021</year>) <volume>11</volume>:<fpage>1102</fpage>. doi: <pub-id pub-id-type="doi">10.3390/jpm11111102</pub-id></citation>
</ref>
<ref id="ref10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rastel</surname> <given-names>D</given-names></name> <name><surname>Ramaioli</surname> <given-names>A</given-names></name> <name><surname>Cornillie</surname> <given-names>F</given-names></name> <name><surname>Thirion</surname> <given-names>B</given-names></name></person-group>; <collab>CYFRA 21-1 Multicentre Study Group</collab>. <article-title>CYFRA 21-1, a sensitive and specific new tumour marker for squamous cell lung cancer. Report of the first European multicentre evaluation</article-title>. <source>Eur J Cancer.</source> (<year>1994</year>) <volume>30A</volume>:<fpage>601</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0959-8049(94)90528-2</pub-id></citation>
</ref>
<ref id="ref11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>F</given-names></name> <name><surname>Wang</surname> <given-names>XY</given-names></name> <name><surname>Han</surname> <given-names>XH</given-names></name> <name><surname>Wang</surname> <given-names>H</given-names></name> <name><surname>Qi</surname> <given-names>J</given-names></name></person-group>. <article-title>Diagnostic value of Cyfra21-1, SCC and CEA for differentiation of early-stage NSCLC from benign lung disease</article-title>. <source>Int J Clin Exp Med.</source> (<year>2015</year>) <volume>8</volume>:<fpage>11295</fpage>&#x2013;<lpage>300</lpage>.</citation>
</ref>
<ref id="ref12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doseeva</surname> <given-names>V</given-names></name> <name><surname>Colpitts</surname> <given-names>T</given-names></name> <name><surname>Gao</surname> <given-names>G</given-names></name> <name><surname>Woodcock</surname> <given-names>J</given-names></name> <name><surname>Knezevic</surname> <given-names>V</given-names></name></person-group>. <article-title>Performance of a multiplexed dual analyte immunoassay for the early detection of non-small cell lung cancer</article-title>. <source>J Transl Med.</source> (<year>2015</year>) <volume>13</volume>:<fpage>55</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12967-015-0419-y</pub-id></citation>
</ref>
<ref id="ref13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>T</given-names></name> <name><surname>Wu</surname> <given-names>Z</given-names></name> <name><surname>Xia</surname> <given-names>P</given-names></name> <name><surname>Wang</surname> <given-names>W</given-names></name> <name><surname>Sun</surname> <given-names>H</given-names></name> <name><surname>Yu</surname> <given-names>L</given-names></name> <etal/></person-group>. <article-title>The combination of a seven-autoantibody panel with computed tomography scanning can enhance the diagnostic efficiency of non-small cell lung cancer</article-title>. <source>Front Oncol.</source> (<year>2022</year>) <volume>12</volume>:<fpage>1047019</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2022.1047019</pub-id></citation>
</ref>
<ref id="ref14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vickers</surname> <given-names>AJ</given-names></name> <name><surname>Van Calster</surname> <given-names>B</given-names></name> <name><surname>Steyerberg</surname> <given-names>EW</given-names></name></person-group>. <article-title>Net benefit approaches to the evaluation of prediction models, molecular markers, and diagnostic tests</article-title>. <source>BMJ.</source> (<year>2016</year>) <volume>352</volume>:<fpage>i6</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.i6</pub-id></citation>
</ref>
<ref id="ref15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bang</surname> <given-names>CS</given-names></name> <name><surname>Ahn</surname> <given-names>JY</given-names></name> <name><surname>Kim</surname> <given-names>JH</given-names></name> <name><surname>Kim</surname> <given-names>YI</given-names></name> <name><surname>Choi</surname> <given-names>IJ</given-names></name> <name><surname>Shin</surname> <given-names>WG</given-names></name></person-group>. <article-title>Establishing Machine Learning Models to Predict Curative Resection in Early Gastric Cancer with Undifferentiated Histology: Development and Usability Study</article-title>. <source>J Med Internet Res.</source> (<year>2021</year>) <volume>23</volume>:<fpage>e25053</fpage>. doi: <pub-id pub-id-type="doi">10.2196/25053</pub-id></citation>
</ref>
<ref id="ref16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>Y</given-names></name> <name><surname>Chen</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>R</given-names></name> <name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>Q</given-names></name> <name><surname>Cai</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Survival analysis of patients with advanced non-small cell lung cancer receiving EGFR-TKI treatment of Yunnan in southwestern China: a real-world study</article-title>. <source>Front Oncol.</source> (<year>2023</year>) <volume>13</volume>:<fpage>1156647</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2023.1156647</pub-id></citation>
</ref>
<ref id="ref17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><collab>National Lung Screening Trial Research Team</collab> <name><surname>Aberle</surname> <given-names>DR</given-names></name> <name><surname>Adams</surname> <given-names>AM</given-names></name> <name><surname>Berg</surname> <given-names>CD</given-names></name> <name><surname>Black</surname> <given-names>WC</given-names></name> <name><surname>Clapp</surname> <given-names>JD</given-names></name> <etal/></person-group>. <article-title>Reduced lung-cancer mortality with low-dose computed tomographic screening</article-title>. <source>N Engl J Med.</source> (<year>2011</year>) <volume>365</volume>:<fpage>395</fpage>&#x2013;<lpage>409</lpage>. doi: <pub-id pub-id-type="doi">10.1056/NEJMoa1102873</pub-id></citation>
</ref>
<ref id="ref18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>N</given-names></name> <name><surname>Tan</surname> <given-names>F</given-names></name> <name><surname>Chen</surname> <given-names>W</given-names></name> <name><surname>Dai</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>F</given-names></name> <name><surname>Shen</surname> <given-names>S</given-names></name> <etal/></person-group>; <collab>National Lung Cancer Screening programme group</collab>. <article-title>One-off low-dose CT for lung cancer screening in China: a multicentre, population-based, prospective cohort study</article-title>. <source>Lancet Respir Med.</source> (<year>2022</year>) <volume>10</volume>:<fpage>378</fpage>&#x2013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2213-2600(21)00560-9</pub-id></citation>
</ref>
<ref id="ref19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sullivan</surname> <given-names>FM</given-names></name> <name><surname>Mair</surname> <given-names>FS</given-names></name> <name><surname>Anderson</surname> <given-names>W</given-names></name> <name><surname>Armory</surname> <given-names>P</given-names></name> <name><surname>Briggs</surname> <given-names>A</given-names></name> <name><surname>Chew</surname> <given-names>C</given-names></name> <etal/></person-group>; <collab>Early Diagnosis of Lung Cancer Scotland (ECLS) Team</collab>. <article-title>Earlier diagnosis of lung cancer in a randomised trial of an autoantibody blood test followed by imaging</article-title>. <source>Eur Respir J.</source> (<year>2021</year>) <volume>57</volume>:<fpage>2000670</fpage>. doi: <pub-id pub-id-type="doi">10.1183/13993003.00670-2020</pub-id></citation>
</ref>
<ref id="ref20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sexauer</surname> <given-names>D</given-names></name> <name><surname>Gray</surname> <given-names>E</given-names></name> <name><surname>Zaenker</surname> <given-names>P</given-names></name></person-group>. <article-title>Tumour-associated autoantibodies as prognostic cancer biomarkers - a review</article-title>. <source>Autoimmun Rev.</source> (<year>2022</year>) <volume>21</volume>:<fpage>103041</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.autrev.2022.103041</pub-id></citation>
</ref>
<ref id="ref21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bi</surname> <given-names>H</given-names></name> <name><surname>Yin</surname> <given-names>L</given-names></name> <name><surname>Fang</surname> <given-names>W</given-names></name> <name><surname>Song</surname> <given-names>S</given-names></name> <name><surname>Wu</surname> <given-names>S</given-names></name> <name><surname>Shen</surname> <given-names>J</given-names></name></person-group>. <article-title>Association of CEA, NSE, CYFRA 21-1, SCC-Ag, and ProGRP with Clinicopathological Characteristics and Chemotherapeutic Outcomes of Lung Cancer</article-title>. <source>Lab Med.</source> (<year>2023</year>) <volume>54</volume>:<fpage>372</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1093/labmed/lmac122</pub-id></citation>
</ref>
<ref id="ref22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jin</surname> <given-names>T</given-names></name> <name><surname>He</surname> <given-names>Z</given-names></name> <name><surname>Li</surname> <given-names>Z</given-names></name> <name><surname>Tang</surname> <given-names>J</given-names></name> <name><surname>Xu</surname> <given-names>J</given-names></name> <name><surname>Wu</surname> <given-names>W</given-names></name> <etal/></person-group>. [<article-title>Risk Factors and Sampling Range Evaluation of Lymph node Metastasis for Non-small Cell Lung Cancer with Diameter &#x2264;2 cm</article-title>]. <source>Zhongguo Fei Ai Za Zhi</source>. (<year>2023</year>) <volume>26</volume>:<fpage>507</fpage>&#x2013;<lpage>514</lpage>. Chinese. doi: <pub-id pub-id-type="doi">10.3779/j.issn.1009-3419.2023.102.26</pub-id></citation>
</ref>
<ref id="ref23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gugulothu</surname> <given-names>V</given-names></name> <name><surname>Balaji</surname> <given-names>S</given-names></name></person-group>. <article-title>An early prediction and classification of lung nodule diagnosis on CT images based on hybrid deep learning techniques</article-title>. <source>Multimedia Tools and Applications</source>. (<year>2022</year>):<fpage>1</fpage>&#x2013;<lpage>21</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11042-023-15802-2</pub-id></citation>
</ref>
<ref id="ref24">
<label>24.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mazzone</surname> <given-names>PJ</given-names></name> <name><surname>Lam</surname> <given-names>L</given-names></name></person-group>. <article-title>Evaluating the Patient With a Pulmonary Nodule: A Review</article-title>. <source>JAMA.</source> (<year>2022</year>) <volume>327</volume>:<fpage>264</fpage>&#x2013;<lpage>73</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jama.2021.24287</pub-id></citation>
</ref>
<ref id="ref25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gould</surname> <given-names>MK</given-names></name> <name><surname>Donington</surname> <given-names>J</given-names></name> <name><surname>Lynch</surname> <given-names>WR</given-names></name> <name><surname>Mazzone</surname> <given-names>PJ</given-names></name> <name><surname>Midthun</surname> <given-names>DE</given-names></name> <name><surname>Naidich</surname> <given-names>DP</given-names></name> <etal/></person-group>. <article-title>Evaluation of individuals with pulmonary nodules: when is it lung cancer? Diagnosis and management of lung cancer, 3rd ed: American College of Chest Physicians evidence-based clinical practice guidelines</article-title>. <source>Chest.</source> (<year>2013</year>) <volume>143</volume>:<fpage>e93S</fpage>&#x2013;<lpage>e120S</lpage>. doi: <pub-id pub-id-type="doi">10.1378/chest.12-2351</pub-id></citation>
</ref>
<ref id="ref26">
<label>26.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kazerooni</surname> <given-names>EA</given-names></name> <name><surname>Armstrong</surname> <given-names>MR</given-names></name> <name><surname>Amorosa</surname> <given-names>JK</given-names></name> <name><surname>Hernandez</surname> <given-names>D</given-names></name> <name><surname>Liebscher</surname> <given-names>LA</given-names></name> <name><surname>Nath</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>ACR CT Accreditation Program and the Lung Cancer Screening Program Designation</article-title>. <source>J Am Coll Radiol.</source> (<year>2016</year>) <volume>13</volume>:<fpage>R30</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jacr.2015.12.010</pub-id></citation>
</ref>
<ref id="ref27">
<label>27.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lambin</surname> <given-names>P</given-names></name> <name><surname>Rios-Velazquez</surname> <given-names>E</given-names></name> <name><surname>Leijenaar</surname> <given-names>R</given-names></name> <name><surname>Carvalho</surname> <given-names>S</given-names></name> <name><surname>van Stiphout</surname> <given-names>RG</given-names></name> <name><surname>Granton</surname> <given-names>P</given-names></name> <etal/></person-group>. <article-title>Radiomics: extracting more information from medical images using advanced feature analysis</article-title>. <source>Eur J Cancer.</source> (<year>2012</year>) <volume>48</volume>:<fpage>441</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ejca.2011.11.036</pub-id></citation>
</ref>
<ref id="ref28">
<label>28.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Onishi</surname> <given-names>Y</given-names></name> <name><surname>Teramoto</surname> <given-names>A</given-names></name> <name><surname>Tsujimoto</surname> <given-names>M</given-names></name> <name><surname>Tsukamoto</surname> <given-names>T</given-names></name> <name><surname>Saito</surname> <given-names>K</given-names></name> <name><surname>Toyama</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>Multiplanar analysis for pulmonary nodule classification in CT images using deep convolutional neural network and generative adversarial networks</article-title>. <source>Int J Comput Assist Radiol Surg.</source> (<year>2020</year>) <volume>15</volume>:<fpage>173</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11548-019-02092-z</pub-id></citation>
</ref>
<ref id="ref29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>N</given-names></name> <name><surname>Tian</surname> <given-names>S</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Huang</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Three-Dimensional Texture Feature Analysis of Pulmonary Nodules in CT Images: Lung Cancer Predictive Models Based on Support Vector Machine Classifier</article-title>. <source>J Digit Imaging.</source> (<year>2020</year>) <volume>33</volume>:<fpage>414</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10278-019-00238-8</pub-id></citation>
</ref>
<ref id="ref30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hua</surname> <given-names>KL</given-names></name> <name><surname>Hsu</surname> <given-names>CH</given-names></name> <name><surname>Hidayati</surname> <given-names>SC</given-names></name> <name><surname>Cheng</surname> <given-names>WH</given-names></name> <name><surname>Chen</surname> <given-names>YJ</given-names></name></person-group>. <article-title>Computer-aided classification of lung nodules on computed tomography images via deep learning technique</article-title>. <source>Onco Targets Ther.</source> (<year>2015</year>) <volume>8</volume>:<fpage>2015</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.2147/OTT.S80733</pub-id></citation>
</ref>
<ref id="ref31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Q</given-names></name> <name><surname>Zeng</surname> <given-names>A</given-names></name> <name><surname>Zhu</surname> <given-names>M</given-names></name> <name><surname>Song</surname> <given-names>L</given-names></name></person-group>. <article-title>Dual inhibition of EGFR-VEGF: An effective approach to the treatment of advanced non-small cell lung cancer with EGFR mutation (Review)</article-title>. <source>Int J Oncol.</source> (<year>2023</year>) <volume>62</volume>:<fpage>26</fpage>. doi: <pub-id pub-id-type="doi">10.3892/ijo.2023.5474</pub-id></citation>
</ref>
<ref id="ref32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Guo</surname> <given-names>S</given-names></name> <name><surname>Deng</surname> <given-names>J</given-names></name> <name><surname>Shen</surname> <given-names>J</given-names></name> <name><surname>Du</surname> <given-names>F</given-names></name> <name><surname>Wu</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>VEGF/VEGFR-Targeted Therapy and Immunotherapy in Non-small Cell Lung Cancer: Targeting the Tumor Microenvironment</article-title>. <source>Int J Biol Sci.</source> (<year>2022</year>) <volume>18</volume>:<fpage>3845</fpage>&#x2013;<lpage>58</lpage>. doi: <pub-id pub-id-type="doi">10.7150/ijbs.70958</pub-id></citation>
</ref>
<ref id="ref33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>M</given-names></name></person-group>. <article-title>Application Value of Serum TK1 and PCDGF, CYFRA21-1, NSE, and CEA plus Enhanced CT Scan in the Diagnosis of Nonsmall Cell Lung Cancer and Chemotherapy Monitoring</article-title>. <source>J Oncol.</source> (<year>2022</year>) <volume>2022</volume>:<fpage>8800787</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2022/8800787</pub-id></citation>
</ref>
<ref id="ref34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ren</surname> <given-names>S</given-names></name> <name><surname>Zhang</surname> <given-names>S</given-names></name> <name><surname>Jiang</surname> <given-names>T</given-names></name> <name><surname>He</surname> <given-names>Y</given-names></name> <name><surname>Ma</surname> <given-names>Z</given-names></name> <name><surname>Cai</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>Early detection of lung cancer by using an autoantibody panel in Chinese population</article-title>. <source>Oncoimmunology.</source> (<year>2017</year>) <volume>7</volume>:<fpage>e1384108</fpage>. doi: <pub-id pub-id-type="doi">10.1080/2162402X.2017.1384108</pub-id></citation>
</ref>
<ref id="ref35">
<label>35.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>B</given-names></name> <name><surname>Mao</surname> <given-names>G</given-names></name> <name><surname>Ma</surname> <given-names>H</given-names></name> <name><surname>Chen</surname> <given-names>S</given-names></name></person-group>. <article-title>The role of seven autoantibodies in lung cancer diagnosis</article-title>. <source>J Thorac Dis.</source> (<year>2021</year>) <volume>13</volume>:<fpage>3660</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.21037/jtd-21-835</pub-id></citation>
</ref>
<ref id="ref36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>L</given-names></name> <name><surname>Chang</surname> <given-names>N</given-names></name> <name><surname>Yang</surname> <given-names>T</given-names></name> <name><surname>Lang</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Che</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Development of Diagnosis Model for Early Lung Nodules Based on a Seven Autoantibodies Panel and Imaging Features</article-title>. <source>Front Oncol.</source> (<year>2022</year>) <volume>12</volume>:<fpage>883543</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2022.883543</pub-id></citation>
</ref>
<ref id="ref37">
<label>37.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Zhang</surname> <given-names>F</given-names></name> <name><surname>Jiang</surname> <given-names>J</given-names></name> <name><surname>Zhao</surname> <given-names>C</given-names></name> <name><surname>Zhu</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Early detection of lung cancer in a real-world cohort <italic>via</italic> tumor-associated immune autoantibody and imaging combination</article-title>. <source>Front Oncol.</source> (<year>2023</year>) <volume>13</volume>:<fpage>1166894</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2023.1166894</pub-id></citation>
</ref>
</ref-list>
</back>
</article>