<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2026.1668102</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Radiomics-machine learning model for predicting invasiveness of subcentimeter subsolid lung adenocarcinoma: a validation study with external cohort and SHAP interpretability</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Feng</surname><given-names>Wenfeng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Chang</surname><given-names>Ruiting</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname><given-names>Tiezhi</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Xiaolong</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Gao</surname><given-names>Zhihong</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3318521/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname><given-names>Xu</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yin</surname><given-names>Yuling</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zuo</surname><given-names>Yuqiang</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3138193/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Medical Imaging Center, The Second Hospital of Hebei Medical University</institution>, <city>Shijiazhuang</city>, <state>Hebei</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Imaging Center, Harrison International Peace Hospital</institution>, <city>Hengshui</city>, <state>Hebei</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Thoracic surgery, The Second Hospital of Hebei Medical University</institution>, <city>Shijiazhuang</city>, <state>Hebei</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>Information Center, The Second Hospital of Hebei Medical University</institution>,&#xa0;<city>Shijiazhuang</city>, <state>Hebei</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff5"><label>5</label><institution>Physical Examination Center, The Second Hospital of Hebei Medical University</institution>, <city>Shijiazhuang</city>, <state>Hebei</state>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Yuqiang Zuo, <email xlink:href="mailto:28104668@hebmu.edu.cn">28104668@hebmu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-26">
<day>26</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>16</volume>
<elocation-id>1668102</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>03</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>06</day>
<month>03</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Feng, Chang, Li, Wang, Gao, Yang, Yin and Zuo.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Feng, Chang, Li, Wang, Gao, Yang, Yin and Zuo</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-26">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Preoperative discrimination of invasive adenocarcinoma (IAC) from pre-invasive lesions in subcentimeter subsolid nodules (SSNs) remains challenging using conventional computed tomography (CT). We aimed to develop and validate an interpretable radiomics-machine learning (ML) model for predicting invasiveness by leveraging SHapley Additive exPlanations (SHAP).</p>
</sec>
<sec>
<title>Methods</title>
<p>In this two-center retrospective study, 177 patients from Hospital 1 (training and internal validation) and 83 patients from Hospital 2 (independent external validation) with surgically confirmed lung adenocarcinoma manifesting as SSNs (&#x2264;1 cm) were enrolled. Radiomic features were then extracted from preoperative CT using the uAI Research Portal. Following a reproducibility assessment (intraclass correlation coefficient &gt;0.75), the minimum Redundancy Maximum Relevance (mRMR) and Least Absolute Shrinkage and Selection Operator (LASSO) regression were applied to select the most predictive features. Three ML classifiers: logistic regression (LR), random forest (RF) and support vector machine (SVM) were trained and validated using a 7:3 cohort split, and the best-performing model was further evaluated in the external validation cohort. Model performance was evaluated by the area under the receiver operating characteristic curve (AUC), sensitivity, specificity, F1 score, calibration, and decision curve analysis (DCA). SHAP analysis was employed to provide global and local model interpretability.</p>
</sec>
<sec>
<title>Results</title>
<p>A set of ten radiomic features was selected to predict invasiveness (IAC prevalence: 44.6%). The LR model demonstrated optimal performance during internal validation (AUC: 0.842; sensitivity: 79.2%; specificity: 73.3%; F1 score: 0.745) and exhibited superior generalizability compared to both the RF and SVM models. In the external validation cohort, the LR model maintained robust diagnostic performance, with an AUC of 0.778 (95%CI: 0.673-0.862), confirming its cross-institutional generalizability. The DCA and PRC curves further confirmed its clinical utility and stability across different institutions. SHAP analysis identified wavelet_HLL_glszm_LowGrayLevelZoneEmphasis (an indicator of necrosis), original_shape_Flatness (reflecting morphological irregularity), and log_firstorder_LoG.Minimum (suggestive of air-trapping) as top predictors of invasiveness. Decision curve analysis confirmed the model&#x2019;s superior clinical utility over empirical management strategies.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The developed radiomics-LR model robustly predicts invasiveness in subcentimeter SSNs and provides biologically plausible explanations through SHAP. Its balanced performance and inherent interpretability support its potential integration into clinical workflow to aid in surgical decision-making.</p>
</sec>
</abstract>
<kwd-group>
<kwd>decision curve analysis</kwd>
<kwd>explainable artificial intelligence</kwd>
<kwd>lung adenocarcinoma</kwd>
<kwd>machine learning</kwd>
<kwd>radiomics</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This study is supported by the Medical Science Research Project of Hebei (No.20241753).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="34"/>
<page-count count="14"/>
<word-count count="6090"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Cancer Imaging and Image-directed Interventions</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Lung cancer is the most common malignant tumor and the leading cause of cancer-related mortality worldwide (<xref ref-type="bibr" rid="B1">1</xref>). Non-small cell lung cancer (NSCLC) constitutes approximately 85% of all lung cancer cases, with adenocarcinoma being the most prevalent subtype (<xref ref-type="bibr" rid="B2">2</xref>). The widespread adoption of low-dose computed tomography (LDCT) for lung cancer screening has substantially increased the detection of pulmonary nodules, particularly subcentimeter subsolid nodules (SSNs) (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>). SSNs, encompassing both pure ground-glass nodules and part-solid nodules, exhibit heterogeneous biological behaviors, ranging from pre-invasive lesions (e.g., atypical adenomatous hyperplasia) to invasive adenocarcinomas (IAC) (<xref ref-type="bibr" rid="B5">5</xref>).</p>
<p>Accurately characterizing these nodules is critical, as stage I NSCLC (including stage IB disease) demonstrates remarkable heterogeneity in prognosis. Even after complete resection, the 5-year overall survival for stage IB NSCLC is approximately 73%, with recurrence rates as high as 18-29% (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). Accurate preoperative prediction of invasiveness in SSNs remains a diagnostic challenge, as their imaging characteristics frequently overlap across different pathological subtypes. Conventional radiological assessment depends on subjective morphological characteristics such as size, margin, and solid component proportion, which often suffer from limited diagnostic precision (<xref ref-type="bibr" rid="B8">8</xref>). Furthermore, the role of adjuvant therapy in early-stage NSCLC like stage IB remains a subject of debate (<xref ref-type="bibr" rid="B9">9</xref>), highlighting the need for better tools to identify high-risk patients who might benefit from adjuvant therapy or more extensive resection.</p>
<p>Radiomics, which is an emerging field in quantitative imaging analysis, enables high-throughput extraction of minable data from medical images, capturing subtle patterns imperceptible to the human eye (<xref ref-type="bibr" rid="B10">10</xref>). When combined with machine learning (ML), radiomics facilitates the development of robust predictive models for nodule characterization (<xref ref-type="bibr" rid="B11">11</xref>). Recently, studies have endeavored to predict the invasiveness of subsolid nodules using advanced methodologies. For instance, Zuo et&#xa0;al. developed a combined nomogram integrating deep-learning-assisted CT texture features, achieving robust performance (<xref ref-type="bibr" rid="B12">12</xref>). Similarly, Li et&#xa0;al. demonstrated the superior performance of a stacking ensemble machine learning model that combines radiomic signatures with clinical-radiological features (<xref ref-type="bibr" rid="B13">13</xref>). These studies underscore the potential of quantitative imaging analysis. However, the specific challenge of preoperatively predicting invasiveness in SSNs, which are particularly elusive on conventional CT, remains less explored. Furthermore, the comprehensive integration of explainable AI (XAI) techniques, such as SHAP, is still needed to provide transparent, biologically plausible explanations for model predictions in this specific context (<xref ref-type="bibr" rid="B14">14</xref>). Therefore, this study aims to develop an interpretable radiomics-machine learning model specifically for SSNs, with a strong emphasis on leveraging SHAP analysis to elucidate the model&#x2019;s decision-making process and enhance clinical trustworthiness.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Patient cohorts</title>
<p>A retrospective cohort study was conducted at our institution involving 177 consecutive patients with pathologically confirmed lung adenocarcinoma (LUAD) who underwent surgical resection. Inclusion criteria included: (I) preoperative chest CT imaging, (II) availability of complete clinical and pathological data, (III) &#x2264;14-day interval between preoperative chest CT imaging and surgery, and (IV) radiologically confirmed presence of SSNs on CT imaging. Exclusion criteria were: (I) inadequate CT image quality, (II) incomplete data records, (III) history of neoadjuvant chemotherapy or radiotherapy, and (IV) non-LUAD pathological diagnosis. The study was approved by the Institutional Ethics Committee of the Second Hospital of Hebei Medical University (approval number: 2023-R384) and conducted in accordance with the Declaration of Helsinki. Written informed consent was waived due to the retrospective design. Patients were randomly stratified into training (70%, n=123) and internal validation (30%, n=54) cohorts using a 7:3 ratio. A flowchart detailing patient enrollment is presented in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>. To evaluate the generalizability of the model, an independent external validation cohort was collected from Harrison International Peace Hospital between 2023 and 2024. A total of 83 patients with subcentimeter SSNs were enrolled using the same inclusion and exclusion criteria as the primary cohort.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Flowchart of study participant enrollment.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating patient selection for a study of LUAD in a hospital from January 2022 to December 2024. Of 279 patients, inclusion required preoperative chest CT imaging, full clinical and pathological data, imaging within 14 days of surgery, and presence of SSNs. Exclusion applied to poor imaging quality, incomplete data, pre-procedural biopsy or tissue sampling, or pathology not confirming LUAD, leaving 177 patients with 177 SSNs. These were divided into Train Cohort (one hundred twenty-three), Test Cohort (fifty-four), Pre-IAC Cohort (ninety-eight), and IAC Cohort (seventy-nine).</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>CT image acquisition</title>
<p>All participants underwent <bold>preoperative standard-dose</bold> chest CT scans within 2 weeks before surgery. Standard-dose protocols were employed to ensure optimal image quality for the assessment of subcentimeter lesions. During CT acquisition, patients were positioned supine with arms elevated above their heads. The scanning range extended from the thoracic inlet to the adrenal gland region. Helical CT continuous scanning was performed during deep inspiration breath-hold. The CT systems included a GE Optima 660 64-slice helical CT (United States) and a Philips iCT 256-slice helical CT (Netherlands). To minimize potential confounding factors arising from inter-scanner variability, the external validation cohort from Hospital 2 was specifically derived from patients scanned using the same CT platforms as the primary cohort. All CT examinations were performed following the manufacturers&#x2019; default clinical protocols, identical to those described for Hospital 1: (tube voltage 120 kV, tube current 100&#x2013;300 mA with adaptive modulation technology, matrix size 512&#xd7;512, slice thickness 5.0 mm, and spacing 5.0 mm, with reconstruction layer thicknesses of 1.0 mm or 1.25 mm). Thin-section reconstructions (&lt;1.5 mm thickness) using high-resolution and standard algorithms generated lung and mediastinal window images respectively. Notably, all CT images used for radiomic feature extraction were reconstructed using sharp kernels (Lung window algorithms) to optimize the visualization of fine-grained nodular structures. To minimize the impact of reconstruction-induced noise on feature extraction, a standardized preprocessing workflow (including resampling and normalization) was applied as described in Section 2.3. Image analysis utilized: lung window settings (width 1,500 HU, level 600 HU) and mediastinal window settings (width 350 HU, level 40 HU).</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>ROI segmentation and radiomic feature extraction</title>
<p>The overall radiomics workflow is illustrated in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. The integrated workflow includes region of interest (ROI) segmentation, radiomic feature extraction, feature selection, and ML model development. Radiomic features were extracted using the uAI Research Portal (<ext-link ext-link-type="uri" xlink:href="https://urp.united-imaging.com/">https://urp.united-imaging.com/</ext-link>; United Imaging Intelligence) (<xref ref-type="bibr" rid="B15">15</xref>). The extraction engine of this platform is based on the PyRadiomics framework, which is compliant with the Imaging Biomarker Standardization Initiative (IBSI) standards, ensuring the reproducibility and standardization of the extracted biomarkers. Processing commenced with automated ROI outlining by the uAI platform, which was subsequently manually revised, and finally confirmed by a single board-certified radiologist with 15 years of experience in thoracic CT. Delineated ROIs encompassed the entire tumor area while systematically excluding blood vessels, bronchi, and pleura where this was feasible. To minimize heterogeneity bias stemming from variable CT scanners or acquisition parameters, standardization procedures were applied post-segmentation:</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Workflow of the radiomic analysis pipeline.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g002.tif">
<alt-text content-type="machine-generated">Diagram illustrating a radiomics workflow for medical image analysis, including four main steps: ROI segmentation with CT lung images, feature selection with bar and line graphs, model evaluation with ROC, calibration, and precision-recall curves, and model interpretation using SHAP summary and feature importance plots.</alt-text>
</graphic></fig>
<p>(1) gray-level normalization (window level: -500/window width: 1,500), (2) voxel resampling to 1&#xd7;1&#xd7;1 mm&#xb3; using B-spline interpolation, and (3) gray-level discretization with a bin width of 25. Subsequent radiomic feature extraction proceeded from the processed ROIs. Radiomic features were extracted using the uAI Research Portal (<ext-link ext-link-type="uri" xlink:href="https://urp.united-imaging.com/">https://urp.united-imaging.com/</ext-link>; United Imaging Intelligence). The extraction engine of this platform is based on the PyRadiomics framework, which is compliant with the Imaging Biomarker Standardization Initiative (IBSI) standards, ensuring the reproducibility and standardization of the extracted biomarkers.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Assessment of radiomic feature reproducibility</title>
<p>To ensure the reliability and stability of the extracted radiomic features, we assessed their inter-observer reproducibility. A subset of 30 patients was randomly selected from the entire cohort. The regions of interest (ROIs) for these patients were independently re-segmented by a second radiologist with over 10 years of experience in thoracic imaging, who was blinded to the initial segmentation results and pathological diagnoses. The Intra-class Correlation Coefficient (ICC) was then calculated using a two-way random-effects model for absolute agreement [ICC (2, 1)] to quantify the consistency of feature values between the two observers. Features with an ICC value greater than 0.75 were considered to have excellent reproducibility and were retained for subsequent analysis, while features with ICC &#x2264; 0.75 were excluded to minimize variability introduced by segmentation differences.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Radiomic feature selection</title>
<p>A total of 2,264 radiomic features were extracted from SSN ROIs, encompassing 14 shape features, 450 first-order features, 400 Grey Level Size Zone Matrix (GLSZM) features, 525 Grey Level Co-occurrence Matrix (GLCM) features, 350 Grey Level Dependence Matrix (GLDM) features, 400 Grey Level Run Length Matrix (GLRLM) features, and 125 Neighborhood Gray-Level Difference Matrix (NGLDM) features. Following reproducibility assessment, 365 features (16.12%) demonstrated good reproducibility (ICC&gt;0.75). Next, the mRMR method was utilized to select the top 14 features with the highest relevance to the pathological invasiveness and the lowest redundancy. Finally, LASSO regression with 10-fold cross-validation was performed to further refine the feature set. The optimal regularization parameter (&#x3bb;=0.242) was determined based on the minimum binomial deviance criterion, which ultimately identified 10 key radiomic features for the construction of the signature (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Radiomics feature selection using LASSO logistic regression. <bold>(A)</bold> LASSO coefficient path plot (based on ROI): Ten-fold cross-validation for tuning the LASSO regularization parameter (&#x3b1;). The minimum cross-validation error occurred at log (&#x3b1;)=0.0242, selected as the optimal value. The final LASSO logistic regression model was fitted using this optimal &#x3b1;. <bold>(B)</bold> LASSO coefficient profile plot (based on ROI): Trajectories of the radiomic feature coefficients as log (&#x3b1;) decreases. The number of features with non-zero coefficients decreases with increasing regularization strength, demonstrating LASSO&#x2019;s feature selection property. <bold>(C)</bold> Selected radiomics features after LASSO screening (based on ROI): The subset of 10 features retained by the LASSO model using the optimal &#x3b1; determined in <bold>(A)</bold>, selected to prevent overfitting.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g003.tif">
<alt-text content-type="machine-generated">Panel A shows a line chart with error bars depicting AUC values versus the negative log of alpha, indicating model performance across feature quantities. Panel B presents coefficient trajectories for ten features with a dashed line marking the selected alpha, each colored and labeled in the legend. Panel C contains a horizontal bar graph illustrating the coefficients’ magnitude and sign for the ten selected features, each clearly labeled on the left.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Model development, hyperparameter tuning and validation</title>
<p>Three machine learning classifiers: logistic regression (LR), random forest (RF), and support vector machine (SVM), were trained using the 10-feature signature. To ensure optimal performance and mitigate overfitting, a systematic hyperparameter tuning process was employed via 5-fold cross-validation on the training cohort, which maintained a sufficient number of samples in each validation fold for reliable performance estimation. Separately, a 10-fold cross-validation was chosen for LASSO feature selection to ensure the stability of the penalty parameter (&#x3bb;) identification.</p>
<p>LR: We utilized the L2 (Ridge) regularization to prevent overfitting. The hyperparameter C, which is the inverse of regularization strength, was optimized. A grid search was performed over a range of values: C = [0.001, 0.01, 0.1, 1, 10, 100].</p>
<p>RF: Key hyperparameters were tuned to balance model complexity and generalization. The optimization process involved: n_estimators: The number of trees in the forest, tested at [50, 100, 200]. max_depth: The maximum depth of the tree, explored with values [3, 5, 7, None]. min_samples_split: The minimum number of samples required to split an internal node, tested at [2, 5, 10]. SVM: A linear kernel was selected for the SVM classifier due to its suitability for high-dimensional data, and to maintain model interpretability. The hyperparameter C was optimized over the same range as for LR: [0.001, 0.01, 0.1, 1, 10, 100]. The optimal hyperparameters for each model were defined as the configuration yielding the highest mean AUC during 5-fold cross-validation on the training set. The final models were then refit using the entire training cohort with these optimal parameters. To ensure the methodological rigor and transparency of our radiomics workflow, the study was designed and reported following the Radiomics Quality Score (RQS). The detailed item-by-item calculation is provided in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;1</bold></xref>. Model performance was evaluated using AUC, sensitivity, specificity, F1 score, ROC curves, calibration curves, precision-recall curves, and DCA.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Model interpretability</title>
<p>SHAP analysis provided global interpretability by means of a summary plot, illustrating feature contributions to the LR model predictions. Features were ranked by importance, with colored dots representing individual LUAD patients (yellow = high risk, purple = low risk) across rows corresponding to each feature. This visualization demonstrated how predicted risk levels varied according to feature-specific contributions. Complementary waterfall plots facilitated local interpretability by depicting feature-wise impacts on individual predictions.</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Statistical analysis</title>
<p>All analyses were performed using R (v4.1.3). Continuous variables were assessed for normality using Shapiro-Wilk tests and reported as the mean &#xb1; standard deviation (normally distributed) or median (interquartile range, IQR) (non-normally distributed). Group comparisons used independent <italic>t</italic>-tests or Mann-Whitney U tests, respectively. Categorical variables were compared using &#x3c7;&#xb2; or Fisher&#x2019;s exact tests. Univariable and multivariable logistic regression identified independent predictors of IAC. Model performance was evaluated using receiver operating characteristic (ROC) curves (area under the curve, AUC), calibration curves (assessing predictive accuracy), precision-recall curves (visualizing the trade-off between precision [positive predictive value] and recall [sensitivity] across different classification thresholds), and decision curve analysis (DCA) to quantify clinical utility. Model discrimination was compared using the DeLong test. Statistical significance was defined as two-sided <italic>p</italic> &lt; 0.05.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Patient characteristics</title>
<p>The study enrolled 177 patients with LUAD (mean age &#xb1; SD: 52.24 &#xb1; 11.10 years; 40 males), who were divided into a training cohort (<italic>n</italic> = 123) and an internal validation cohort (<italic>n</italic> = 54). IAC was pathologically confirmed in 79 patients (44.63%). The baseline characteristics were comparable between cohorts (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Clinical characteristics of patients.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Characteristic</th>
<th valign="middle" colspan="3" align="center">Training cohort (n=123)</th>
<th valign="middle" colspan="3" align="center">Validation cohort (n=54)</th>
</tr>
<tr>
<th valign="middle" align="center">Non-IAC (n=67)</th>
<th valign="middle" align="center">IAC (n=56)</th>
<th valign="middle" align="center"><italic>p</italic> value</th>
<th valign="middle" align="center">Non-IAC (n=31)</th>
<th valign="middle" align="center">IAC (n=23)</th>
<th valign="middle" align="center"><italic>p</italic> value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Gender, n (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.489<sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.957 <sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">Female</td>
<td valign="middle" align="center">57(85.07)</td>
<td valign="middle" align="center">45(80.36)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">20(64.52)</td>
<td valign="middle" align="center">15(65.22)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Male</td>
<td valign="middle" align="center">10(14.93)</td>
<td valign="middle" align="center">11(19.64)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">11(35.48)</td>
<td valign="middle" align="center">8(34.78)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Age, median (IQR)</td>
<td valign="middle" align="center">50.00(18.00)</td>
<td valign="middle" align="center">56.00(13.75)</td>
<td valign="middle" align="center">0.031 <sup>b</sup></td>
<td valign="middle" align="center">46.00(19.00)</td>
<td valign="middle" align="center">57.00(20.00)</td>
<td valign="middle" align="center">0.069 <sup>b</sup></td>
</tr>
<tr>
<td valign="middle" align="center">BMI, median (IQR)</td>
<td valign="middle" align="center">24.14(6.28)</td>
<td valign="middle" align="center">25.61(5.78)</td>
<td valign="middle" align="center">0.178 <sup>b</sup></td>
<td valign="middle" align="center">24.34(3.32)</td>
<td valign="middle" align="center">26.35(4.03)</td>
<td valign="middle" align="center">0.019 <sup>b</sup></td>
</tr>
<tr>
<td valign="middle" align="center">Max diameter, median (IQR)</td>
<td valign="middle" align="center">8.00(2.80)</td>
<td valign="middle" align="center">8.50(3.00)</td>
<td valign="middle" align="center">0.401<sup>b</sup></td>
<td valign="middle" align="center">8.00(3.00)</td>
<td valign="middle" align="center">9.00(2.00)</td>
<td valign="middle" align="center">0.002 <sup>b</sup></td>
</tr>
<tr>
<td valign="middle" align="center">Family history of lung cancer</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.372 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.426<sup>c</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">59(88.06)</td>
<td valign="middle" align="center">52(92.86)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">28(90.32)</td>
<td valign="middle" align="center">22(95.65)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">8(11.94)</td>
<td valign="middle" align="center">4(7.14)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">3(9.68)</td>
<td valign="middle" align="center">1(4.35)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Smoking</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.258 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.741<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">58(86.57)</td>
<td valign="middle" align="center">52(92.86)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">24(77.42)</td>
<td valign="middle" align="center">19(82.61)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">9(13.43)</td>
<td valign="middle" align="center">4(7.14)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">7(22.58)</td>
<td valign="middle" align="center">4(17.39)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Drinking</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.529 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">1.000<sup>c</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">62(92.54)</td>
<td valign="middle" align="center">50(89.29)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">28(90.32)</td>
<td valign="middle" align="center">20(86.96)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">5(7.46)</td>
<td valign="middle" align="center">6(10.71)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">3(9.68)</td>
<td valign="middle" align="center">3(13.04)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Location (upper lobe)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.180<sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.975<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">22(32.84)</td>
<td valign="middle" align="center">25(44.64)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">19(61.29)</td>
<td valign="middle" align="center">14(60.87)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">45(67.16)</td>
<td valign="middle" align="center">31(55.36)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">12(38.71)</td>
<td valign="middle" align="center">9(39.13)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Tumor-Lung interface</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.289<sup>c</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.253<sup>c</sup></td>
</tr>
<tr>
<td valign="middle" align="center">Ill defined</td>
<td valign="middle" align="center">6(8.96)</td>
<td valign="middle" align="center">2(3.57)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">3(9.68)</td>
<td valign="middle" align="center">0(0.00)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Well defined</td>
<td valign="middle" align="center">61(91.04)</td>
<td valign="middle" align="center">54(96.43)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">28(90.32)</td>
<td valign="middle" align="center">23(100.00)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Lobulated sign</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.393 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.022<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">29(43.28)</td>
<td valign="middle" align="center">20(35.71)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">23(74.19)</td>
<td valign="middle" align="center">10(43.48)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">38(56.72)</td>
<td valign="middle" align="center">36(64.29)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">8(25.81)</td>
<td valign="middle" align="center">13(56.52)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Spiculated sign</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.294 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.052<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">41(61.19)</td>
<td valign="middle" align="center">29(51.79)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">24(77.42)</td>
<td valign="middle" align="center">12(52.17)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">26(38.81)</td>
<td valign="middle" align="center">27(48.21)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">7(22.58)</td>
<td valign="middle" align="center">11(47.83)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">VCS</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.015 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.055<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">53(79.10)</td>
<td valign="middle" align="center">33(58.93)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">25(80.65)</td>
<td valign="middle" align="center">13(56.52)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">14(20.90)</td>
<td valign="middle" align="center">23(41.07)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">6(19.35)</td>
<td valign="middle" align="center">10(43.48)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Pleural indentation sign</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.847 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.724<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">43(64.18)</td>
<td valign="middle" align="center">35(62.50)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">19(61.29)</td>
<td valign="middle" align="center">13(56.52)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">24(35.82)</td>
<td valign="middle" align="center">21(37.50)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">12(38.71)</td>
<td valign="middle" align="center">10(43.48)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Vacuole sign</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.734 <sup>a</sup></td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.005<sup>a</sup></td>
</tr>
<tr>
<td valign="middle" align="center">No</td>
<td valign="middle" align="center">52(77.61)</td>
<td valign="middle" align="center">42(75.00)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">27(87.10)</td>
<td valign="middle" align="center">12(52.17)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Yes</td>
<td valign="middle" align="center">15(22.39)</td>
<td valign="middle" align="center">14(25.00)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">4(12.90)</td>
<td valign="middle" align="center">11(47.83)</td>
<td valign="middle" align="center"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>a, Chi-square test; b, Mann-Whitney U test; c, Fisher&#x2019;s exact test.</p></fn>
<fn>
<p>IAC, Invasive Adenocarcinoma; IQR, Interquartile Range; VCS, Vascular Convergence Sign.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Radiomic prediction model development and validation</title>
<p>Ten radiomic features were used to train three ML classifiers (LR, RF, and SVM) to construct and validate the radiomic ML models. Performance metrics of the radiomic models are shown in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> and <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4</bold></xref>&#x2013;<xref ref-type="fig" rid="f7"><bold>7</bold></xref>. To evaluate model stability and mitigate potential overfitting given the cohort size, the performance of all three classifiers was also assessed using 5-fold cross-validation on the entire dataset (n=177). The mean AUCs from the cross-validation were: LR: 0.835 &#xb1; 0.055 (95% CI: 0.767-0.903), RF: 0.806 &#xb1; 0.066 (95% CI: 0.724-0.888), and SVM: 0.809 &#xb1; 0.075 (95% CI: 0.716-0.902). The strong concordance between the cross-validation results and the performance on the held-out internal validation cohort (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>) underscores the robustness and generalizability of our models, particularly the LR classifier. The LR model was selected as the optimal predictor based on four key criteria: (1) superior generalizability: minimal AUC decline from training to testing (AUC decline: 0.031 vs. RF: 0.089; SVM: 0.068), indicating superior robustness against overfitting; (2) clinically balanced performance: harmonized sensitivity (0.792) and specificity (0.733), mitigating false negative (critical for cancer diagnosis) and false positive risks; (3) robust clinical utility: highest F1 score (0.745) and narrower AUC 95%CI: 0.738&#x2013;0.946 vs. RF/SVM; (4) compatibility with clinical interpretability frameworks: linear architecture enables transparent SHAP-based explanations. The detailed distribution of correct and incorrect classifications is illustrated in the confusion matrix for the internal test set (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;2A</bold></xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Radiomic model performance.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Methods</th>
<th valign="middle" colspan="2" align="center">AUC (95%CI)</th>
<th valign="middle" colspan="2" align="center">Sensitivity</th>
<th valign="middle" colspan="2" align="center">Specificity</th>
<th valign="middle" colspan="2" align="center">Accuracy</th>
<th valign="middle" colspan="2" align="center">F1 score</th>
</tr>
<tr>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">LR</td>
<td valign="middle" align="center">0.873(0.809-0.938)</td>
<td valign="middle" align="center">0.842(0.738-0.946)</td>
<td valign="middle" align="center">0.727</td>
<td valign="middle" align="center">0.792</td>
<td valign="middle" align="center">0.868</td>
<td valign="middle" align="center">0.733</td>
<td valign="middle" align="center">0.805</td>
<td valign="middle" align="center">0.759</td>
<td valign="middle" align="center">0.769</td>
<td valign="middle" align="center">0.745</td>
</tr>
<tr>
<td valign="middle" align="center">RF</td>
<td valign="middle" align="center">0.913(0.864-0.961)</td>
<td valign="middle" align="center">0.824(0.715-0.932)</td>
<td valign="middle" align="center">0.891</td>
<td valign="middle" align="center">0.833</td>
<td valign="middle" align="center">0.721</td>
<td valign="middle" align="center">0.600</td>
<td valign="middle" align="center">0.797</td>
<td valign="middle" align="center">0.704</td>
<td valign="middle" align="center">0.797</td>
<td valign="middle" align="center">0.714</td>
</tr>
<tr>
<td valign="middle" align="center">SVM</td>
<td valign="middle" align="center">0.896(0.839-0.952)</td>
<td valign="middle" align="center">0.828(0.718-0.937)</td>
<td valign="middle" align="center">0.727</td>
<td valign="middle" align="center">0.708</td>
<td valign="middle" align="center">0.853</td>
<td valign="middle" align="center">0.667</td>
<td valign="middle" align="center">0.797</td>
<td valign="middle" align="center">0.685</td>
<td valign="middle" align="center">0.762</td>
<td valign="middle" align="center">0.667</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>LR, Logistic Regression; RF, Random Forest; SVM, Support Vector Machine; AUC, Area Under Curve.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The ROC curve for the different ML models in the <bold>(A)</bold> training cohort and <bold>(B)</bold> internal validation cohort.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g004.tif">
<alt-text content-type="machine-generated">Two side-by-side ROC curve plots labeled A and B compare the performance of three classification models—logistic regression (blue), random forest (red), and support vector machine (green)—with area under the curve values listed in matching colors for each model. Panel A shows higher AUC values for all models compared to panel B.</alt-text>
</graphic></fig>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The calibration curve for the different ML models in the <bold>(A)</bold> training cohort and <bold>(B)</bold> internal validation cohort.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g005.tif">
<alt-text content-type="machine-generated">Two side-by-side calibration plots labeled A and B compare observed versus predicted probabilities for three models: logistic regression (blue), random forest (red), and support vector machine (green). Both plots show curves for each model, a black dashed diagonal line representing ideal calibration, and axes ranging from zero to one for predicted and observed probability. A legend at the bottom right of each plot identifies the models by color.</alt-text>
</graphic></fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>The precision-recall curve of different ML models in the <bold>(A)</bold> training cohort and <bold>(B)</bold> internal validation cohort.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g006.tif">
<alt-text content-type="machine-generated">Side-by-side data visualization showing two precision-recall (PR) curve line graphs labeled A and B. Each graph compares logistic regression (blue), random forest (red), and support vector machine (green) models. Graph A shows higher PR performance with AUPR scores: LR 0.84, RF 0.90, SVM 0.88. Graph B shows slightly lower AUPR scores: LR 0.82, RF 0.79, SVM 0.80. Precision is on the y-axis, recall on the x-axis.</alt-text>
</graphic></fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>The decision curve analysis for the different ML models in the <bold>(A)</bold> training cohort and <bold>(B)</bold> internal validation cohort showed that the LR model provided a higher net benefit than the RF and SVM models across most threshold probabilities.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g007.tif">
<alt-text content-type="machine-generated">Panel A and Panel B present line graphs comparing standardized net benefit versus high risk threshold for three models: logistic regression in blue, random forest in red, and support vector machine in green, along with two reference lines labeled All in gray and None in black, with slight variations in the curves between the panels.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Statistical comparison of model performance</title>
<p>To objectively evaluate the relative performance of the classifiers, DeLong&#x2019;s test was used to compare their ROC curves derived from the internal validation cohort. The results of these pairwise comparisons are summarized in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>. No statistically significant difference in AUC was found between the top-performing LR model and the RF model (p = 0.589). Similarly, the difference in AUC between the LR and SVM models was not statistically significant (p = 0.372). The comparison between RF and SVM also yielded a non-significant result (p = 0.889).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Statistical comparison of classifier performance in the validation cohort using DeLong&#x2019;s test.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Comparison (validation cohort)</th>
<th valign="middle" align="left">Z</th>
<th valign="middle" align="left">p value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">LR vs. RF</td>
<td valign="middle" align="left">0.540</td>
<td valign="middle" align="left">0.5889</td>
</tr>
<tr>
<td valign="middle" align="left">LR vs. SVM</td>
<td valign="middle" align="left">0.893</td>
<td valign="middle" align="left">0.3716</td>
</tr>
<tr>
<td valign="middle" align="left">RF vs. SVM</td>
<td valign="middle" align="left">0.140</td>
<td valign="middle" align="left">0.8888</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Performance in the external validation cohort</title>
<p>The optimal model (LR) was tested on the external cohort to evaluate its institutional robustness. Despite being trained on Center 1 data, the model achieved an AUC of 0.778, with a sensitivity of 73.91% and specificity of 71.67% (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>). To further visualize the model&#x2019;s diagnostic accuracy across centers, the confusion matrix for the external validation cohort is presented in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;2B</bold></xref>. These results, presented via independent ROC, PRC, and DCA curves (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;1</bold></xref>), demonstrate that our radiomics-ML pipeline is highly effective when applied to external data acquired under standardized scanning conditions.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Model interpretability using SHAP. <bold>(A)</bold> SHAP bar chart: Feature importance ranking based on mean absolute SHAP values. <bold>(B)</bold> SHAP beeswarm plot: Distribution of feature impacts on model output (prediction probability); yellow/purple indicate higher/lower feature values, respectively.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g008.tif">
<alt-text content-type="machine-generated">Bar chart (A) displays SHAP summary values for a logistic regression model, ranking features by mean absolute SHAP value, while scatter plot (B) visualizes individual SHAP feature impacts with color gradients indicating low to high feature values. Both plots focus on feature importance in model predictions.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Model interpretation</title>
<p>SHAP analysis was performed to quantify feature importance and provide both global and local model interpretability (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>). The top predictive features were ranked by their mean absolute SHAP values in a bar chart (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8A</bold></xref>), while a beeswarm plot was utilized to illustrate the directionality of feature impacts, where yellow and purple dots represent higher and lower feature values, respectively (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8B</bold></xref>). Additionally, force plots were generated to visualize feature-level contributions to individual predictions (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>). Through this analysis, SHAP identified wavelet_glszm_wavelet.HLL.LowGrayLevelZoneEmphasis, original_shape_Flatness, and log_firstorder_log.sigma.4.0.mm.3D.Minimum as the top discriminative features.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Performance evaluation of the logistic regression (LR) model in the external validation cohort. <bold>(A)</bold> Receiver operating characteristic (ROC) curve demonstrating the discriminative ability of the model (AUC = 0.778). <bold>(B)</bold> Calibration curve illustrating the agreement between predicted and observed probabilities of invasiveness. <bold>(C)</bold> Precision-recall (PR) curve showing the trade-off between precision and recall (area under the PR curve = 0.64). <bold>(D)</bold> Decision curve analysis (DCA) evaluating the clinical net benefit of the model across a range of threshold probabilities.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g009.tif">
<alt-text content-type="machine-generated">Panel A shows a blue ROC curve for a binary classifier with an area under the curve of zero point seven seven eight. Panel B is a calibration plot comparing predicted and observed probabilities with a blue calibration line and black dashed reference. Panel C displays a blue precision-recall curve with area under the curve zero point six four. Panel D is a decision curve showing standardized net benefit versus high risk threshold for Class 1 probability, with reference lines labeled All and None.</alt-text>
</graphic></fig>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>SHAP force plots illustrating the contribution of individual features to the final prediction probability for two representative cases. SHAP values elucidated personalized risk profiles, aiding clinicians in tailored treatment planning. <bold>(A)</bold> A 34-year-old female patient with minimally invasive adenocarcinoma (MIA), showing a lower predicted probability of invasiveness. <bold>(B)</bold> A 50-year-old male patient with invasive adenocarcinoma (IAC), showing a higher predicted probability of invasiveness.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1668102-g010.tif">
<alt-text content-type="machine-generated">SHAP waterfall plots labeled A and B compare feature contributions to machine learning predictions. Plot A shows negative feature impacts in magenta, leading to a final prediction of 0.132, while plot B displays positive contributions in yellow, resulting in a prediction of 0.684. Both plots annotate individual feature effects and expected value, E[f(x)] = 0.444, along the prediction axis.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>Our study addresses two critical gaps in the radiomics literature on pulmonary nodules. First, we specifically targeted the diagnostic challenge of SSNs&#x2014;a population increasingly detected by LDCT screening yet notably underrepresented in prior research, which often prioritized larger, solid lesions (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). The management of these minute nodules remains highly controversial due to the limitations of subjective CT assessment (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>), creating a pressing unmet clinical need. Second, and more importantly, we moved beyond the common &#x201c;black-box&#x201d; paradigm by making model interpretability a cornerstone of our approach. Through the comprehensive integration of SHAP analysis, we not only predict invasiveness but also elucidate the underlying reasoning by identifying the most predictive features and explaining their directionality and potential biological significance. This commitment to transparency is a critical step toward building clinical trust and facilitating the integration of artificial intelligence tools into real-world decision-making processes (<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B21">21</xref>).</p>
<p>Our findings directly address the limitations of conventional CT assessment for SSNs (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B23">23</xref>). Although qualitative features such as vacuoles and vascular convergence showed statistical association with invasiveness in univariate analysis (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>), their diagnostic consistency was inadequate across cohorts. Radiomics objectively quantified an otherwise imperceptible tumor heterogeneity, yielding significantly improved diagnostic precision. The robust performance of the LR model (AUC 0.842) underscores radiomics&#x2019; potential to augment subjective evaluation, particularly for nodules displaying equivocal morphological features. The LR model in our study demonstrated balanced and robust performance (AUC: 0.842) in the internal validation, a result comparable to the advanced models reported for larger SSNs, such as the deep-learning-assisted nomogram by Zuo et&#xa0;al. (<xref ref-type="bibr" rid="B12">12</xref>) and the stacking ensemble classifier by Li et&#xa0;al. (<xref ref-type="bibr" rid="B13">13</xref>). This indicates that a well-constructed radiomics model can achieve high diagnostic accuracy without necessarily resorting to complex architectures. A critical finding from our statistical comparison was that DeLong&#x2019;s test confirmed no significant difference in AUC between the top-performing LR model and the RF or SVM models. This statistical equivalence is pivotal, as it shifts the model selection criteria from pure discriminatory power to a holistic evaluation of generalizability, stability, and clinical applicability. Consequently, the selection of the LR model is strongly justified by its optimal balance of three factors. 
First, it exhibited superior generalizability, evidenced by the minimal AUC decline from training to validation (&#x394;AUC 0.031), which was smaller than that of RF (0.089) or SVM (0.068), demonstrating robust resistance to overfitting (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>). Second, it achieved a clinically essential equilibrium between sensitivity (0.792) and specificity (0.733), effectively mitigating the risks of both under-treatment and unnecessary intervention, as further validated by its high F1 score (0.745) (<xref ref-type="bibr" rid="B26">26</xref>). Finally, and crucially, unlike complex &#x201c;black-box&#x201d; models, LR&#x2019;s linear architecture provides an inherently transparent foundation (<xref ref-type="bibr" rid="B27">27</xref>), which we seamlessly integrated with SHAP analysis. This integration moves beyond mere prediction to explain the &#x201c;why&#x201d; behind each decision, providing both global and local explanations that are fundamental for building clinician trust and fostering clinical adoption (<xref ref-type="bibr" rid="B28">28</xref>). This combination of non-inferior predictive power, proven stability, and inherent explainability makes LR the most clinically pragmatic choice for deployment. A notable strength of our study is the implementation of hardware-level standardization for the external validation. By selecting an external cohort scanned with identical CT models and default parameters, we successfully isolated the biological and pathological variations from potential technical artifacts. The stable performance of the LR model across both centers suggests that our radiomics signatures are intrinsically linked to the tumor&#x2019;s invasive potential rather than specific imaging noise.</p>
<p>The integration of SHAP analysis, a core novelty of our work, yielded unprecedented insights into the biological mechanisms captured by our model. The top-ranked features suggested potential links to tumor pathobiology: wavelet_HLL_glszm_LowGrayLevelZoneEmphasis: This feature quantified the spatial clustering of low-attenuation voxels in the wavelet-transformed HLL (horizontal-high, vertical-low, diagonal-low) sub-band. Higher values indicated larger contiguous regions of low CT attenuation (-600 to -800 HU). In IAC, aggressive tumor behaviors induced necrotic zones from rapid growth exceeding angiogenesis and microcystic fusion due to parenchymal collapse. These processes created macroscale low-attenuation clusters that were visible in the HLL sub-band, which emphasized horizontal structural disruptions. Consequently, elevated feature values correlated with advanced invasiveness (<xref ref-type="bibr" rid="B29">29</xref>).</p>
<p>Original_shape_Flatness: this radiomic feature measured the three-dimensional (3D) planarity or flatness of an ROI, with values on a scale of 0 to 1. Higher values (approaching 1) indicated a more planar or isotropic shape, geometrically approximating a cube or sphere. Conversely, lower values (approaching 0) corresponded to a more elongated, rod-like morphology. This feature primarily described the 3D geometric properties of the ROI, specifically its spatial directional dimensionality and deviation from sphericity. This may reflect the ROI&#x2019;s growth pattern and interactions with surrounding tissues.</p>
<p>Evidence indicates that morphological features, including flatness, serve as effective differentiators between lung adenocarcinoma pathological subtypes (AIS/MIA vs. IAC) (<xref ref-type="bibr" rid="B30">30</xref>&#x2013;<xref ref-type="bibr" rid="B32">32</xref>). The characteristically more irregular morphology of IAC, stemming from its aggressive growth patterns (destructive growth, complex structures such as micropapillary/solid components, and infiltration along structures), could manifest through alterations in flatness: (1) a decrease in overall flatness (suggesting a trend towards elongation), and (2) localized increases in flatness in specific areas (e.g., flat patches along the pleura). Importantly, an overall increase in morphological complexity represented a more critical marker of invasiveness.</p>
<p>The log_firstorder_log.sigma.4.0.mm.3D_Minimum: this feature represented the minimum intensity value after Laplacian of Gaussian (LoG) filtering at &#x3c3;=4mm scale. Lower values (more negative) indicated extremely low-attenuation foci (~-900 to -1000 HU). In pre/minimally invasive lesions (AIS/MIA), preserved alveolar airspaces generated isolated voxels with near-air density, and &#x201c;bubble-like&#x201d; pseudo-cavitations formed due to lepidic growth without destruction. The LoG filter (&#x3c3;=4mm) amplified these submillimeter air-trapping zones. As invasiveness increased, tumor cells filled airspaces, reducing extreme low-attenuation voxels. Thus, lower minimum values (more negative) predicted lower invasiveness. Consistent with the findings of Zuo et&#xa0;al. (<xref ref-type="bibr" rid="B12">12</xref>) and Li et&#xa0;al. (<xref ref-type="bibr" rid="B13">13</xref>), our SHAP analysis also identified features related to tumor morphology and patient demographics as top contributors. Specifically, the importance of features like wavelet_HLL_glszm_LowGrayLevelZoneEmphasis (potentially indicating necrosis) and original_shape_Flatness (reflecting morphological irregularity) in our model aligns with the principle that radiomics captures critical tumor heterogeneity. The concordance in key predictors, such as morphological complexity, across independent studies strengthens the biological plausibility of radiomic models. These SHAP-derived insights transcended simple feature lists, elucidating how specific quantitative image characteristics mechanistically contributed to invasiveness prediction. Future histopathological correlation studies linking these features to specific tumor microenvironment characteristics (e.g., fibroblast proliferation, architectural distortion, microvascular patterns) are essential.</p>
<p>The &#x201c;black box&#x201d; nature of numerous ML models remains a significant barrier to clinical adoption (<xref ref-type="bibr" rid="B15">15</xref>). Our integration of SHAP explicitly addressed this limitation. Global summaries provided clinicians with key radio-phenotypic determinants of SSN invasiveness, potentially refining human pattern recognition. Local explanations using force plots personalize predictions for individual patients and visually delineate the contribution of each feature towards the nodule-specific risk score. This transparency facilitated clinician-patient communication, supported tailored clinical decision-making (e.g., surgical planning versus active surveillance), and cultivated trust among radiologists and clinicians (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>). Furthermore, DCA validated the model&#x2019;s net clinical benefit over a broad range of threshold probabilities, highlighting its practical advantage over binary treat-all or treat-nothing strategies. In terms of methodological rigor, our study design adheres to the core recommendations of the Radiomics Quality Score (RQS) framework (detailed in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;1</bold></xref>). By implementing rigorous feature stability screening (ICC &gt; 0.75), multi-step dimensionality reduction, and, importantly, independent external validation, we have endeavored to ensure the reproducibility and clinical relevance of our radiomic signature.</p>
<p>This study has several limitations that warrant consideration. First, although we successfully incorporated an independent external validation cohort, the retrospective nature of the study may still introduce selection bias. Since the model was developed using data from a single institution and externally validated only in a cohort scanned with identical CT models and default parameters, the performance and stability of both the radiomic features and the predictive model may vary when applied to other cohorts with different acquisition parameters and demographic characteristics. Second, although SHAP analysis improves interpretability, the biological plausibility of the top radiomic features&#x2014;while informed by existing literature&#x2014;remains hypothetical and requires further validation through histopathological correlation. Future studies incorporating precise region-of-interest matching between CT imaging and histopathological samples (e.g., via tissue microarrays or detailed pathologic mapping) are necessary to confirm the underlying biological mechanisms of these image-based biomarkers. Third, to ensure model parsimony and mitigate overfitting risks given the sample size, clinical variables (e.g., age, smoking history) and semantic radiological features were not incorporated. Their inclusion in future iterations may improve model comprehensiveness and predictive power. Fourth, due to the retrospective nature of the data collection, we were unable to explicitly evaluate the performance differences between the specific CT scanner models (GE and Philips) used in our primary cohort. Although we applied rigorous IBSI-recommended image preprocessing (e.g., 1&#xd7;1&#xd7;1 mm&#xb3; spatial resampling and gray-level normalization) to minimize inter-scanner variability prior to feature extraction, future prospective studies should track specific hardware parameters to conduct comprehensive inter-vendor subgroup analyses. 
Finally, although logistic regression was selected for its favorable performance and interpretability, more complex yet explainable approaches&#x2014;such as explainable boosting machines or deep learning-based radiomic models&#x2014;may capture subtler patterns and should be investigated in larger, multi-center datasets.</p>
<p>While our study has been strengthened by the inclusion of an independent external validation cohort from a second center, it remains a retrospective analysis. Future prospective, large-scale multicenter studies are still warranted to further confirm the model&#x2019;s robustness and clinical utility across more diverse clinical settings and imaging platforms prior to widespread implementation.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>This study establishes a robust and interpretable radiomics-LR model for invasiveness prediction in subcentimeter SSNs. By combining rigorous feature extraction, ML validation, and SHAP explanations, this tool provides biological insights and clinical decision support. Prospective multi-center validation is warranted for clinical translation.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Material</bold></xref>. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Institutional Ethics Committee of the Second Hospital of Hebei Medical University. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>WF: Conceptualization, Writing &#x2013; original draft. RC: Validation, Writing &#x2013; original draft. TL: Data curation, Writing &#x2013; original draft. XW: Software, Visualization, Writing &#x2013; original draft. ZG: Methodology, Writing &#x2013; original draft. XY: Resources, Writing &#x2013; original draft. YY: Investigation, Software, Writing &#x2013; original draft. YZ: Conceptualization, Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s13" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fonc.2026.1668102/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fonc.2026.1668102/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Image1.jpeg" id="SF1" mimetype="image/jpeg"/>
<supplementary-material xlink:href="Image2.jpeg" id="SF2" mimetype="image/jpeg"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bray</surname> <given-names>F</given-names></name>
<name><surname>Laversanne</surname> <given-names>M</given-names></name>
<name><surname>Sung</surname> <given-names>H</given-names></name>
<name><surname>Ferlay</surname> <given-names>J</given-names></name>
<name><surname>Siegel</surname> <given-names>RL</given-names></name>
<name><surname>Soerjomataram</surname> <given-names>I</given-names></name>
<etal/>
</person-group>. 
<article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA Cancer J Clin</source>. (<year>2024</year>) <volume>74</volume>:<fpage>229</fpage>&#x2013;<lpage>263</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3322/caac.21834</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Neumann</surname> <given-names>JM</given-names></name>
<name><surname>Freitag</surname> <given-names>H</given-names></name>
<name><surname>Hartmann</surname> <given-names>JS</given-names></name>
<name><surname>Niehaus</surname> <given-names>K</given-names></name>
<name><surname>Galanis</surname> <given-names>M</given-names></name>
<name><surname>Griesshammer</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>Subtyping non-small cell lung cancer by histology-guided spatial metabolomics</article-title>. <source>J Cancer Res Clin Oncol</source>. (<year>2022</year>) <volume>148</volume>:<fpage>351</fpage>&#x2013;<lpage>360</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00432-021-03834-w</pub-id>, PMID: <pub-id pub-id-type="pmid">34839410</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>de Morais</surname> <given-names>AQ</given-names></name>
<name><surname>da Silva</surname> <given-names>TPF</given-names></name>
<name><surname>Braga</surname> <given-names>JCD</given-names></name>
<name><surname>Teixeira</surname> <given-names>DFD</given-names></name>
<name><surname>Barbosa</surname> <given-names>PNVP</given-names></name>
<name><surname>Haddad</surname> <given-names>FJ</given-names></name>
<etal/>
</person-group>. 
<article-title>Factors associated with subcentimeter pulmonary nodule outcomes followed with computed tomography imaging in oncology patients</article-title>. <source>Eur J Radiol Open</source>. (<year>2020</year>) <volume>7</volume>:<fpage>100266</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ejro.2020.100266</pub-id>, PMID: <pub-id pub-id-type="pmid">33024797</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fra-Fern&#xe1;ndez</surname> <given-names>S</given-names></name>
<name><surname>Gorospe-Saras&#xfa;a</surname> <given-names>L</given-names></name>
<name><surname>Caba&#xf1;ero-S&#xe1;nchez</surname> <given-names>A</given-names></name>
<name><surname>Mu&#xf1;oz-Molina</surname> <given-names>G</given-names></name>
<name><surname>Caballero-Silva</surname> <given-names>U</given-names></name>
<name><surname>Moreno-Mata</surname> <given-names>N</given-names></name>
</person-group>. 
<article-title>Subsolid pulmonary nodules: why not &#x201c;watch and wait&#x201d;</article-title>? <source>Ann Transl Med</source>. (<year>2024</year>) <volume>12</volume>:<fpage>3</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/atm-23-1794</pub-id>, PMID: <pub-id pub-id-type="pmid">38304902</pub-id>, PMCID: <pub-id pub-id-type="pmcid">PMC10777245</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>H</given-names></name>
<name><surname>Zhang</surname> <given-names>X</given-names></name>
<name><surname>Zhong</surname> <given-names>Z</given-names></name>
</person-group>. 
<article-title>Exploration of CT-based discrimination and diagnosis of various pathological types of ground glass nodules in the lungs</article-title>. <source>BMC Med Imaging</source>. (<year>2025</year>) <volume>25</volume>:<fpage>119</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12880-025-01653-w</pub-id>, PMID: <pub-id pub-id-type="pmid">40229674</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Goldstraw</surname> <given-names>P</given-names></name>
<name><surname>Chansky</surname> <given-names>K</given-names></name>
<name><surname>Crowley</surname> <given-names>J</given-names></name>
<name><surname>Rami-Porta</surname> <given-names>R</given-names></name>
<name><surname>Asamura</surname> <given-names>H</given-names></name>
<name><surname>Eberhardt</surname> <given-names>WE</given-names></name>
<etal/>
</person-group>. 
<article-title>The IASLC lung cancer staging project: proposals for revision of the TNM stage groupings in the forthcoming (Eighth) edition of the TNM classification for lung cancer</article-title>. <source>J Thorac Oncol</source>. (<year>2016</year>) <volume>11</volume>:<fpage>39</fpage>&#x2013;<lpage>51</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jtho.2015.09.009</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Varlotto</surname> <given-names>JM</given-names></name>
<name><surname>Recht</surname> <given-names>A</given-names></name>
<name><surname>Flickinger</surname> <given-names>JC</given-names></name>
<name><surname>Medford-Davis</surname> <given-names>LN</given-names></name>
<name><surname>Dyer</surname> <given-names>AM</given-names></name>
<name><surname>DeCamp</surname> <given-names>MM</given-names></name>
</person-group>. 
<article-title>Varying recurrence rates and risk factors associated with different definitions of local recurrence in patients with surgically resected, stage I nonsmall cell lung cancer</article-title>. <source>Cancer</source>. (<year>2010</year>) <volume>116</volume>:<fpage>2390</fpage>&#x2013;<lpage>2400</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cncr.25047</pub-id>, PMID: <pub-id pub-id-type="pmid">20225332</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Peng</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Classification of pulmonary nodules in the era of precision medicine</article-title>. <source>Lancet Digit Health</source>. (<year>2023</year>) <volume>5</volume>:<page-range>e633&#x2013;4</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2589-7500(23)00154-1</pub-id>, PMID: <pub-id pub-id-type="pmid">37567794</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zuo</surname> <given-names>Z</given-names></name>
<name><surname>Zhang</surname> <given-names>G</given-names></name>
<name><surname>Song</surname> <given-names>P</given-names></name>
<name><surname>Yang</surname> <given-names>J</given-names></name>
<name><surname>Li</surname> <given-names>S</given-names></name>
<name><surname>Zhong</surname> <given-names>Z</given-names></name>
<etal/>
</person-group>. 
<article-title>Survival nomogram for stage IB non-small-cell lung cancer patients, based on the SEER database and an external validation cohort</article-title>. <source>Ann Surg Oncol</source>. (<year>2021</year>) <volume>28</volume>:<fpage>3941</fpage>&#x2013;<lpage>3950</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1245/s10434-020-09362-0</pub-id>, PMID: <pub-id pub-id-type="pmid">33249521</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>McCague</surname> <given-names>C</given-names></name>
<name><surname>Ramlee</surname> <given-names>S</given-names></name>
<name><surname>Reinius</surname> <given-names>M</given-names></name>
<name><surname>Selby</surname> <given-names>I</given-names></name>
<name><surname>Hulse</surname> <given-names>D</given-names></name>
<name><surname>Piyatissa</surname> <given-names>P</given-names></name>
<etal/>
</person-group>. 
<article-title>Introduction to radiomics for a clinical audience</article-title>. <source>Clin Radiol</source>. (<year>2023</year>) <volume>78</volume>:<fpage>83</fpage>&#x2013;<lpage>98</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.crad.2022.08.149</pub-id>, PMID: <pub-id pub-id-type="pmid">36639175</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Warkentin</surname> <given-names>MT</given-names></name>
<name><surname>Al-Sawaihey</surname> <given-names>H</given-names></name>
<name><surname>Lam</surname> <given-names>S</given-names></name>
<name><surname>Liu</surname> <given-names>G</given-names></name>
<name><surname>Diergaarde</surname> <given-names>B</given-names></name>
<name><surname>Yuan</surname> <given-names>JM</given-names></name>
<etal/>
</person-group>. 
<article-title>Radiomics analysis to predict pulmonary nodule malignancy using machine learning approaches</article-title>. <source>Thorax</source>. (<year>2024</year>) <volume>79</volume>:<fpage>307</fpage>&#x2013;<lpage>315</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/thorax-2023-220226</pub-id>, PMID: <pub-id pub-id-type="pmid">38195644</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zuo</surname> <given-names>Z</given-names></name>
<name><surname>Zeng</surname> <given-names>W</given-names></name>
<name><surname>Peng</surname> <given-names>K</given-names></name>
<name><surname>Mao</surname> <given-names>Y</given-names></name>
<name><surname>Wu</surname> <given-names>Y</given-names></name>
<name><surname>Zhou</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Development of a novel combined nomogram integrating deep-learning-assisted CT texture and clinical-radiological features to predict the invasiveness of clinical stage IA part-solid lung adenocarcinoma: a multicentre study</article-title>. <source>Clin Radiol</source>. (<year>2023</year>) <volume>78</volume>:<fpage>e698</fpage>&#x2013;<lpage>e706</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.crad.2023.07.002</pub-id>, PMID: <pub-id pub-id-type="pmid">37487842</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y</given-names></name>
<name><surname>Ding</surname> <given-names>J</given-names></name>
<name><surname>Wu</surname> <given-names>K</given-names></name>
<name><surname>Qi</surname> <given-names>W</given-names></name>
<name><surname>Lin</surname> <given-names>S</given-names></name>
<name><surname>Chen</surname> <given-names>G</given-names></name>
<etal/>
</person-group>. 
<article-title>Ensemble machine learning classifiers combining CT radiomics and clinical-radiological features for preoperative prediction of pathological invasiveness in lung adenocarcinoma presenting as part-solid nodules: A multicenter retrospective study</article-title>. <source>Technol Cancer Res Treat</source>. (<year>2025</year>) <volume>24</volume>:<fpage>15330338251351365</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/15330338251351365</pub-id>, PMID: <pub-id pub-id-type="pmid">40525253</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Petch</surname> <given-names>J</given-names></name>
<name><surname>Di</surname> <given-names>S</given-names></name>
<name><surname>Nelson</surname> <given-names>W</given-names></name>
</person-group>. 
<article-title>Opening the black box: the promise and limitations of explainable machine learning in cardiology</article-title>. <source>Can J Cardiol</source>. (<year>2022</year>) <volume>38</volume>:<page-range>204&#x2013;13</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cjca.2021.09.004</pub-id>, PMID: <pub-id pub-id-type="pmid">34534619</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>J</given-names></name>
<name><surname>Xia</surname> <given-names>Y</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<name><surname>Wei</surname> <given-names>Y</given-names></name>
<name><surname>Liu</surname> <given-names>A</given-names></name>
<name><surname>Innanje</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>uRP: An integrated research platform for one-stop analysis of medical images</article-title>. <source>Front Radiol</source>. (<year>2023</year>) <volume>3</volume>:<elocation-id>1153784</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fradi.2023.1153784</pub-id>, PMID: <pub-id pub-id-type="pmid">37492386</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>B</given-names></name>
<name><surname>Ren</surname> <given-names>W</given-names></name>
<name><surname>Feng</surname> <given-names>Z</given-names></name>
<name><surname>Li</surname> <given-names>M</given-names></name>
<name><surname>Li</surname> <given-names>X</given-names></name>
<name><surname>Han</surname> <given-names>R</given-names></name>
<etal/>
</person-group>. 
<article-title>Correlation between CT imaging characteristics and pathological diagnosis for subcentimeter pulmonary nodules</article-title>. <source>Thorac Cancer</source>. (<year>2022</year>) <volume>13</volume>:<fpage>1067</fpage>&#x2013;<lpage>1075</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1759-7714.14363</pub-id>, PMID: <pub-id pub-id-type="pmid">35212152</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>C</given-names></name>
<name><surname>Geng</surname> <given-names>Q</given-names></name>
<name><surname>Song</surname> <given-names>G</given-names></name>
<name><surname>Zhang</surname> <given-names>Q</given-names></name>
<name><surname>Wang</surname> <given-names>Y</given-names></name>
<name><surname>Sun</surname> <given-names>D</given-names></name>
<etal/>
</person-group>. 
<article-title>A comprehensive nomogram combining CT-based radiomics with clinical features for differentiation of benign and malignant lung subcentimeter solid nodules</article-title>. <source>Front Oncol</source>. (<year>2023</year>) <volume>13</volume>:<elocation-id>1066360</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2023.1066360</pub-id>, PMID: <pub-id pub-id-type="pmid">37007065</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zuo</surname> <given-names>YQ</given-names></name>
<name><surname>Liu</surname> <given-names>Q</given-names></name>
<name><surname>Li</surname> <given-names>TZ</given-names></name>
<name><surname>Gao</surname> <given-names>ZH</given-names></name>
<name><surname>Yang</surname> <given-names>X</given-names></name>
<name><surname>Yin</surname> <given-names>YL</given-names></name>
<etal/>
</person-group>. 
<article-title>Computed tomography radiomics of intratumoral and peritumoral microenvironments for identifying the invasiveness of subcentimeter lung adenocarcinomas</article-title>. <source>BMC Med Imaging</source>. (<year>2025</year>) <volume>25</volume>:<fpage>331</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12880-025-01882-z</pub-id>, PMID: <pub-id pub-id-type="pmid">40826397</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>F</given-names></name>
<name><surname>Tian</surname> <given-names>SP</given-names></name>
<name><surname>Jin</surname> <given-names>X</given-names></name>
<name><surname>Jing</surname> <given-names>R</given-names></name>
<name><surname>Yang</surname> <given-names>YQ</given-names></name>
<name><surname>Jin</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>CT and histopathologic characteristics of lung adenocarcinoma with pure ground-glass nodules 10 mm or less in diameter</article-title>. <source>Eur Radiol</source>. (<year>2017</year>) <volume>27</volume>:<page-range>4037&#x2013;43</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00330-017-4829-5</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huff</surname> <given-names>DT</given-names></name>
<name><surname>Weisman</surname> <given-names>AJ</given-names></name>
<name><surname>Jeraj</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Interpretation and visualization techniques for deep learning models in medical imaging</article-title>. <source>Phys Med Biol</source>. (<year>2021</year>) <volume>66</volume>:<fpage>04TR01</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1361-6560/abcd17</pub-id>, PMID: <pub-id pub-id-type="pmid">33227719</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Vellido</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>The importance of interpretability and visualization in machine learning for applications in medicine and health care</article-title>. <source>Neural Comput Applic</source>. (<year>2020</year>) <volume>32</volume>:<page-range>18069&#x2013;83</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00521-019-04051-w</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>J</given-names></name>
<name><surname>Qi</surname> <given-names>L</given-names></name>
<name><surname>Xu</surname> <given-names>Q</given-names></name>
<name><surname>Chen</surname> <given-names>J</given-names></name>
<name><surname>Cui</surname> <given-names>S</given-names></name>
<name><surname>Li</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>A self-supervised learning-based fine-grained classification model for distinguishing malignant from benign subcentimeter solid pulmonary nodules</article-title>. <source>Acad Radiol</source>. (<year>2024</year>) <volume>31</volume>:<fpage>4687</fpage>&#x2013;<lpage>4695</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.acra.2024.05.002</pub-id>, PMID: <pub-id pub-id-type="pmid">38777719</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>J</given-names></name>
<name><surname>Qi</surname> <given-names>L</given-names></name>
<name><surname>Wang</surname> <given-names>Y</given-names></name>
<name><surname>Li</surname> <given-names>F</given-names></name>
<name><surname>Chen</surname> <given-names>J</given-names></name>
<name><surname>Cheng</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Diagnostic performance of a deep learning-based method in differentiating malignant from benign subcentimeter (&#x2264;10 mm) solid pulmonary nodules</article-title>. <source>J Thorac Dis</source>. (<year>2023</year>) <volume>15</volume>:<page-range>5475&#x2013;84</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/jtd-23-985</pub-id>, PMID: <pub-id pub-id-type="pmid">37969262</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Park</surname> <given-names>JE</given-names></name>
<name><surname>Kim</surname> <given-names>HS</given-names></name>
<name><surname>Kim</surname> <given-names>D</given-names></name>
<name><surname>Park</surname> <given-names>SY</given-names></name>
<name><surname>Kim</surname> <given-names>JY</given-names></name>
<name><surname>Cho</surname> <given-names>SJ</given-names></name>
<etal/>
</person-group>. 
<article-title>A systematic review reporting quality of radiomics research in neuro-oncology: toward clinical utility and quality improvement using high-dimensional imaging features</article-title>. <source>BMC Cancer</source>. (<year>2020</year>) <volume>20</volume>:<fpage>29</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12885-019-6504-5</pub-id>, PMID: <pub-id pub-id-type="pmid">31924170</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>An</surname> <given-names>C</given-names></name>
<name><surname>Park</surname> <given-names>YW</given-names></name>
<name><surname>Ahn</surname> <given-names>SS</given-names></name>
<name><surname>Han</surname> <given-names>K</given-names></name>
<name><surname>Kim</surname> <given-names>H</given-names></name>
<name><surname>Lee</surname> <given-names>SK</given-names></name>
</person-group>. 
<article-title>Radiomics machine learning study with a small sample size: Single random training-test set split may lead to unreliable results</article-title>. <source>PLoS One</source>. (<year>2021</year>) <volume>16</volume>:<elocation-id>e0256152</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0256152</pub-id>, PMID: <pub-id pub-id-type="pmid">34383858</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Faes</surname> <given-names>L</given-names></name>
<name><surname>Liu</surname> <given-names>X</given-names></name>
<name><surname>Wagner</surname> <given-names>SK</given-names></name>
<name><surname>Fu</surname> <given-names>DJ</given-names></name>
<name><surname>Balaskas</surname> <given-names>K</given-names></name>
<name><surname>Sim</surname> <given-names>DA</given-names></name>
<etal/>
</person-group>. 
<article-title>A clinician&#x2019;s guide to artificial intelligence: how to critically appraise machine learning studies</article-title>. <source>Transl Vis Sci Technol</source>. (<year>2020</year>) <volume>9</volume>:<fpage>7</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1167/tvst.9.2.7</pub-id>, PMID: <pub-id pub-id-type="pmid">32704413</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kelly</surname> <given-names>CJ</given-names></name>
<name><surname>Karthikesalingam</surname> <given-names>A</given-names></name>
<name><surname>Suleyman</surname> <given-names>M</given-names></name>
<name><surname>Corrado</surname> <given-names>G</given-names></name>
<name><surname>King</surname> <given-names>D</given-names></name>
</person-group>. 
<article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>. <source>BMC Med</source>. (<year>2019</year>) <volume>17</volume>:<fpage>195</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12916-019-1426-2</pub-id>, PMID: <pub-id pub-id-type="pmid">31665002</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ponce-Bobadilla</surname> <given-names>AV</given-names></name>
<name><surname>Schmitt</surname> <given-names>V</given-names></name>
<name><surname>Maier</surname> <given-names>CS</given-names></name>
<name><surname>Mensing</surname> <given-names>S</given-names></name>
<name><surname>Stodtmann</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Practical guide to SHAP analysis: Explaining supervised machine learning model predictions in drug development</article-title>. <source>Clin Transl Sci</source>. (<year>2024</year>) <volume>17</volume>:<elocation-id>e70056</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cts.70056</pub-id>, PMID: <pub-id pub-id-type="pmid">39463176</pub-id>, PMCID: <pub-id pub-id-type="pmcid">PMC11513550</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bibault</surname> <given-names>JE</given-names></name>
<name><surname>Xing</surname> <given-names>L</given-names></name>
<name><surname>Giraud</surname> <given-names>P</given-names></name>
<name><surname>El Ayachy</surname> <given-names>R</given-names></name>
<name><surname>Giraud</surname> <given-names>N</given-names></name>
<name><surname>Decazes</surname> <given-names>P</given-names></name>
<etal/>
</person-group>. 
<article-title>Radiomics: A primer for the radiation oncologist</article-title>. <source>Cancer Radiother</source>. (<year>2020</year>) <volume>24</volume>:<fpage>403</fpage>&#x2013;<lpage>410</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.canrad.2020.01.011</pub-id>, PMID: <pub-id pub-id-type="pmid">32265157</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>FZ</given-names></name>
<name><surname>Wu</surname> <given-names>YJ</given-names></name>
<name><surname>Tang</surname> <given-names>EK</given-names></name>
</person-group>. 
<article-title>An integrated nomogram combined semantic-radiomic features to predict invasive pulmonary adenocarcinomas in subjects with persistent subsolid nodules</article-title>. <source>Quant Imaging Med Surg</source>. (<year>2023</year>) <volume>13</volume>:<page-range>654&#x2013;68</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/qims-22-308</pub-id>, PMID: <pub-id pub-id-type="pmid">36819273</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ye</surname> <given-names>T</given-names></name>
<name><surname>Wu</surname> <given-names>H</given-names></name>
<name><surname>Wang</surname> <given-names>S</given-names></name>
<name><surname>Li</surname> <given-names>Q</given-names></name>
<name><surname>Gu</surname> <given-names>Y</given-names></name>
<name><surname>Ma</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Radiologic identification of pathologic tumor invasion in patients with lung adenocarcinoma</article-title>. <source>JAMA Netw Open</source>. (<year>2023</year>) <volume>6</volume>:<elocation-id>e2337889</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.37889</pub-id>, PMID: <pub-id pub-id-type="pmid">37843862</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Q</given-names></name>
<name><surname>Ba</surname> <given-names>W</given-names></name>
<name><surname>Yin</surname> <given-names>K</given-names></name>
<name><surname>Shen</surname> <given-names>J</given-names></name>
<name><surname>Jiang</surname> <given-names>G</given-names></name>
<name><surname>Liang</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Predicting lung adenocarcinoma invasiveness by measurement of pure ground-glass nodule roundness by using multiplanar reformation: a retrospective analysis</article-title>. <source>Clin Radiol</source>. (<year>2022</year>) <volume>77</volume>:<fpage>e20</fpage>&#x2013;<lpage>e26</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.crad.2021.10.007</pub-id>, PMID: <pub-id pub-id-type="pmid">34772486</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>AA</given-names></name>
<name><surname>Huang</surname> <given-names>SY</given-names></name>
</person-group>. 
<article-title>Increasing transparency in machine learning through bootstrap simulation and shapely additive explanations</article-title>. <source>PLoS One</source>. (<year>2023</year>) <volume>18</volume>:<elocation-id>e0281922</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0281922</pub-id>, PMID: <pub-id pub-id-type="pmid">36821544</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Goodwin</surname> <given-names>NL</given-names></name>
<name><surname>Choong</surname> <given-names>JJ</given-names></name>
<name><surname>Hwang</surname> <given-names>S</given-names></name>
<name><surname>Pitts</surname> <given-names>K</given-names></name>
<name><surname>Bloom</surname> <given-names>L</given-names></name>
<name><surname>Islam</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>Simple Behavioral Analysis (SimBA) as a platform for explainable machine learning in behavioral neuroscience</article-title>. <source>Nat Neurosci</source>. (<year>2024</year>) <volume>27</volume>:<fpage>1411</fpage>&#x2013;<lpage>1424</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41593-024-01649-9</pub-id>, PMID: <pub-id pub-id-type="pmid">38778146</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3017529">Sunyi Zheng</ext-link>, Tianjin Medical University Cancer Institute and Hospital, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/962662">Zhichao Zuo</ext-link>, Xiangtan Central Hospital, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2127522">Liliana Caldeira</ext-link>, University Hospital of Cologne, Germany</p></fn>
</fn-group>
</back>
</article>