<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2026.1741424</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Exploratory analysis of exhaled volatile organic compounds for binary discrimination between lung cancer, pneumonia, and healthy controls using machine learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Jing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Haitian</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/3271463/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yue</surname> <given-names>Jianshen</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Song</surname> <given-names>Yamei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Ning</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Cai</surname> <given-names>Zhigang</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2674479/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Pulmonary and Critical Care Medicine, The People&#x2019;s Hospital of Hengshui</institution>, <city>Hengshui</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Infectious Disease, The People&#x2019;s Hospital of Hengshui</institution>, <city>Hengshui</city>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>The First Department of Pulmonary and Critical Care Medicine, The People&#x2019;s Hospital of Cangzhou</institution>, <city>Cangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>The First Department of Pulmonary and Critical Care Medicine, The Second Hospital of Hebei Medical University</institution>, <city>Shijiazhuang</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Zhigang Cai, <email xlink:href="mailto:zhigang_cai@hebmu.edu.cn">zhigang_cai@hebmu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-23">
<day>23</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>13</volume>
<elocation-id>1741424</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>01</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Wang, Li, Yue, Song, Wang, Guo and Cai.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang, Li, Yue, Song, Wang, Guo and Cai</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-23">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Lung cancer remains a major cause of cancer-related mortality worldwide, while pneumonia is one of the most prevalent infectious diseases, with acute pneumonia being highly common globally. Despite continuous advancements in diagnostic technology and the successive launch of new anti-infective drugs, the incidence and mortality rates of pneumonia remain high. Exhaled breath volatile organic compounds (VOCs) have been proposed as non-invasive indicators of disease-related metabolic and pathophysiological alterations. Lung cancer and pneumonia often present with similar nodules or consolidation shadows on chest imaging, leading to frequent diagnostic overlap and delays. This uncertainty can cause lung cancer patients to miss the optimal treatment window or result in unnecessary invasive examinations for pneumonia patients. The current gold standard for definitive diagnosis relies on invasive methods, but it has drawbacks such as operational risks, patient discomfort, radiation exposure, and high costs. Therefore, this study was designed as an exploratory, proof-of-concept investigation to examine whether VOC profiles exhibit distinguishable patterns between lung cancer, pneumonia, and healthy individuals using pairwise binary analytical approaches.</p>
</sec>
<sec>
<title>Methods</title>
<p>Exhaled breath samples were collected from participants with lung cancer (<italic>N</italic> = 180), pneumonia (<italic>N</italic> = 228), and healthy controls (<italic>N</italic> = 180). Samples were analyzed using a micro gas chromatography system coupled with a mass spectrometry detector (micro-GC&#x2013;MSD). Univariate statistical analyses were performed to screen for VOCs showing differential abundance between groups. Multivariate analyses were subsequently conducted using five machine learning algorithms to evaluate the discriminative performance of VOC-based models in pairwise binary comparisons between lung cancer and healthy controls, pneumonia and healthy controls, and lung cancer and pneumonia.</p>
</sec>
<sec>
<title>Results</title>
<p>Multiple VOCs demonstrated statistically significant differences between groups, although substantial overlap in distributions was observed. Compared with healthy controls, three VOCs (heptane; propane, 1-(methylthio)-; and styrene) showed lower levels and two VOCs (2-hexanone, 6-hydroxy-; and o-xylene) showed higher levels in the lung cancer group. In the pneumonia group, six VOCs (1,4-pentadiene, toluene, butyl acetate, p-xylene, D-limonene, and isobutyl nonyl carbonate) were elevated, while one VOC (heptane, 2,2,4,6,6-pentamethyl-) was reduced compared with healthy controls. In pairwise comparisons between lung cancer and pneumonia, seven VOCs showed lower concentrations in the lung cancer group. Area under the receiver operating characteristic curve (AUC) values were 0.980 for lung cancer versus healthy controls, 0.956 for pneumonia versus healthy controls, and 0.983 for lung cancer versus pneumonia.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This exploratory study demonstrates that exhaled breath VOC profiles, analyzed via machine learning, yield statistically distinguishable signals in pairwise comparisons between lung cancer, pneumonia, and healthy individuals. These results provide preliminary evidence that breath analysis could address the critical clinical challenge of differentiating radiographically similar conditions non-invasively. The presented methodology and dataset establish a foundational framework for characterizing disease-specific metabolic signatures. However, the findings remain hypothesis-generating. Definitive evaluation of clinical utility necessitates subsequent studies employing multiclass modeling, validation in independent and prospective cohorts, and direct assessment of diagnostic impact in real-world triage scenarios.</p>
</sec>
</abstract>
<kwd-group>
<kwd>exhaled breath analysis</kwd>
<kwd>exploratory study</kwd>
<kwd>lung cancer</kwd>
<kwd>machine learning</kwd>
<kwd>pneumonia</kwd>
<kwd>volatile organic compounds</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Hengshui Science and Technology Plan Project of China (No. 2021014079Z).</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="30"/>
<page-count count="10"/>
<word-count count="6961"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Pulmonary Medicine</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="S1">
<title>Background</title>
<p>Lung cancer remains the leading cause of cancer-related mortality worldwide and is among the most frequently diagnosed malignant tumors. According to the International Agency for Research on Cancer (IARC), approximately 20 million new cancer cases and 9.7 million cancer-related deaths were reported globally in 2022, with lung cancer accounting for about 2.5 million new cases (12.4%) and 1.8 million deaths (18.7%), representing the highest incidence and mortality among all malignancies (<xref ref-type="bibr" rid="B1">1</xref>). East Asia bears a particularly high disease burden, contributing nearly half of global lung cancer cases, with China alone accounting for more than 40%. Despite advances in imaging, pathology, and systemic therapies, overall survival remains poor, largely because most cases are identified at advanced stages. These epidemiological trends have motivated continued efforts to improve strategies for earlier disease characterization and risk stratification.</p>
<p>In clinical practice, lung cancer assessment relies primarily on low-dose computed tomography (LDCT), bronchoscopy, and tissue biopsy. Although LDCT has been widely adopted in screening programs, it is associated with a high false-positive rate, reported to be approximately 27%, leading to unnecessary follow-up procedures and patient anxiety. Invasive approaches such as bronchoscopy and biopsy are limited by procedural risks and reduced sensitivity for small or peripherally located lesions. Moreover, early clinical manifestations of lung cancer, including cough, dyspnea, and chest discomfort, are non-specific and frequently overlap with those of pulmonary infections. Pneumonia itself remains highly prevalent and is commonly evaluated using chest imaging, which may be constrained by radiation exposure, contrast-related risks, and limited suitability for certain populations. Notably, pneumonic-type lung cancer can present radiographic features that closely resemble pneumonia, increasing the likelihood of diagnostic uncertainty and delayed clinical decision-making.</p>
<p>Against this background, increasing attention has been directed toward non-invasive approaches capable of capturing disease-associated biological information beyond conventional imaging. Volatile organic compounds (VOCs) present in exhaled breath arise from a combination of endogenous metabolic processes and exogenous exposures. Endogenous VOCs are generated through cellular metabolism and transported to the alveoli via blood&#x2013;gas exchange, whereas exogenous VOCs may originate from diet, environmental exposure, occupational factors, or microbial activity within the oral and gastrointestinal microbiota (<xref ref-type="bibr" rid="B2">2</xref>). Because exhaled breath collection is non-invasive, repeatable, and associated with minimal patient burden, breath-based VOC analysis has emerged as a valuable research tool for investigating metabolic alterations linked to respiratory diseases.</p>
<p>Since the pioneering work by Gordon et al. (<xref ref-type="bibr" rid="B3">3</xref>), numerous studies have explored breath VOC profiles in lung cancer and other respiratory conditions using analytical platforms such as gas chromatography&#x2013;mass spectrometry (GC&#x2013;MS), proton transfer reaction&#x2013;mass spectrometry (PTR&#x2013;MS), and electronic nose (eNose) technologies (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B6">6</xref>). Reported findings suggest that VOC patterns may differ between disease states, including chronic obstructive pulmonary disease, asthma, lung cancer, and COVID-19 (<xref ref-type="bibr" rid="B7">7</xref>). However, existing studies vary substantially in analytical techniques, feature selection strategies, and statistical modeling approaches, leading to inconsistent results. Importantly, many investigations rely on pairwise binary comparisons and often infer diagnostic potential without implementing or validating multiclass classification frameworks. As a result, the clinical interpretability and generalizability of VOC-based findings remain uncertain.</p>
<p>In this context, the present study was designed as an exploratory, proof-of-concept investigation to examine whether exhaled breath VOC profiles exhibit statistically distinguishable patterns between individuals with lung cancer, pneumonia, and healthy controls. Using micro-GC&#x2013;MS analysis combined with univariate statistics and machine learning&#x2013;based binary classification models, we aimed to characterize VOC differences and assess their discriminative performance in pairwise comparisons. This work seeks to contribute to a better understanding of disease-associated VOC signatures.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="S2.SS1">
<title>Participants</title>
<p>From May 15, 2023 to May 15, 2025, a total of 588 participants were enrolled at Hengshui People&#x2019;s Hospital, including patients with lung cancer (<italic>n</italic> = 180), patients with pneumonia (<italic>n</italic> = 228), and healthy controls (<italic>n</italic> = 180). Patients were recruited from clinical departments of the hospital, while healthy controls were recruited from individuals undergoing routine physical examinations at the hospital&#x2019;s health screening center. Recruitment and breath sample collection were conducted at Hengshui People&#x2019;s Hospital in accordance with protocols approved by the institutional Ethics Committee. All participants provided written informed consent and completed a standardized questionnaire.</p>
<p>All participants underwent pulmonary function testing, and only individuals with normal lung function were included to exclude chronic obstructive pulmonary disease and other conditions associated with impaired ventilation.</p>
<p>Patients with lung cancer were eligible if they were aged 18&#x2013;80 years, had imaging findings highly suggestive of malignancy, and had a histopathological diagnosis of non-small cell lung cancer. Exclusion criteria included confirmation of small cell lung cancer, severe pulmonary dysfunction preventing breath sampling, a history of other primary malignancies, or severe systemic diseases.</p>
<p>Patients with pneumonia were eligible if they were aged &#x2265; 18 years and had a diagnosis of pulmonary infection based on clinical presentation, laboratory findings, imaging, and microbiological evidence, with no prior treatment initiated at the time of breath sampling. Exclusion criteria included pulmonary lesions attributable to autoimmune or vascular diseases (e.g., sarcoidosis, Wegener&#x2019;s granulomatosis), heart failure, lung cancer, severe systemic comorbidities, multisystem infections, or critical illness precluding participation.</p>
<p>Healthy controls were eligible if they were aged 18&#x2013;80 years, had no abnormal findings on chest imaging, no recent respiratory infections, and no history of malignancy.</p>
</sec>
<sec id="S2.SS2">
<title>Exhaled breath collection</title>
<p>Participants were instructed to avoid alcohol, caffeine, garlic, chili, smoking, and other potential VOC-interfering substances for at least 12 h prior to sampling, and to refrain from eating or drinking for 1 h before sample collection. Immediately before sampling, participants rinsed their mouths with water for 1 min to reduce oral contaminants. These pre-collection restrictions were waived for pneumonia patients requiring urgent clinical care.</p>
<p>Breath sampling was conducted in a controlled laboratory environment. Participants were instructed to avoid speaking or coughing during collection. Using a medical-grade disposable mask connected directly to the sampling inlet and constructed from inert polymer materials, participants inhaled normally through the nose and exhaled slowly through the mouth for a total sampling duration of 3 min. This nasal inhalation&#x2013;oral exhalation protocol was adopted to reduce ambient air contamination and preferentially sample lower respiratory tract gas.</p>
<p>Between participants, the analytical system was flushed with nitrogen to eliminate residual compounds and prevent cross-contamination.</p>
</sec>
<sec id="S2.SS3">
<title>Instrumentation and VOC analysis</title>
<p>Exhaled breath samples were analyzed using a micro gas chromatography system (CXBA-Alpha, ChromX Health Co., Ltd.) equipped with an integrated detection module. The system consisted of a &#x03BC;PCI chip for VOC capture and thermal desorption, a 10-m microcolumn chip for temperature-programmed chromatographic separation, and a &#x03BC;PID detector for real-time signal acquisition. Approximately 600 mL of exhaled breath was introduced directly into the system without the use of gas storage bags.</p>
<p>Water vapor was removed using a Nafion drying tube prior to VOC enrichment. VOCs were concentrated on the &#x03BC;PCI chip and immediately thermally desorbed into the microcolumn for separation and detection.</p>
<p>To enable compound identification, 20% of samples were simultaneously diverted to adsorption tubes and analyzed offline using a mass spectrometry detector (MSD). Mass spectral data were processed using MassHunter Qualitative Analysis software (version 10.0) and matched against the NIST 2017 (v2.3) mass spectral library.</p>
</sec>
<sec id="S2.SS4">
<title>Data preprocessing and peak area quantification</title>
<p>Chromatographic data were processed using Python (v3.9.19). Each chromatogram underwent spike removal, baseline correction, detector overload correction, and windowed Gaussian smoothing. Peaks were detected using a second-derivative&#x2013;based algorithm, with potential co-eluting clusters identified automatically. Overlapping peaks were resolved using Gaussian mixture model fitting, and the integrated peak area of each resolved compound was calculated as the quantitative measure.</p>
<p>Peaks were aligned across samples based on retention-time similarity to construct a VOC abundance matrix. VOCs were retained if detected in at least 80% of all samples or in at least 80% of samples within any single clinical group. Samples were excluded if fewer than 80% of retained VOCs were successfully quantified.</p>
<p>To correct for day-to-day instrumental variability, a daily correction factor (&#x03BA;) was calculated as the mean peak area of six anchor compounds (isoprene, n-nonane, &#x03B1;-pinene, n-decane, (R)-(+)-limonene, and n-undecane). All VOC peak areas were normalized using the formula: V&#x2032; = 100 &#x00D7; V/&#x03BA;.</p>
</sec>
<sec id="S2.SS5">
<title>Quality control</title>
<p>Routine quality control was performed every 3 days using standard gas mixtures containing known concentrations of n-heptane, p-xylene, and styrene stored in a Masu canister. Background air was collected for 3 min prior to QC sample analysis. Samples failing QC criteria were reanalyzed. Instrumental drift correction was performed automatically by the system and verified by an engineer.</p>
</sec>
<sec id="S2.SS6">
<title>Feature selection</title>
<p>Univariate and multivariate approaches were applied for exploratory feature screening. All feature selection steps (including univariate statistical tests and VIP-based filtering) were strictly performed within the training set. Specifically, the dataset was first divided into training and test sets according to a stratified random sampling strategy, and feature selection was subsequently conducted on the training set. This process effectively avoids optimistic bias caused by prematurely using test set information. The Mann&#x2013;Whitney U test was used for pairwise group comparisons, with false discovery rate (FDR) correction applied for multiple testing. VOCs with adjusted <italic>p</italic> &#x003C; 0.05 were considered statistically significant.</p>
<p>Orthogonal partial least squares discriminant analysis (OPLS-DA) was used as an exploratory multivariate method. Variables with a variable importance in projection (VIP) score &#x003E; 1 were considered to contribute substantially to group separation.</p>
</sec>
<sec id="S2.SS7">
<title>Machine learning analysis</title>
<p>Machine learning analyses were conducted exclusively using pairwise binary classification frameworks: lung cancer vs. healthy controls, pneumonia vs. healthy controls, and lung cancer vs. pneumonia. Five algorithms were evaluated: logistic regression, support vector classification, k-nearest neighbors, random forest, and extreme gradient boosting. Multiclass classification was intentionally not pursued in order to avoid inflated performance estimates in the absence of sufficient external validation.</p>
<p>Data were partitioned using stratified sampling to preserve class proportions. Specifically, stratified random sampling was used to divide the dataset into a training set and an independent test set, ensuring that the proportion of each class label was consistent in both subsets. To ensure reproducibility, a random seed of 42 was set for the entire analysis process, covering data splitting, model training, and hyperparameter optimization. Performance metrics (e.g., accuracy and AUC) were calculated on the independent test set obtained from a single data split; the final reported performance was not based on repeated sampling. However, to assess the stability of the performance estimates, bootstrap resampling (<italic>n</italic> = 1,000) was further used to construct 95% confidence intervals for each metric, providing more robust results.</p>
<p>Performance metrics included area under the ROC curve (AUC), accuracy, sensitivity, specificity, F1 score, positive predictive value, and negative predictive value. Non-parametric bootstrapping with 1,000 resamples was used to estimate 95% confidence intervals.</p>
</sec>
<sec id="S2.SS8">
<title>Statistical analysis</title>
<p>Statistical analyses were performed using SPSS 26.0 and R (version 4.4.2). Normality was assessed using the Shapiro&#x2013;Wilk test. Non-normally distributed variables were summarized as median (interquartile range). Categorical variables were compared using chi-square or Fisher&#x2019;s exact tests as appropriate. All statistical tests were two-sided, and <italic>p</italic> &#x003C; 0.05 was considered statistically significant.</p>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Participant characteristics</title>
<p>A total of 588 participants were included in the analysis, comprising 180 patients with lung cancer, 228 patients with pneumonia, and 180 healthy controls. Demographic and clinical characteristics of the study population are summarized in <xref ref-type="table" rid="T1">Table 1</xref>. Overall, the three groups were comparable with respect to sex distribution, age, body mass index (BMI), smoking status, and alcohol consumption.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Demographic characteristics of the participants.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center">Lung cancer (<italic>N</italic> = 180)</th>
<th valign="top" align="center">Pneumonia (<italic>N</italic> = 228)</th>
<th valign="top" align="center">Healthy participants (<italic>N</italic> = 180)</th>
<th valign="top" align="center"><italic>P</italic>-values</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Sex (male)</td>
<td valign="top" align="center">96 (53.3)</td>
<td valign="top" align="center">120 (52.6)</td>
<td valign="top" align="center">101 (56.1)</td>
<td valign="top" align="center">0.052</td>
</tr>
<tr>
<td valign="top" align="left">Age, years</td>
<td valign="top" align="center">53.85 &#x00B1; 17.94</td>
<td valign="top" align="center">55.28 &#x00B1; 17.66</td>
<td valign="top" align="center">51.06 &#x00B1; 16.91</td>
<td valign="top" align="center">0.051</td>
</tr>
<tr>
<td valign="top" align="left">Body mass index, kg/m<sup>2</sup></td>
<td valign="top" align="center">23.3 &#x00B1; 6.3</td>
<td valign="top" align="center">22.8 &#x00B1; 4.8</td>
<td valign="top" align="center">23.2 &#x00B1; 4.1</td>
<td valign="top" align="center">0.555</td>
</tr>
<tr>
<td valign="top" align="left" colspan="5"><bold>Smoking status</bold></td>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="center">95 (52.8)</td>
<td valign="top" align="center">125 (54.8)</td>
<td valign="top" align="center">96 (53.3)</td>
<td valign="top" align="center">0.973</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="top" align="center">54 (30.0)</td>
<td valign="top" align="center">67 (29.4)</td>
<td valign="top" align="center">48 (26.7)</td>
<td valign="top" align="center">0.755</td>
</tr>
<tr>
<td valign="top" align="left">Quit within past year</td>
<td valign="top" align="center">31 (17.2)</td>
<td valign="top" align="center">36 (15.8)</td>
<td valign="top" align="center">36 (20.0)</td>
<td valign="top" align="center">0.536</td>
</tr>
<tr>
<td valign="top" align="left" colspan="5"><bold>Alcohol use</bold></td>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="center">93 (51.7)</td>
<td valign="top" align="center">124 (54.4)</td>
<td valign="top" align="center">96 (53.3)</td>
<td valign="top" align="center">0.861</td>
</tr>
<tr>
<td valign="top" align="left">Occasionally</td>
<td valign="top" align="center">69 (38.3)</td>
<td valign="top" align="center">70 (30.7)</td>
<td valign="top" align="center">48 (26.7)</td>
<td valign="top" align="center">0.054</td>
</tr>
<tr>
<td valign="top" align="left">Regular</td>
<td valign="top" align="center">18 (10.0)</td>
<td valign="top" align="center">34 (14.9)</td>
<td valign="top" align="center">36 (20.0)</td>
<td valign="top" align="center">0.383</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>Data are <italic>n</italic> (%) or mean &#x00B1; SD.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Male participants accounted for 53.3% of the lung cancer group, 52.6% of the pneumonia group, and 56.1% of the healthy control group. Mean age was 53.85 &#x00B1; 17.94 years in the lung cancer group, 55.28 &#x00B1; 17.66 years in the pneumonia group, and 51.06 &#x00B1; 16.91 years in healthy controls. Mean BMI values were similar across groups. Smoking status and alcohol consumption patterns were also comparable. With respect to smoking status, approximately half of participants in each group reported never having smoked, including 52.8% in the lung cancer group, 54.8% in the pneumonia group, and 53.3% among healthy participants. Current smokers comprised 30.0% of the lung cancer group, 29.4% of the pneumonia group, and 26.7% of the healthy participant group. Participants who had quit smoking within the past year accounted for 17.2%, 15.8%, and 20.0% of the lung cancer, pneumonia, and healthy groups, respectively. Approximately half of participants reported no alcohol use, including 51.7% of patients with lung cancer, 54.4% of patients with pneumonia, and 53.3% of healthy participants. Occasional alcohol use was reported by 38.3% of the lung cancer group, 30.7% of the pneumonia group, and 26.7% of healthy participants, whereas regular alcohol consumption was reported by 10.0%, 14.9%, and 20.0% of participants in the respective groups.</p>
</sec>
<sec id="S3.SS2">
<title>Differential VOC profiles in pairwise comparisons</title>
<p>Univariate analyses with FDR correction, complemented by exploratory multivariate screening, identified several VOCs that differed statistically between groups in pairwise comparisons (<xref ref-type="table" rid="T2">Table 2</xref>). However, across all comparisons, substantial overlap in VOC distributions was observed, as illustrated by boxplots (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figures 1</xref>&#x2013;<xref ref-type="supplementary-material" rid="DS1">3</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Results</xref>), indicating that no single compound provided clear separation at the individual level.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Differential VOCs identified in three comparison groups and their characteristics.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">VOC_ID</th>
<th valign="top" align="left"><italic>P</italic>-value</th>
<th valign="top" align="left">Fold change</th>
<th valign="top" align="left">VIP</th>
<th valign="top" align="left">AUC</th>
<th valign="top" align="left">Molecular name</th>
<th valign="top" align="left">Molecular formula</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="7"><bold>Lung cancer vs. healthy participants</bold></td>
</tr>
<tr>
<td valign="top" align="left">VOC@510.013</td>
<td valign="top" align="left">0.000956</td>
<td valign="top" align="left">1.136551</td>
<td valign="top" align="left">1.65696</td>
<td valign="top" align="left">0.757</td>
<td valign="top" align="left">Heptane</td>
<td valign="top" align="left">C<sub>7</sub>H<sub>16</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@529.907</td>
<td valign="top" align="left">0.02323</td>
<td valign="top" align="left">1.145522</td>
<td valign="top" align="left">2.12289</td>
<td valign="top" align="left">0.694</td>
<td valign="top" align="left">1-Methylthio-Propane</td>
<td valign="top" align="left">C<sub>4</sub>H<sub>10</sub>S</td>
</tr>
<tr>
<td valign="top" align="left">VOC@564.784</td>
<td valign="top" align="left">0.037502</td>
<td valign="top" align="left">0.619769</td>
<td valign="top" align="left">1.48415</td>
<td valign="top" align="left">0.589</td>
<td valign="top" align="left">6-Hydroxy-2-Hexanone</td>
<td valign="top" align="left">C<sub>6</sub>H<sub>12</sub>O<sub>2</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@668.997</td>
<td valign="top" align="left">0.007528</td>
<td valign="top" align="left">1.473895</td>
<td valign="top" align="left">1.243582</td>
<td valign="top" align="left">0.662</td>
<td valign="top" align="left">Styrene</td>
<td valign="top" align="left">C<sub>8</sub>H<sub>8</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@772.812</td>
<td valign="top" align="left">0.010957</td>
<td valign="top" align="left">0.663253</td>
<td valign="top" align="left">1.531225</td>
<td valign="top" align="left">0.664</td>
<td valign="top" align="left">o-Xylene</td>
<td valign="top" align="left">C<sub>8</sub>H<sub>10</sub></td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Pneumonia vs. healthy participants</bold></td>
</tr>
<tr>
<td valign="top" align="left">VOC@495.218</td>
<td valign="top" align="left">0.000167</td>
<td valign="top" align="left">5.43221</td>
<td valign="top" align="left">1.34295</td>
<td valign="top" align="left">0.82</td>
<td valign="top" align="left">1,4-Pentadiene</td>
<td valign="top" align="left">C<sub>5</sub>H<sub>8</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@564.677</td>
<td valign="top" align="left">0.000006</td>
<td valign="top" align="left">3.488783</td>
<td valign="top" align="left">1.480356</td>
<td valign="top" align="left">0.823</td>
<td valign="top" align="left">Toluene</td>
<td valign="top" align="left">C<sub>7</sub>H<sub>8</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@604.254</td>
<td valign="top" align="left">0.001514</td>
<td valign="top" align="left">2.125165</td>
<td valign="top" align="left">1.238068</td>
<td valign="top" align="left">0.723</td>
<td valign="top" align="left">Butyl acetate</td>
<td valign="top" align="left">C<sub>6</sub>H<sub>12</sub>O<sub>2</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@637.426</td>
<td valign="top" align="left">0.001257</td>
<td valign="top" align="left">2.409148</td>
<td valign="top" align="left">1.213088</td>
<td valign="top" align="left">0.712</td>
<td valign="top" align="left">p-Xylene</td>
<td valign="top" align="left">C<sub>8</sub>H<sub>10</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@823.009</td>
<td valign="top" align="left">0.000775</td>
<td valign="top" align="left">2.94702</td>
<td valign="top" align="left">1.423677</td>
<td valign="top" align="left">0.706</td>
<td valign="top" align="left">D-Limonene</td>
<td valign="top" align="left">C<sub>10</sub>H<sub>16</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@862.358</td>
<td valign="top" align="left">0.000162</td>
<td valign="top" align="left">14.969418</td>
<td valign="top" align="left">1.430806</td>
<td valign="top" align="left">0.711</td>
<td valign="top" align="left">Iso-butyl nonyl carbonate</td>
<td valign="top" align="left">C<sub>14</sub>H<sub>28</sub>O<sub>3</sub></td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Lung cancer vs. pneumonia</bold></td>
</tr>
<tr>
<td valign="top" align="left">VOC@474.085</td>
<td valign="top" align="left">0.008075</td>
<td valign="top" align="left">0.44858</td>
<td valign="top" align="left">1.011842</td>
<td valign="top" align="left">0.76</td>
<td valign="top" align="left">2-Methylbutane</td>
<td valign="top" align="left">C<sub>5</sub>H<sub>12</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@495.218</td>
<td valign="top" align="left">0.003852</td>
<td valign="top" align="left">0.334084</td>
<td valign="top" align="left">1.035689</td>
<td valign="top" align="left">0.766</td>
<td valign="top" align="left">1,4-Pentadiene</td>
<td valign="top" align="left">C<sub>5</sub>H<sub>8</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@564.677</td>
<td valign="top" align="left">0.000004</td>
<td valign="top" align="left">0.280476</td>
<td valign="top" align="left">1.494594</td>
<td valign="top" align="left">0.809</td>
<td valign="top" align="left">Toluene</td>
<td valign="top" align="left">C<sub>7</sub>H<sub>8</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@637.426</td>
<td valign="top" align="left">0.002517</td>
<td valign="top" align="left">0.420881</td>
<td valign="top" align="left">1.270734</td>
<td valign="top" align="left">0.737</td>
<td valign="top" align="left">p-Xylene</td>
<td valign="top" align="left">C<sub>8</sub>H<sub>10</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@772.889</td>
<td valign="top" align="left">0.0049</td>
<td valign="top" align="left">0.24997</td>
<td valign="top" align="left">1.166884</td>
<td valign="top" align="left">0.756</td>
<td valign="top" align="left">o-Xylene</td>
<td valign="top" align="left">C<sub>8</sub>H<sub>10</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@823.009</td>
<td valign="top" align="left">0.002798</td>
<td valign="top" align="left">0.39717</td>
<td valign="top" align="left">1.497614</td>
<td valign="top" align="left">0.691</td>
<td valign="top" align="left">&#x03B1;-Pinene</td>
<td valign="top" align="left">C<sub>10</sub>H<sub>16</sub></td>
</tr>
<tr>
<td valign="top" align="left">VOC@862.358</td>
<td valign="top" align="left">0.000196</td>
<td valign="top" align="left">0.078595</td>
<td valign="top" align="left">1.40654</td>
<td valign="top" align="left">0.711</td>
<td valign="top" align="left">Iso-butyl nonyl carbonate</td>
<td valign="top" align="left">C<sub>14</sub>H<sub>28</sub>O<sub>3</sub></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>VOC_ID, compound identification number; <italic>P</italic>-value, significance <italic>P</italic>-value for intergroup comparison; Fold change, fold change in concentration in the disease group relative to the control group (&#x003E;1 indicates upregulation, &#x003C;1 indicates downregulation); VIP, variable importance in projection (from OPLS-DA model); AUC, area under the curve when the compound is used as a single diagnostic marker. Screening criteria: <italic>P</italic> &#x003C; 0.05 and VIP &#x003E; 1.0.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>When comparing lung cancer patients with healthy controls, a limited subset of VOCs showed statistically significant differences in abundance. Similarly, comparisons between pneumonia and healthy controls, as well as between lung cancer and pneumonia, revealed multiple VOCs with differential expression. These differences were modest in magnitude and consistently characterized by overlapping interquartile ranges across groups. Importantly, the observed VOC patterns reflected group-level shifts rather than distinct disease-specific signatures, consistent with the exploratory nature of the analysis.</p>
<p>Although statistically significant VOC differences were detected between groups, marked overlap in distributions underscores that these findings represent exploratory group-level patterns rather than definitive biomarker signals.</p>
</sec>
<sec id="S3.SS3">
<title>Performance of machine learning models in binary classification</title>
<p>Machine learning analyses were conducted using pairwise binary classification frameworks to assess whether combinations of VOCs could improve group discrimination. Model performance metrics are summarized in <xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 1</xref>, with ROC curves shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<table-wrap position="float" id="T3">
<label>TABLE 3</label>
<caption><p>Comparison between training and test set AUC.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="left">Training_AUC</th>
<th valign="top" align="left">Testing_AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="3"><bold>Lung cancer vs. healthy volunteers</bold></td>
</tr>
<tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left">0.801</td>
<td valign="top" align="left">0.887</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">0.97</td>
<td valign="top" align="left">0.98</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">0.998</td>
<td valign="top" align="left">0.98</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="left">0.79</td>
<td valign="top" align="left">0.857</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="left">0.947</td>
<td valign="top" align="left">0.875</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3"><bold>Pneumonia vs. healthy volunteers</bold></td>
</tr>
<tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left">0.88</td>
<td valign="top" align="left">0.849</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">0.976</td>
<td valign="top" align="left">0.956</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">0.992</td>
<td valign="top" align="left">0.942</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="left">0.87</td>
<td valign="top" align="left">0.862</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="left">0.925</td>
<td valign="top" align="left">0.886</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3"><bold>Lung cancer vs. pneumonia</bold></td>
</tr>
<tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left">0.933</td>
<td valign="top" align="left">0.913</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">0.980</td>
<td valign="top" align="left">0.967</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">0.989</td>
<td valign="top" align="left">0.983</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="left">0.907</td>
<td valign="top" align="left">0.889</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="left">0.930</td>
<td valign="top" align="left">0.932</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>LR, logistic regression; KNN, k-nearest neighbors; RF, random forest; SVC, support vector classifier; XGBoost, extreme gradient boosting; AUC, area under the receiver operating characteristic curve.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Receiver operating characteristic (ROC) curves of machine learning models based on differential VOCs. Based on the selected characteristic VOCs and baseline characteristics, this study constructed five machine learning models to distinguish between different groups. The effectiveness of these models was evaluated by generating ROC curves and calculating the area under the ROC curve (AUC). <bold>(A)</bold> Lung cancer vs. healthy participants. <bold>(B)</bold> Pneumonia vs. healthy participants. <bold>(C)</bold> Lung cancer vs. pneumonia.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-13-1741424-g001.tif">
<alt-text content-type="machine-generated">Receiver operating characteristic (ROC) curve comparing five machine learning models: Logistic Regression, KNN, Random Forest, SVM, and XGBoost. The plot shows sensitivity versus one minus specificity. The area under the curve (AUC) values with confidence intervals are listed for each model, with Random Forest (green) achieving the highest AUC of zero point nine eight three.</alt-text>
</graphic>
</fig>
<p>Across all pairwise comparisons, multivariate models achieved statistically robust discrimination, with AUC values consistently exceeding those expected by chance. While individual algorithms varied in performance, no single modeling approach uniformly outperformed others across all comparisons. Instead, model discrimination reflected the combined contribution of multiple VOC features, rather than reliance on any single compound.</p>
<p>Notably, despite favorable AUC values, overlap in predicted probabilities between groups was observed in all models, indicating limited separation at the individual subject level. These findings support the potential utility of VOC-based profiles for group discrimination in controlled settings, while emphasizing the current limitations for individual-level clinical classification.</p>
<p>Multivariate VOC-based models improved group-level discrimination compared with univariate analyses, but overlapping prediction distributions highlight the exploratory and hypothesis-generating nature of these results.</p>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<p>In this study, we conducted an exploratory investigation into the feasibility of using a portable VOC detection device combined with machine learning algorithms to perform pairwise discrimination among three groups: lung cancer, pneumonia, and healthy individuals, based on exhaled breath analysis. The results demonstrate that this approach can differentiate between lung cancer and healthy controls, as well as between pneumonia and healthy controls, in pairwise binary comparisons, with preliminary classification performance reflected in the AUC, sensitivity, specificity, and accuracy metrics. It should be emphasized that this was an internal validation study based exclusively on binary classification models; no multi-class modeling or external validation was performed at this stage. These findings suggest the potential utility of breath-based VOC analysis as a non-invasive exploratory tool for further investigation in clinical differentiation scenarios. However, its applicability as a clinical diagnostic tool requires further validation through independent cohorts, multi-class studies, and rigorous clinical trials.</p>
<p>This study shows that models constructed based on volatile organic compounds achieved high AUC values in the binary classification of lung cancer and pneumonia, demonstrating their discriminative ability in distinguishing these two conditions. However, a high AUC value does not necessarily mean it can be directly applied to clinical diagnostic scenarios. In real clinical diagnostics, decision-making often involves multiclassification, different disease prevalence rates, and different misclassification risks, thus requiring a more comprehensive and interpretable evaluation. While AUC provides a measure of overall discriminative ability, it cannot directly reveal the clinical distribution of classification errors. In real clinical environments, practicality is determined by the complete confusion matrix, which reveals the specific nature of classification errors. Special attention should be paid to false positives (e.g., pneumonia patients misclassified as lung cancer, leading to unnecessary invasive examinations) and false negatives (e.g., lung cancer patients misclassified as pneumonia, resulting in critical diagnostic delays). Additionally, post-test probabilities, particularly the positive predictive value (PPV), highly depend on the prevalence of the target disease in the intended population. Therefore, it is crucial to report the specificity, sensitivity, and trade-offs for each category in clinical contexts. Disease prevalence has a critical impact on the clinical utility of model predictions. In populations with a low prevalence of lung cancer, even if the model has high sensitivity and specificity, the positive predictive value may still be low, leading to a high false-positive rate and unnecessary further examinations and patient anxiety. Thus, when evaluating whether a model is suitable for the target scenario, the expected disease prevalence in that context must be analyzed.</p>
<p>A key methodological consideration in breathomics research is the influence of physiological and pathological confounders. To minimize potential bias, we carefully controlled for age, sex, smoking status, and comorbidities across study groups. This is particularly important because several VOCs, including isoprene, alkanes, and methylated alkanes, have been reported to correlate with age-related metabolic changes (<xref ref-type="bibr" rid="B8">8</xref>&#x2013;<xref ref-type="bibr" rid="B10">10</xref>). In many previous studies, case groups were significantly older than controls, which may have confounded VOC-based discrimination. Similarly, smoking is a well-established risk factor for lung cancer and a major determinant of breath VOC composition. Matching smoking history between groups was therefore essential to reduce smoking-related bias. In addition, comorbid conditions unrelated to the target pulmonary diseases may independently alter VOC profiles; thus, harmonizing comorbidity distributions across groups was critical for identifying disease-associated VOC signatures rather than non-specific metabolic changes.</p>
<p>The present findings further highlight the advantages of integrating portable micro-GC-MS technology with machine learning models. Compared with conventional VOC analysis platforms such as GC-MS, PTR-MS, and electronic noses&#x2013;which typically require complex infrastructure, high costs, and prolonged analysis times&#x2013;the portable micro-GC system enables rapid, real-time VOC detection at the point of care. This portability and repeatability make the approach particularly attractive for longitudinal monitoring, disease screening, and potential deployment in resource-limited clinical environments.</p>
<p>Consistent with previous reports, the VOCs identified in lung cancer patients in this study predominantly belonged to the classes of alkanes, alkenes, ketones, and benzene derivatives (<xref ref-type="bibr" rid="B11">11</xref>). These compounds have been widely associated with altered lipid metabolism and oxidative stress in malignant tissues. From a mechanistic perspective, saturated and unsaturated hydrocarbons, such as heptane, 2-methylbutane, and 1,4-pentadiene, are thought to arise from lipid peroxidation of polyunsaturated fatty acids (PUFAs) within cell membrane phospholipids. Tumor cells are characterized by increased reactive oxygen species (ROS) production, which can induce oxidative damage to membrane lipids, proteins, and DNA. Enhanced lipid peroxidation may therefore plausibly contribute to elevated levels of volatile hydrocarbons detected in the exhaled breath of lung cancer patients. Likewise, ketone bodies and their derivatives may reflect altered mitochondrial &#x03B2;-oxidation of fatty acids, a metabolic pathway frequently upregulated in cancer cells (<xref ref-type="bibr" rid="B12">12</xref>). While these mechanisms provide a biologically plausible explanation, the present study was not designed to establish direct causal relationships between specific VOCs and underlying metabolic pathways.</p>
<p>In contrast, the pneumonia group exhibited a distinct VOC profile, with several compounds significantly elevated compared with healthy controls. Many of these VOCs have been previously associated with inflammatory responses and infectious processes. Aromatic hydrocarbons such as toluene and p-xylene have been reported to correlate with immune activation and inflammation (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B13">13</xref>). Butyl acetate has been linked to acute lung injury and may reflect oxidative stress or membrane disruption during inflammatory responses (<xref ref-type="bibr" rid="B9">9</xref>). D-limonene, a terpenoid compound with reported anti-inflammatory properties, may also reflect host immune responses to pulmonary infection. Collectively, these findings suggest that inflammatory and infectious processes contribute to disease-specific VOC signatures in pneumonia, although the precise biological origins of individual compounds remain to be fully elucidated.</p>
<p>The ability of VOCs to distinguish lung cancer histological subtypes remains controversial. Previous studies have reported inconsistent findings, ranging from no detectable differences between subtypes (<xref ref-type="bibr" rid="B10">10</xref>), to statistically significant differences in selected VOCs (<xref ref-type="bibr" rid="B14">14</xref>), to subtype-dependent VOC profiles without clear statistical correlations (<xref ref-type="bibr" rid="B15">15</xref>). In the present study, subgroup analyses were limited by sample size, and no definitive conclusions regarding histological subtype discrimination could be drawn. These inconsistencies across studies underscore the need for larger, well-powered cohorts and standardized analytical pipelines to clarify the relationship between VOC profiles and tumor histology.</p>
<p>Volatile organic compounds detected in exhaled breath may originate from endogenous host metabolism, microbial metabolism, or host&#x2013;pathogen interactions during infection or inflammation (<xref ref-type="bibr" rid="B16">16</xref>). In infectious lung diseases, invading microorganisms can generate a wide range of VOCs, including hydrocarbons, alcohols, ketones, and nitrogen- or sulfur-containing compounds. Previous studies have demonstrated pathogen-specific VOC patterns in pulmonary infections (<xref ref-type="bibr" rid="B17">17</xref>&#x2013;<xref ref-type="bibr" rid="B21">21</xref>). In this study, seven VOCs were identified as discriminatory between pneumonia patients and healthy controls. Several of these compounds&#x2013;such as pentadiene (<xref ref-type="bibr" rid="B22">22</xref>), toluene (<xref ref-type="bibr" rid="B23">23</xref>), p-xylene, limonene (<xref ref-type="bibr" rid="B24">24</xref>), and heptane&#x2013;have been reported previously, whereas isobutyl nonyl carbonate was identified for the first time in this context. This compound is widely used as an industrial raw material and is not known to be synthesized endogenously, suggesting an exogenous origin (<xref ref-type="bibr" rid="B25">25</xref>). The most likely sources include disinfectants in hospital settings, volatiles from medical equipment, or personal care items for patients (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 2</xref>). Given that its source is independent of the underlying pathophysiological process, isobutyl nonyl carbonate should be considered a potential contaminant. We hypothesize that during pulmonary infection, increased metabolic activity and ROS production may alter the absorption, metabolism, or clearance of exogenous VOCs, leading to elevated detectable levels. This interpretation remains speculative and warrants further investigation.</p>
<p>Distinguishing early-stage lung cancer from pneumonia is clinically challenging because of overlapping symptoms and imaging features. When directly comparing lung cancer and pneumonia, the present study identified disease-specific differences in VOC profiles. Exhaled breath contains thousands of metabolites derived from host tissues, the respiratory tract, and associated microbiota. Both commensal and pathogenic microorganisms can produce VOCs across diverse chemical classes, some of which may serve as disease biomarkers (<xref ref-type="bibr" rid="B26">26</xref>). Aromatic hydrocarbons have been reported to exhibit high diagnostic value for pulmonary infections, including COVID-19 severity stratification (<xref ref-type="bibr" rid="B27">27</xref>), and are generally considered to be of exogenous origin (<xref ref-type="bibr" rid="B28">28</xref>). Our findings are consistent with these observations. In addition, fungal VOCs&#x2013;such as terpenes including &#x03B1;-pinene and limonene&#x2013;have been associated with pulmonary fungal infections (<xref ref-type="bibr" rid="B29">29</xref>, <xref ref-type="bibr" rid="B30">30</xref>). The increased concentration of &#x03B1;-pinene observed in the pneumonia group in this study may therefore reflect microbial contributions to the breath VOC profile. This study aimed to distinguish broad metabolic differences rather than to identify VOCs specific to particular pathogens. Therefore, further research is needed to analyze these VOC signals in a prospective cohort with clear etiological diagnoses, in order to identify which are products of the host&#x2019;s general inflammatory response to infection and which are direct markers of specific microbial metabolism. This would greatly enhance the specificity of VOC diagnostic tools, enabling them not only to distinguish lung cancer from pneumonia but also to further differentiate the cause of infection within pneumonia, thereby achieving more precise clinical decision-making.</p>
<p>Although several VOCs showed statistically significant differences between groups, these findings should be interpreted cautiously. Statistical significance alone does not guarantee clinical discriminative value. Substantial overlap in VOC distributions between groups was observed, indicating that individual VOCs are unlikely to function as reliable standalone biomarkers. This highlights the importance of multi-marker approaches and integrative modeling strategies. Accordingly, the present study emphasizes the use of multivariate machine learning models rather than reliance on single VOCs. The identified VOCs should be viewed as candidate features contributing to composite diagnostic models, whose performance and generalizability must be validated in independent cohorts.</p>
<p>Previous studies have shown that exhaled levels of certain alkanes decrease in some lung cancer patients, whereas aromatic compounds increase in lung inflammation, which is consistent with our findings. Differences in the types of hydrocarbons and esters between pneumonia and lung cancer have also been demonstrated in previous studies, supporting the idea that inflammation and the tumor microenvironment may have different effects on the metabolism of specific VOCs. However, some VOCs identified here were not found in previous studies, which may be due to differences in lung cancer classification, different pathogens, or variations in sample collection methods, analysis platforms, or statistical strategies. In this study, lung cancer-specific VOC patterns may appear more complex when pneumonia is considered as a confounding condition, and some overlapping compounds may change differently depending on the comparison group. This information can help optimize the selection of model features. However, further validation of the reproducibility of key compounds and exploration of combination biomarkers and standardized analysis workflows are still needed to improve the specificity of disease diagnosis.</p>
<p>Although the model constructed in this study showed a high AUC in internal validation, we did not evaluate the calibration performance of the model. This means that the risk probabilities output by the model may not accurately reflect the true risk, especially when the prevalence in the target population differs from that in this study cohort. Future studies need to jointly validate and optimize the model&#x2019;s discriminative and calibration performance in external cohorts.</p>
<p>Several limitations of this study should be acknowledged. First, the strict inclusion and exclusion criteria limit the generalizability of the findings. The results should be considered proof-of-concept and may not directly extend to patients with chronic respiratory diseases such as chronic obstructive pulmonary disease. Future studies should include more heterogeneous populations to assess model robustness in real-world settings. Second, although logistic regression was used for binary classification, other machine learning models capable of multi-class classification were also explored. The focus on binary differentiation between lung cancer and pneumonia reflects a clinically relevant diagnostic question; however, broader multi-disease classification frameworks warrant further investigation. Third, despite favorable model performance, the biological roles of individual VOCs in disease pathophysiology remain incompletely understood. Mechanistic studies are needed to clarify the sources and functional relevance of these compounds.</p>
<p>In conclusion, this study provides preliminary evidence that combining machine learning with exhaled VOC analysis offers a viable approach for the non-invasive differentiation of lung cancer and pneumonia from healthy controls. The methodological framework established here represents a foundational step toward addressing the clinical challenge of distinguishing these radiographically overlapping conditions. However, these findings are hypothesis-generating. Their potential to facilitate earlier diagnosis or improve clinical decision-making requires rigorous validation through multiclass modeling, external testing in independent cohorts, and prospective evaluation in real-world triage settings before any clinical application can be considered.</p>
</sec>
</body>
<back>
<sec id="S5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in this study are included in this article/<xref ref-type="supplementary-material" rid="DS1">Supplementary material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="S6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of Hengshui People&#x2019;s Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="S7" sec-type="author-contributions">
<title>Author contributions</title>
<p>JW: Writing &#x2013; original draft, Methodology, Writing &#x2013; review &#x0026; editing. HL: Investigation, Data curation, Writing &#x2013; review &#x0026; editing. JY: Formal analysis, Validation, Writing &#x2013; review &#x0026; editing, Visualization. YS: Supervision, Writing &#x2013; review &#x0026; editing, Visualization. NW: Validation, Writing &#x2013; review &#x0026; editing, Supervision. WG: Supervision, Writing &#x2013; review &#x0026; editing, Visualization, Validation. ZC: Formal analysis, Data curation, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="S9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="S10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. The article was written using the generative artificial intelligence tool DeepSeek, which assisted in translating and refining the text with the help of Baidu Translate. All content generated by AI has been independently verified by the author. The author is responsible for the final content, viewpoint, and data accuracy of the entire article.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="S11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="S12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmed.2026.1741424/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmed.2026.1741424/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="DS1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bray</surname> <given-names>F</given-names></name> <name><surname>Laversanne</surname> <given-names>M</given-names></name> <name><surname>Sung</surname> <given-names>H</given-names></name> <name><surname>Ferlay</surname> <given-names>J</given-names></name> <name><surname>Siegel</surname> <given-names>RL</given-names></name> <name><surname>Soerjomataram</surname> <given-names>I</given-names></name><etal/></person-group> <article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries.</article-title> <source><italic>CA Cancer J Clin</italic>.</source> (<year>2024</year>) <volume>74</volume>:<fpage>229</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21834</pub-id> <pub-id pub-id-type="pmid">38572751</pub-id></mixed-citation></ref>
<ref id="B2">
<label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Oort</surname> <given-names>PM</given-names></name> <name><surname>Povoa</surname> <given-names>P</given-names></name> <name><surname>Schnabel</surname> <given-names>R</given-names></name> <name><surname>Dark</surname> <given-names>P</given-names></name> <name><surname>Artigas</surname> <given-names>A</given-names></name> <name><surname>Bergmans</surname> <given-names>DCJJ</given-names></name><etal/></person-group> <article-title>The potential role of exhaled breath analysis in the diagnostic process of pneumonia-a systematic review.</article-title> <source><italic>J Breath Res</italic>.</source> (<year>2018</year>) <volume>12</volume>:<fpage>024001</fpage>. <pub-id pub-id-type="doi">10.1088/1752-7163/aaa499</pub-id> <pub-id pub-id-type="pmid">29292698</pub-id></mixed-citation></ref>
<ref id="B3">
<label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gordon</surname> <given-names>SM</given-names></name> <name><surname>Szidon</surname> <given-names>JP</given-names></name> <name><surname>Krotoszynski</surname> <given-names>BK</given-names></name> <name><surname>Gibbons</surname> <given-names>RD</given-names></name> <name><surname>O&#x2019;Neill</surname> <given-names>HJ</given-names></name></person-group>. <article-title>Volatile organic compounds in exhaled air from patients with lung cancer.</article-title> <source><italic>Clin Chem</italic>.</source> (<year>1985</year>) <volume>31</volume>:<fpage>1278</fpage>&#x2013;<lpage>82</lpage>.</mixed-citation></ref>
<ref id="B4">
<label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mohan</surname> <given-names>D</given-names></name> <name><surname>Keir</surname> <given-names>HR</given-names></name> <name><surname>Richardson</surname> <given-names>H</given-names></name> <name><surname>Mayhew</surname> <given-names>D</given-names></name> <name><surname>Boyer</surname> <given-names>J</given-names></name> <name><surname>van der Schee</surname> <given-names>MP</given-names></name><etal/></person-group> <article-title>Exhaled volatile organic compounds and lung microbiome in COPD: a pilot randomised controlled trial.</article-title> <source><italic>ERJ Open Res.</italic></source> (<year>2021</year>) <volume>7</volume>:<fpage>00253-2021</fpage>. <pub-id pub-id-type="doi">10.1183/23120541.00253-2021</pub-id> <pub-id pub-id-type="pmid">34616836</pub-id></mixed-citation></ref>
<ref id="B5">
<label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Righettoni</surname> <given-names>M</given-names></name> <name><surname>Schmid</surname> <given-names>A</given-names></name> <name><surname>Amann</surname> <given-names>A</given-names></name> <name><surname>Pratsinis</surname> <given-names>SE</given-names></name></person-group>. <article-title>Correlations between blood glucose and breath components from portable gas sensors and PTR-TOF-MS.</article-title> <source><italic>J Breath Res</italic>.</source> (<year>2013</year>) <volume>7</volume>:<fpage>037110</fpage>. <pub-id pub-id-type="doi">10.1088/1752-7155/7/3/037110</pub-id> <pub-id pub-id-type="pmid">23959908</pub-id></mixed-citation></ref>
<ref id="B6">
<label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>De Vincentis</surname> <given-names>A</given-names></name> <name><surname>Pennazza</surname> <given-names>G</given-names></name> <name><surname>Santonico</surname> <given-names>M</given-names></name> <name><surname>Vespasiani-Gentilucci</surname> <given-names>U</given-names></name> <name><surname>Galati</surname> <given-names>G</given-names></name> <name><surname>Gallo</surname> <given-names>P</given-names></name><etal/></person-group> <article-title>Breath-print analysis by e-nose may refine risk stratification for adverse outcomes in cirrhotic patients.</article-title> <source><italic>Liver Int</italic>.</source> (<year>2016</year>) <volume>37</volume>:<fpage>242</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.1111/liv.13214</pub-id> <pub-id pub-id-type="pmid">27496750</pub-id></mixed-citation></ref>
<ref id="B7">
<label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ratiu</surname> <given-names>IA</given-names></name> <name><surname>Ligor</surname> <given-names>T</given-names></name> <name><surname>Bocos-Bintintan</surname> <given-names>V</given-names></name> <name><surname>Mayhew</surname> <given-names>CA</given-names></name> <name><surname>Buszewski</surname> <given-names>B</given-names></name></person-group>. <article-title>Volatile organic compounds in exhaled breath as fingerprints of lung cancer, asthma and COPD.</article-title> <source><italic>J Clin Med</italic>.</source> (<year>2020</year>) <volume>10</volume>:<fpage>32</fpage>. <pub-id pub-id-type="doi">10.3390/jcm10010032</pub-id> <pub-id pub-id-type="pmid">33374433</pub-id></mixed-citation></ref>
<ref id="B8">
<label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Chang</surname> <given-names>Y</given-names></name> <name><surname>Xu</surname> <given-names>C</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Sun</surname> <given-names>Y</given-names></name><etal/></person-group> <article-title>Association of volatile organic compound levels with chronic obstructive pulmonary diseases in NHANES 2013-2016.</article-title> <source><italic>Sci Rep</italic>.</source> (<year>2024</year>) <volume>14</volume>:<fpage>16085</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-67210-7</pub-id> <pub-id pub-id-type="pmid">38992113</pub-id></mixed-citation></ref>
<ref id="B9">
<label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Han</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>Y</given-names></name> <name><surname>Jin</surname> <given-names>R</given-names></name><etal/></person-group> <article-title>Associations between the compositional patterns of blood volatile organic compounds and chronic respiratory diseases and ages at onset in NHANES 2003-2012.</article-title> <source><italic>Chemosphere</italic>.</source> (<year>2023</year>) <volume>327</volume>:<fpage>138425</fpage>. <pub-id pub-id-type="doi">10.1016/j.chemosphere.2023.138425</pub-id> <pub-id pub-id-type="pmid">36931402</pub-id></mixed-citation></ref>
<ref id="B10">
<label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ligor</surname> <given-names>T</given-names></name> <name><surname>Pater</surname> <given-names>&#x0141;</given-names></name> <name><surname>Buszewski</surname> <given-names>B</given-names></name></person-group>. <article-title>Application of an artificial neural network model for selection of potential lung cancer biomarkers.</article-title> <source><italic>J Breath Res</italic>.</source> (<year>2015</year>) <volume>9</volume>:<fpage>027106</fpage>. <pub-id pub-id-type="doi">10.1088/1752-7155/9/2/027106</pub-id> <pub-id pub-id-type="pmid">25944812</pub-id></mixed-citation></ref>
<ref id="B11">
<label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname> <given-names>X</given-names></name> <name><surname>Zhong</surname> <given-names>R</given-names></name> <name><surname>Liang</surname> <given-names>H</given-names></name> <name><surname>Zhong</surname> <given-names>Q</given-names></name> <name><surname>Huang</surname> <given-names>H</given-names></name> <name><surname>He</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Exhaled VOC detection in lung cancer screening: a comprehensive meta-analysis.</article-title> <source><italic>BMC Cancer</italic>.</source> (<year>2024</year>) <volume>24</volume>:<fpage>775</fpage>. <pub-id pub-id-type="doi">10.1186/s12885-024-12537-7</pub-id> <pub-id pub-id-type="pmid">38937687</pub-id></mixed-citation></ref>
<ref id="B12">
<label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hakim</surname> <given-names>M</given-names></name> <name><surname>Broza</surname> <given-names>YY</given-names></name> <name><surname>Barash</surname> <given-names>O</given-names></name> <name><surname>Peled</surname> <given-names>N</given-names></name> <name><surname>Phillips</surname> <given-names>M</given-names></name> <name><surname>Amann</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Volatile organic compounds of lung cancer and possible biochemical pathways.</article-title> <source><italic>Chem Rev</italic>.</source> (<year>2012</year>) <volume>112</volume>:<fpage>5949</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1021/cr300174a</pub-id> <pub-id pub-id-type="pmid">22991938</pub-id></mixed-citation></ref>
<ref id="B13">
<label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Everson</surname> <given-names>F</given-names></name> <name><surname>Martens</surname> <given-names>DS</given-names></name> <name><surname>Nawrot</surname> <given-names>TS</given-names></name> <name><surname>Goswami</surname> <given-names>N</given-names></name> <name><surname>Mthethwa</surname> <given-names>M</given-names></name> <name><surname>Webster</surname> <given-names>I</given-names></name><etal/></person-group> <article-title>Personal exposure to NO<sub>2</sub> and benzene in the Cape Town region of South Africa is associated with shorter leukocyte telomere length in women.</article-title> <source><italic>Environ Res</italic>.</source> (<year>2020</year>) <volume>182</volume>:<fpage>108993</fpage>. <pub-id pub-id-type="doi">10.1016/j.envres.2019.108993</pub-id> <pub-id pub-id-type="pmid">31830692</pub-id></mixed-citation></ref>
<ref id="B14">
<label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fu</surname> <given-names>XA</given-names></name> <name><surname>Li</surname> <given-names>M</given-names></name> <name><surname>Knipp</surname> <given-names>RJ</given-names></name> <name><surname>Nantz</surname> <given-names>MH</given-names></name> <name><surname>Bousamra</surname> <given-names>M</given-names></name></person-group>. <article-title>Noninvasive detection of lung cancer using exhaled breath.</article-title> <source><italic>Cancer Med</italic>.</source> (<year>2014</year>) <volume>3</volume>:<fpage>174</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1002/cam4.162</pub-id> <pub-id pub-id-type="pmid">24402867</pub-id></mixed-citation></ref>
<ref id="B15">
<label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Temerdashev</surname> <given-names>AZ</given-names></name> <name><surname>Gashimova</surname> <given-names>EM</given-names></name> <name><surname>Porkhanov</surname> <given-names>VA</given-names></name> <name><surname>Polyakov</surname> <given-names>IS</given-names></name> <name><surname>Perunov</surname> <given-names>DV</given-names></name> <name><surname>Dmitrieva</surname> <given-names>EV</given-names></name></person-group>. <article-title>Non-Invasive lung cancer diagnostics through metabolites in exhaled breath: influence of the disease variability and comorbidities.</article-title> <source><italic>Metabolites.</italic></source> (<year>2023</year>) <volume>13</volume>:<fpage>203</fpage>. <pub-id pub-id-type="doi">10.3390/metabo13020203</pub-id> <pub-id pub-id-type="pmid">36837822</pub-id></mixed-citation></ref>
<ref id="B16">
<label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Schee</surname> <given-names>MP</given-names></name> <name><surname>Paff</surname> <given-names>T</given-names></name> <name><surname>Brinkman</surname> <given-names>P</given-names></name> <name><surname>van Aalderen</surname> <given-names>WMC</given-names></name> <name><surname>Haarman</surname> <given-names>EG</given-names></name> <name><surname>Sterk</surname> <given-names>PJ</given-names></name></person-group>. <article-title>Breathomics in lung disease.</article-title> <source><italic>Chest</italic>.</source> (<year>2015</year>) <volume>147</volume>:<fpage>224</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1378/chest.14-0781</pub-id> <pub-id pub-id-type="pmid">25560860</pub-id></mixed-citation></ref>
<ref id="B17">
<label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bos</surname> <given-names>LD</given-names></name> <name><surname>Sterk</surname> <given-names>PJ</given-names></name> <name><surname>Schultz</surname> <given-names>MJ</given-names></name></person-group>. <article-title>Volatile metabolites of pathogens: a systematic review.</article-title> <source><italic>PLoS Pathog</italic>.</source> (<year>2013</year>) <volume>9</volume>:<fpage>e1003311</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1003311</pub-id> <pub-id pub-id-type="pmid">23675295</pub-id></mixed-citation></ref>
<ref id="B18">
<label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Traxler</surname> <given-names>S</given-names></name> <name><surname>Barkowsky</surname> <given-names>G</given-names></name> <name><surname>Sa&#x00DF;</surname> <given-names>R</given-names></name> <name><surname>Klemenz</surname> <given-names>AC</given-names></name> <name><surname>Patenge</surname> <given-names>N</given-names></name> <name><surname>Kreikemeyer</surname> <given-names>B</given-names></name><etal/></person-group> <article-title>Volatile scents of influenza A and <italic>S. pyogenes</italic> (co-)infected cells.</article-title> <source><italic>Sci Rep</italic>.</source> (<year>2019</year>) <volume>9</volume>:<fpage>18894</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-55334-0</pub-id> <pub-id pub-id-type="pmid">31827195</pub-id></mixed-citation></ref>
<ref id="B19">
<label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abd El Qader</surname> <given-names>A</given-names></name> <name><surname>Lieberman</surname> <given-names>D</given-names></name> <name><surname>Shemer Avni</surname> <given-names>Y</given-names></name> <name><surname>Svobodin</surname> <given-names>N</given-names></name> <name><surname>Lazarovitch</surname> <given-names>T</given-names></name> <name><surname>Sagi</surname> <given-names>O</given-names></name><etal/></person-group> <article-title>Volatile organic compounds generated by cultures of bacteria and viruses associated with respiratory infections.</article-title> <source><italic>Biomed Chromatogr.</italic></source> (<year>2015</year>) <volume>29</volume>:<fpage>1783</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1002/bmc.3494</pub-id> <pub-id pub-id-type="pmid">26033043</pub-id></mixed-citation></ref>
<ref id="B20">
<label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Traxler</surname> <given-names>S</given-names></name> <name><surname>Bischoff</surname> <given-names>AC</given-names></name> <name><surname>Sa&#x00DF;</surname> <given-names>R</given-names></name> <name><surname>Trefz</surname> <given-names>P</given-names></name> <name><surname>Gierschner</surname> <given-names>P</given-names></name> <name><surname>Brock</surname> <given-names>B</given-names></name><etal/></person-group> <article-title>VOC breath profile in spontaneously breathing awake swine during Influenza A infection.</article-title> <source><italic>Sci Rep</italic>.</source> (<year>2018</year>) <volume>8</volume>:<fpage>14857</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-33061-2</pub-id> <pub-id pub-id-type="pmid">30291257</pub-id></mixed-citation></ref>
<ref id="B21">
<label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mellors</surname> <given-names>TR</given-names></name> <name><surname>Rees</surname> <given-names>CA</given-names></name> <name><surname>Franchina</surname> <given-names>FA</given-names></name> <name><surname>Burklund</surname> <given-names>A</given-names></name> <name><surname>Patel</surname> <given-names>C</given-names></name> <name><surname>Hathaway</surname> <given-names>LJ</given-names></name><etal/></person-group> <article-title>The volatile molecular profiles of seven <italic>Streptococcus pneumoniae</italic> serotypes.</article-title> <source><italic>J Chromatogr B Analyt Technol Biomed Life Sci</italic>.</source> (<year>2018</year>) <volume>1096</volume>:<fpage>208</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.jchromb.2018.08.032</pub-id> <pub-id pub-id-type="pmid">30179753</pub-id></mixed-citation></ref>
<ref id="B22">
<label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Basanta</surname> <given-names>M</given-names></name> <name><surname>Jarvis</surname> <given-names>RM</given-names></name> <name><surname>Xu</surname> <given-names>Y</given-names></name> <name><surname>Blackburn</surname> <given-names>G</given-names></name> <name><surname>Tal-Singer</surname> <given-names>R</given-names></name> <name><surname>Woodcock</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Non-invasive metabolomic analysis of breath using differential mobility spectrometry in patients with chronic obstructive pulmonary disease and healthy smokers.</article-title> <source><italic>Analyst</italic>.</source> (<year>2010</year>) <volume>135</volume>:<fpage>315</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1039/b916374c</pub-id> <pub-id pub-id-type="pmid">20098764</pub-id></mixed-citation></ref>
<ref id="B23">
<label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amann</surname> <given-names>A</given-names></name> <name><surname>Costello Bde</surname> <given-names>L</given-names></name> <name><surname>Miekisch</surname> <given-names>W</given-names></name> <name><surname>Schubert</surname> <given-names>J</given-names></name> <name><surname>Buszewski</surname> <given-names>B</given-names></name> <name><surname>Pleil</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>The human volatilome: volatile organic compounds (VOCs) in exhaled breath, skin emanations, urine, feces and saliva.</article-title> <source><italic>J Breath Res</italic>.</source> (<year>2014</year>) <volume>8</volume>:<fpage>034001</fpage>. <pub-id pub-id-type="doi">10.1088/1752-7155/8/3/034001</pub-id> <pub-id pub-id-type="pmid">24946087</pub-id></mixed-citation></ref>
<ref id="B24">
<label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tian</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>Q</given-names></name> <name><surname>Peng</surname> <given-names>M</given-names></name> <name><surname>Guo</surname> <given-names>L</given-names></name> <name><surname>Zhao</surname> <given-names>Q</given-names></name> <name><surname>Lin</surname> <given-names>W</given-names></name><etal/></person-group> <article-title>Exhaled volatile organic compounds as novel biomarkers for early detection of COPD, asthma, and PRISm: a cross-sectional study.</article-title> <source><italic>Respir Res</italic>.</source> (<year>2025</year>) <volume>26</volume>:<fpage>173</fpage>. <pub-id pub-id-type="doi">10.1186/s12931-025-03242-5</pub-id> <pub-id pub-id-type="pmid">40325477</pub-id></mixed-citation></ref>
<ref id="B25">
<label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>K</given-names></name></person-group>. <article-title>2019&#x2013;2020 developments in the global plastics industry (II): engineering plastics.</article-title> <source><italic>Plastics Industry.</italic></source> (<year>2021</year>) <volume>49</volume>:<fpage>1</fpage>&#x2013;<lpage>10</lpage>.</mixed-citation></ref>
<ref id="B26">
<label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Beliz&#x00E1;rio</surname> <given-names>JE</given-names></name> <name><surname>Faintuch</surname> <given-names>J</given-names></name> <name><surname>Malpartida</surname> <given-names>MG</given-names></name></person-group>. <article-title>Breath biopsy and discovery of exclusive volatile organic compounds for diagnosis of infectious diseases.</article-title> <source><italic>Front Cell Infect Microbiol</italic>.</source> (<year>2021</year>) <volume>10</volume>:<fpage>564194</fpage>. <pub-id pub-id-type="doi">10.3389/fcimb.2020.564194</pub-id> <pub-id pub-id-type="pmid">33520731</pub-id></mixed-citation></ref>
<ref id="B27">
<label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Esteban</surname> <given-names>P</given-names></name> <name><surname>Letona-Gimenez</surname> <given-names>S</given-names></name> <name><surname>Domingo</surname> <given-names>MP</given-names></name> <name><surname>Morte</surname> <given-names>E</given-names></name> <name><surname>Pellejero-Sagastizabal</surname> <given-names>G</given-names></name> <name><surname>Del Mar Encabo</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Combination of exhaled volatile organic compounds with serum biomarkers predicts respiratory infection severity.</article-title> <source><italic>Pulmonology</italic>.</source> (<year>2025</year>) <volume>31</volume>:<fpage>2477911</fpage>. <pub-id pub-id-type="doi">10.1080/25310429.2025.2477911</pub-id> <pub-id pub-id-type="pmid">40152323</pub-id></mixed-citation></ref>
<ref id="B28">
<label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oxner</surname> <given-names>M</given-names></name> <name><surname>Trang</surname> <given-names>A</given-names></name> <name><surname>Mehta</surname> <given-names>J</given-names></name> <name><surname>Forsyth</surname> <given-names>C</given-names></name> <name><surname>Swanson</surname> <given-names>B</given-names></name> <name><surname>Keshavarzian</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>The versatility and diagnostic potential of VOC profiling for noninfectious diseases.</article-title> <source><italic>BME Front</italic>.</source> (<year>2023</year>) <volume>4</volume>:<fpage>0002</fpage>. <pub-id pub-id-type="doi">10.34133/bmef.0002</pub-id> <pub-id pub-id-type="pmid">37849665</pub-id></mixed-citation></ref>
<ref id="B29">
<label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bennett</surname> <given-names>JW</given-names></name> <name><surname>Moore</surname> <given-names>GG</given-names></name></person-group>. <article-title>Fungal volatile organic compounds.</article-title> <source><italic>Curr Biol</italic>.</source> (<year>2025</year>) <volume>35</volume>:<fpage>R508</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.cub.2025.03.007</pub-id> <pub-id pub-id-type="pmid">40494305</pub-id></mixed-citation></ref>
<ref id="B30">
<label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gashimova</surname> <given-names>E</given-names></name> <name><surname>Temerdashev</surname> <given-names>A</given-names></name> <name><surname>Perunov</surname> <given-names>D</given-names></name> <name><surname>Porkhanov</surname> <given-names>V</given-names></name> <name><surname>Polyakov</surname> <given-names>I</given-names></name></person-group>. <article-title>Diagnosis of lung cancer through exhaled breath: a comprehensive study.</article-title> <source><italic>Mol Diagn Ther</italic>.</source> (<year>2024</year>) <volume>28</volume>:<fpage>847</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1007/s40291-024-00744-8</pub-id> <pub-id pub-id-type="pmid">39299985</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/713396/overview">Venkata Ramireddy Narala</ext-link>, Yogi Vemana University, India</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1316815/overview">Sergey N. Avdeev</ext-link>, I.M. Sechenov First Moscow State Medical University, Russia</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3295337/overview">Jorrit Van Poelgeest</ext-link>, Amsterdam UMC - Location VUMC, Netherlands</p></fn>
</fn-group>
</back>
</article>