<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2026.1752113</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Development and internal validation of a machine learning&#x2013;based prediction model for pulmonary hypertension in COPD</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Wang</surname>
<given-names>Ruoyu</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3290065"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Tan</surname>
<given-names>Jie</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Guangping</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pan</surname>
<given-names>Zhenyu</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Huiling</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3099511"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jing</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3229525"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Respiratory and Critical Care Medicine, Beijing Chaoyang Hospital, Capital Medical University</institution>, <city>Beijing</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>School of Information Engineering, Guangdong University of Technology</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Radiology, Beijing Chaoyang Hospital, Capital Medical University</institution>, <city>Beijing</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Jing Wang, <email xlink:href="mailto:wangjingdoc@126.com">wangjingdoc@126.com</email></corresp>
<fn fn-type="equal" id="fn0001">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work.</p>
</fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>13</volume>
<elocation-id>1752113</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>22</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Wang, Tan, Li, Pan, Guo, Sun and Wang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang, Tan, Li, Pan, Guo, Sun and Wang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Chronic obstructive pulmonary disease (COPD) is frequently complicated by pulmonary hypertension (PH), which worsens prognosis, but early PH detection is limited by the invasiveness or suboptimal sensitivity of current diagnostic tools.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this retrospective study, we analyzed 523 hospitalized patients with COPD from Beijing Chaoyang Hospital. After standardized preprocessing and recursive feature elimination, 18 routinely available noninvasive clinical and physiological variables were retained as predictors. Eight machine-learning algorithms were trained to predict PH and compared using area under the receiver operating characteristic curve (AUC), accuracy, sensitivity, specificity, F1 score, and decision-curve analysis; model interpretability was assessed with Shapley additive explanations (SHAP).</p>
</sec>
<sec>
<title>Results</title>
<p>The CatBoost model showed the best discrimination (AUC 0.848; accuracy 0.830; sensitivity 0.758; specificity 0.866; F1 0.746). SHAP analysis identified right ventricular diameter, pulmonary artery diameter, arterial partial pressure of carbon dioxide, right atrial transverse diameter, and age as the most influential predictors.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>A CatBoost-based prediction model using readily obtainable noninvasive variables can estimate PH risk in COPD with good accuracy and provide transparent feature-level explanations, potentially facilitating earlier detection and risk-stratified management.</p>
</sec>
</abstract>
<kwd-group>
<kwd>CatBoost algorithm</kwd>
<kwd>chronic obstructive pulmonary disease</kwd>
<kwd>clinical prediction model</kwd>
<kwd>feature selection</kwd>
<kwd>machine learning</kwd>
<kwd>pulmonary hypertension</kwd>
<kwd>SHAP</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="12"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="39"/>
<page-count count="16"/>
<word-count count="8241"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Pulmonary Medicine</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<sec id="sec2">
<label>1.1</label>
<title>Disease burden in COPD</title>
<p>Chronic obstructive pulmonary disease (COPD) causes long-term blockage of airflow and is a major cause of illness and death, and it greatly reduces quality of life. Long-lasting inflammation in the airways and changes in airway structure are common, and pulmonary hypertension (PH) is another important factor that affects outcome (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>). The link between COPD and PH comes from several processes. These include changes in the lung blood vessels, inflammatory responses driven by cytokine signals, and genetic susceptibility. Lung tissue (parenchymal) damage relates to how often PH occurs and how severe it is, but there is considerable variation between patients: some people with only mild changes in the lungs develop severe PH, while others with advanced disease have only small increases in pulmonary artery pressure (<xref ref-type="bibr" rid="ref2 ref3 ref4">2&#x2013;4</xref>). This mismatch between lung tissue damage and blood vessel changes suggests that changes inside the vessels, inflammatory mediators, and genetic factors may all work together to cause PH in COPD, even when parenchymal disease is not very severe (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref6">6</xref>).</p>
</sec>
<sec id="sec3">
<label>1.2</label>
<title>Clinical importance of PH in COPD</title>
<p>Because of these differences between patients and their effects on outcome, it is very important to distinguish patients with COPD who have PH from those who do not. This distinction affects treatment plans, decisions about who needs urgent care, and how often patients should be monitored (<xref ref-type="bibr" rid="ref7">7</xref>). Early detection enables timely, targeted interventions, which are associated with a reduction in hospitalizations and an improvement in quality of life (<xref ref-type="bibr" rid="ref8">8</xref>). Precise differentiation between COPD with and without PH also supports phenotype-informed treatment strategies (<xref ref-type="bibr" rid="ref9">9</xref>). Despite its clinical importance, PH is still frequently missed or recognized late in patients with COPD.</p>
</sec>
<sec id="sec4">
<label>1.3</label>
<title>Diagnostic limitations in detecting PH</title>
<p>Early detection of PH in COPD is hampered by nonspecific symptoms such as exertional dyspnea and chest tightness, which are difficult to attribute to vascular versus airway pathology and often delay further investigation. Delayed diagnosis is associated with worse outcomes (<xref ref-type="bibr" rid="ref10">10</xref>, <xref ref-type="bibr" rid="ref11">11</xref>). Right heart catheterization remains the diagnostic gold standard, but its invasiveness and resource requirements limit use in routine practice (<xref ref-type="bibr" rid="ref12 ref13 ref14">12&#x2013;14</xref>). Echocardiography provides noninvasive assessment of right-heart structure and pressure surrogates but cannot deliver continuous monitoring and may be constrained by poor acoustic windows caused by lung hyperinflation (<xref ref-type="bibr" rid="ref15">15</xref>, <xref ref-type="bibr" rid="ref16">16</xref>). In low- and middle-income countries, shortages of trained personnel and equipment further contribute to underdiagnosis and fragmented longitudinal management (<xref ref-type="bibr" rid="ref17">17</xref>). Several inexpensive and widely available biomarkers&#x2014;such as mean platelet volume (MPV), red blood cell distribution width (RDW), brain natriuretic peptide (BNP), the pulmonary artery&#x2013;to&#x2013;aortic diameter ratio, and the neutrophil-to-lymphocyte ratio (NLR)&#x2014;have been associated with PH risk in COPD (<xref ref-type="bibr" rid="ref18 ref19 ref20 ref21">18&#x2013;21</xref>). However, when used alone, these biomarkers lack sufficient discriminative power to clearly separate patients into different risk groups. For this reason, researchers are studying machine learning (ML) methods that bring together different kinds of data and improve diagnostic accuracy.</p>
</sec>
<sec id="sec5">
<label>1.4</label>
<title>Advances in machine learning for COPD&#x2013;PH detection</title>
<p>ML methods can use many types of data in COPD at the same time, such as clinical features, pulmonary function tests, vascular indicators from CT, and blood biomarkers. By using these different inputs together, ML models can find complex risk patterns. With these high-dimensional data, ML models can diagnose disease more accurately and more completely than methods that rely on fixed rules or a single measurement, and they can give quantitative support for decisions in clinical practice (<xref ref-type="bibr" rid="ref22">22</xref>, <xref ref-type="bibr" rid="ref23">23</xref>). Recent studies show that ML can increase the detection rate of pulmonary hypertension in patients with chronic lung disease, but reliable and easy-to-explain tools for pulmonary hypertension related to COPD are still not available.</p>
</sec>
<sec id="sec6">
<label>1.5</label>
<title>Research gaps and study objectives</title>
<p>ML-based methods may help provide individualized risk assessment and improve the accuracy of detecting PH in patients with COPD. However, there are still questions about whether these methods work well in many different clinical settings, how clear their decisions are, and how easily they can be added to daily clinical work (<xref ref-type="bibr" rid="ref24">24</xref>, <xref ref-type="bibr" rid="ref25">25</xref>). Solving these problems is important for safe and wide use in clinical practice. Accordingly, this study aimed to develop and internally validate an interpretable ML-based risk-prediction model for PH in COPD. The model is designed to (a) estimate individualized PH risk, (b) clarify feature-level relationships between COPD and PH, and (c) support phenotype-informed therapeutic planning. The intended end-users are respiratory and cardiovascular specialists, and the intended use is early in-hospital risk screening and referral stratification among hospitalized patients with COPD, to help determine whether further advanced examinations, such as echocardiography or right heart catheterization, are warranted.</p>
</sec>
</sec>
<sec sec-type="methods" id="sec7">
<label>2</label>
<title>Methods</title>
<sec id="sec8">
<label>2.1</label>
<title>Study design and setting</title>
<p>This single-center, retrospective, cross-sectional observational study was conducted at Beijing Chaoyang Hospital. We analyzed inpatients with COPD, with and without PH, admitted to the Department of Respiratory and Critical Care Medicine between January 2014 and September 2024. Clinical and laboratory data were obtained from the hospital electronic medical record system. The healthcare setting, eligibility criteria, outcome definition, and predictor measurements were identical for participants later allocated to the training and test sets.</p>
</sec>
<sec id="sec9">
<label>2.2</label>
<title>Study participants</title>
<p>Eligible participants were adults (aged 18&#x2013;95&#x202F;years) with a diagnosis of COPD confirmed by pulmonary function testing in accordance with Global Initiative for Chronic Obstructive Lung Disease (GOLD) guidelines.</p>
<p>We excluded patients with: (a) idiopathic pulmonary arterial hypertension (PAH), pulmonary thromboembolism (PTE), or PH associated with congenital heart disease; (b) severe cardiovascular or cerebrovascular disease, or hepatic or renal insufficiency; or (c) active malignancy. For patients with multiple hospitalizations during the study period, only the first eligible admission was considered to avoid duplicate observations.</p>
<p>All consecutive patients meeting the criteria during the accrual period were included, yielding a total of 523 hospitalized patients with COPD, of whom 176 (33.6%) met diagnostic criteria for COPD-PH (i.e., outcome events). According to commonly cited standards for multivariable prediction models, an events-per-variable (EPV) ratio of at least 10 is often recommended to reduce overfitting and enhance parameter stability. In the present study, the final CatBoost model included 18 predictors and 176 outcome events, corresponding to an EPV of approximately 9.8, which is close to this conventional target and exceeds the minimum EPV threshold of 5 that some methodological work and guidelines still consider acceptable. This EPV suggests that the available data were adequate to support model development and internal validation.</p>
</sec>
<sec id="sec10">
<label>2.3</label>
<title>Definitions</title>
<sec id="sec11">
<label>2.3.1</label>
<title>COPD definition</title>
<p>In accordance with GOLD recommendations, COPD was defined as a post-bronchodilator ratio of forced expiratory volume in 1&#x202F;s to forced vital capacity (FEV&#x2081;/FVC)&#x202F;&#x003C;&#x202F;0.70 on spirometry, with FEV&#x2081; and FVC measured using standard post-bronchodilator protocols (<xref ref-type="bibr" rid="ref26">26</xref>).</p>
</sec>
<sec id="sec12">
<label>2.3.2</label>
<title>PH definition</title>
<p>(1) Right heart catheterization (RHC): PH was diagnosed when resting mean pulmonary arterial pressure (mPAP) was&#x202F;&#x003E;&#x202F;20&#x202F;mm Hg, in line with contemporary ESC/ERS guidance (<xref ref-type="bibr" rid="ref27">27</xref>); 43 patients met this definition. All 43 also fulfilled the echocardiographic criterion below.</p>
<p>(2) Transthoracic echocardiography: In the absence of RHC, PH was considered present when transthoracic echocardiography showed a peak tricuspid regurgitant velocity (TRV)&#x202F;&#x003E;&#x202F;2.8&#x202F;m/s; an additional 133 patients met this criterion (excluding the 43 RHC-diagnosed cases to avoid double counting).</p>
<p>Patients who did not meet either criterion (1) or (2) were classified as non-PH. Transthoracic echocardiographic measurements were performed and reported by board-certified sonographers, and RHC was undertaken by interventional cardiologists according to contemporary guidelines. As all investigations were performed as part of routine clinical care in this retrospective cohort, assessors were not formally blinded to other clinical information or to the study hypothesis.</p>
</sec>
</sec>
<sec id="sec13">
<label>2.4</label>
<title>Predictors and data handling</title>
<p>Guided by expert consensus and prior evidence, 39 candidate predictors were prespecified and grouped into six domains: demographics, personal history, medical history, laboratory indices, lung function, and echocardiographic parameters (<xref ref-type="table" rid="tab1">Table 1</xref>). TRV was excluded <italic>a priori</italic> from candidate predictors because it formed part of the echocardiographic outcome definition (TRV&#x202F;&#x003E;&#x202F;2.8&#x202F;m/s), thereby avoiding incorporation bias.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Characteristic variables.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Category</th>
<th align="left" valign="top">Variables</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Demographics</td>
<td align="left" valign="top">Gender, age, height, weight, body mass index (BMI)</td>
</tr>
<tr>
<td align="left" valign="top">Personal history</td>
<td align="left" valign="top">Smoking index (average number of cigarettes smoked per day &#x00D7; total years of smoking), smoking cessation years</td>
</tr>
<tr>
<td align="left" valign="top">Medical history</td>
<td align="left" valign="top">Diabetes mellitus, hyperlipidemia</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="5">Laboratory</td>
<td align="left" valign="top">Blood gases: oxygen partial pressure (PaO<sub>2</sub>), carbon dioxide partial pressure (PaCO<sub>2</sub>)</td>
</tr>
<tr>
<td align="left" valign="top">Biochemical tests: creatinine (Cr), uric acid (UA), albumin (ALB)</td>
</tr>
<tr>
<td align="left" valign="top">Inflammatory markers: C-reactive protein (CRP), erythrocyte sedimentation rate (ESR)</td>
</tr>
<tr>
<td align="left" valign="top">Routine blood tests: mean platelet volume (MPV), red blood cell distribution width-coefficient of variation (RDW-CV), platelet count (PLT), white blood cell count (WBC), hemoglobin content (HB), neutrophil count (NE), lymphocyte count (LY), eosinophil count (EO), neutrophil/lymphocyte ratio (NLR)</td>
</tr>
<tr>
<td align="left" valign="top">Cardiac function indicators: N-terminal pro-B-type natriuretic peptide (NT-proBNP)</td>
</tr>
<tr>
<td align="left" valign="top">Lung function</td>
<td align="left" valign="top">Forced expiratory volume in one second/forced vital capacity (FEV<sub>1</sub>/FVC), forced expiratory volume in one second as a percentage of the predicted value (FEV<sub>1</sub>% predicted, FEV<sub>1</sub>pred%), forced vital capacity as a percentage of the predicted value (FVC% predicted, FVCpred%), single-breath diffusing capacity of the lung for carbon monoxide (DLCO SB)</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="3">Echocardiography</td>
<td align="left" valign="top">Chamber sizes: right atrial transverse diameter (RA TD), right atrial longitudinal diameter (RA LD), right ventricular diameter (RVD), pulmonary artery diameter (PA), ascending aortic diameter (AAo)</td>
</tr>
<tr>
<td align="left" valign="top">Function: ejection fraction (EF), interventricular septal thickness (IVS)</td>
</tr>
<tr>
<td align="left" valign="top">Hemodynamics: pulmonary valve peak systolic flow velocity (PVmax), pulmonary artery diameter to ascending aortic diameter ratio (PA/AO)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>After random shuffling, the 523 patients were split into a training set (<italic>n</italic>&#x202F;=&#x202F;423) and an independent test set (<italic>n</italic>&#x202F;=&#x202F;100) in an approximately 8:2 ratio. Model selection and stability were assessed using stratified five-fold cross-validation within the training set. The optimal model was then refitted on the full training set and evaluated on the test set. Decision thresholds for all performance metrics were fixed during cross-validation and were not adjusted on the test set.</p>
<p>For variables with less than 30% missing values, the missing entries were imputed using the k-nearest neighbors (kNN) method. The extent of missingness for each variable is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref> and in <xref ref-type="fig" rid="fig1">Figure 1</xref>. In <xref ref-type="fig" rid="fig1">Figure 1</xref>, each column represents one variable, and white cells indicate missing values. Variables with 30% or more missing values were excluded from model development.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Missing data pattern for the 39 candidate predictors in 523 patients.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Missingness map of the study dataset showing patients (rows) and candidate predictors (columns). Dark cells indicate observed values and white cells indicate missing values. Missingness is low for most routine clinical and laboratory variables but higher for several pulmonary-function measures. A side summary indicates overall completeness across records.</alt-text>
</graphic>
</fig>
<p>To address class imbalance, we applied the synthetic minority over-sampling technique (SMOTE) to the training data in each cross-validation fold before model fitting. This method reduces bias toward the majority class while preserving the original feature space.</p>
</sec>
<sec id="sec14">
<label>2.5</label>
<title>Feature selection</title>
<p>The initial 39 candidate predictors underwent multicollinearity testing, visualized using a correlation matrix to generate a heatmap (<xref ref-type="fig" rid="fig2">Figure 2</xref>). Features with an absolute pairwise Pearson correlation coefficient (|r|)&#x202F;&#x003E;&#x202F;0.80 were removed to limit redundancy and reduce estimator instability. The |r|&#x202F;=&#x202F;0.80 threshold was chosen as a pragmatic, commonly used cutoff in applied epidemiology and machine-learning studies to flag near-duplicate variables while preserving clinically distinct information, thereby balancing model stability and information retention (<xref ref-type="bibr" rid="ref28">28</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Correlation heatmap for the 39 candidate predictors.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Heatmap showing a feature correlation matrix of medical and demographic variables, with values ranging from negative to positive correlations, represented by a blue-to-red color scale and labeled correlation coefficients for each variable pair.</alt-text>
</graphic>
</fig>
<p>Recursive feature elimination (RFE) was then applied to the pruned feature set. All correlation-based screening and RFE procedures were performed within each training fold and applied only to the corresponding held-out fold to avoid information leakage. Based on these procedures, a final set of 18 predictors was selected and subsequently used for model training in the full training set and performance evaluation in the independent test set.</p>
</sec>
<sec id="sec15">
<label>2.6</label>
<title>Model development</title>
<p>Before model fitting, continuous predictors were standardized using z-score transformation, and binary or categorical variables were encoded as dummy (one-hot) indicators for all algorithms except CatBoost, which used its native handling of categorical features. A unified machine-learning pipeline was applied to the retained predictors using eight algorithms: Categorical Boosting (CatBoost), Random Forest (RF), Gradient Boosting Machine (GBM), Adaptive Boosting (AdaBoost), Logistic Regression (LR), Extreme Gradient Boosting (XGBoost), k-Nearest Neighbors (kNN), and Multilayer Perceptron (MLP).</p>
<p>Within this pipeline, data preprocessing and grid-search hyperparameter optimization were carried out prior to final model fitting; algorithm-specific hyperparameters are summarized in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>.</p>
</sec>
<sec id="sec16">
<label>2.7</label>
<title>Model evaluation</title>
<p>Model performance was evaluated using stratified five-fold cross-validation in the training set and subsequently in the independent test set. Discrimination was summarized using the receiver operating characteristic (ROC) curve and area under the curve (AUC). For threshold-based performance, we reported accuracy, sensitivity, specificity, F1 score, positive predictive value (PPV), and negative predictive value (NPV).</p>
<p>For each model, performance across the five validation folds was summarized as the mean and 95% confidence interval (95% CI), providing estimates of central tendency and sampling variability. Each model produced an individual-level predicted probability of COPD-PH.</p>
<p>We used decision curve analysis (DCA) to quantify the potential clinical benefit over a range of reasonable intervention thresholds. In addition, we quantified the marginal contribution of each predictor variable to the model output by calculating SHapley Additive exPlanations (SHAP) values at both the cohort level and the individual patient level, which makes the model easier to interpret in clinical practice.</p>
</sec>
<sec id="sec17">
<label>2.8</label>
<title>Statistical analysis</title>
<p>All statistical analyses were done in R (version 4.4.1) and Python (version 3.12.4). We used R mainly for data management and standard statistical analyses. We used the packages readxl (for data import), dplyr (for data handling), boot (for resampling-based inference), and effsize (for effect size estimation). We used Python to compute SHAP values and plots to explain features in the prediction models.</p>
<p>We compared continuous baseline characteristics between the training set and the test set with the Mann&#x2013;Whitney U test; for categorical variables, we used Fisher&#x2019;s exact test. We set the significance level at <italic>&#x03B1;</italic>&#x202F;=&#x202F;0.05. By checking the similarity of key covariates between the training and test sets, we assessed possible selection bias and supported the validity of the model performance estimates.</p>
<p>We then used Cohen&#x2019;s d for paired samples to measure the practical size of differences in predictive performance between models. We estimated its 95% confidence interval with 1,000 non-parametric resampling runs. We interpreted the effect size with common cut points: |d|&#x202F;&#x003C;&#x202F;0.2 (no practical difference), 0.2&#x202F;&#x2264;&#x202F;|d|&#x202F;&#x003C;&#x202F;0.5 (small), 0.5&#x202F;&#x2264;&#x202F;|d|&#x202F;&#x003C;&#x202F;0.8 (moderate), and |d|&#x202F;&#x2265;&#x202F;0.8 (large). A larger absolute value means a stronger practical effect (<xref ref-type="bibr" rid="ref29">29</xref>).</p>
</sec>
</sec>
<sec sec-type="results" id="sec18">
<label>3</label>
<title>Results</title>
<sec id="sec19">
<label>3.1</label>
<title>Patient characteristics</title>
<p>After application of the inclusion and exclusion criteria, 523 hospitalized patients with COPD were included in the analysis. <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref> presents baseline characteristics for the training and test sets, and <xref ref-type="table" rid="tab2">Table 2</xref> compares patients with COPD-PH and those with COPD alone.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Baseline characteristics of patients with COPD-PH and COPD alone.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Characteristic</th>
<th align="center" valign="top">COPD</th>
<th align="center" valign="top">COPD-PH</th>
<th align="center" valign="top" rowspan="2"><italic>p</italic> value</th>
</tr>
<tr>
<th align="center" valign="top"><italic>N</italic> =&#x202F;347</th>
<th align="center" valign="top"><italic>N</italic> =&#x202F;176</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Female (<italic>n</italic>)</td>
<td align="center" valign="top">94 (27%)</td>
<td align="center" valign="top">59 (34%)</td>
<td align="center" valign="top">0.2</td>
</tr>
<tr>
<td align="left" valign="top">Male (<italic>n</italic>)</td>
<td align="center" valign="top">253 (73%)</td>
<td align="center" valign="top">117 (66%)</td>
<td align="center" valign="top">0.2</td>
</tr>
<tr>
<td align="left" valign="top">Age (years)</td>
<td align="center" valign="top">68&#x202F;&#x00B1;&#x202F;9</td>
<td align="center" valign="top">70&#x202F;&#x00B1;&#x202F;10</td>
<td align="center" valign="top">0.012</td>
</tr>
<tr>
<td align="left" valign="top">Height (cm)</td>
<td align="center" valign="top">165.5 (160.0, 171.0)</td>
<td align="center" valign="top">164.0 (159.5, 170.0)</td>
<td align="center" valign="top">0.079</td>
</tr>
<tr>
<td align="left" valign="top">Body weight (kg)</td>
<td align="center" valign="top">64.0 (57.1, 72.0)</td>
<td align="center" valign="top">63.0 (55.0, 71.0)</td>
<td align="center" valign="top">0.041</td>
</tr>
<tr>
<td align="left" valign="top">BMI</td>
<td align="center" valign="top">23.5 (21.4, 26.1)</td>
<td align="center" valign="top">23.1 (19.5, 26.3)</td>
<td align="center" valign="top">0.2</td>
</tr>
<tr>
<td align="left" valign="top">Smoking index</td>
<td align="center" valign="top">5 (0, 8)</td>
<td align="center" valign="top">3 (0, 8)</td>
<td align="center" valign="top">0.03</td>
</tr>
<tr>
<td align="left" valign="top">Smoking cessation (years)</td>
<td align="center" valign="top">0 (0, 5)</td>
<td align="center" valign="top">0 (0, 5.3)</td>
<td align="center" valign="top">0.7</td>
</tr>
<tr>
<td align="left" valign="top">History of hyperlipidemia (<italic>n</italic>)</td>
<td align="center" valign="top">46 (13%)</td>
<td align="center" valign="top">13 (7%)</td>
<td align="center" valign="top">0.063</td>
</tr>
<tr>
<td align="left" valign="top">No history of hyperlipidemia (<italic>n</italic>)</td>
<td align="center" valign="top">301 (87%)</td>
<td align="center" valign="top">163 (93%)</td>
<td align="center" valign="top">0.063</td>
</tr>
<tr>
<td align="left" valign="top">History of diabetes mellitus (<italic>n</italic>)</td>
<td align="center" valign="top">69 (20%)</td>
<td align="center" valign="top">33 (19%)</td>
<td align="center" valign="top">0.8</td>
</tr>
<tr>
<td align="left" valign="top">No history of diabetes mellitus (<italic>n</italic>)</td>
<td align="center" valign="top">278 (80%)</td>
<td align="center" valign="top">143 (81%)</td>
<td align="center" valign="top">0.8</td>
</tr>
<tr>
<td align="left" valign="top">PaO<sub>2</sub> (mm Hg)</td>
<td align="center" valign="top">75.3 (65.7, 84.3)</td>
<td align="center" valign="top">66.2 (55.9, 85.5)</td>
<td align="center" valign="top">0.2</td>
</tr>
<tr>
<td align="left" valign="top">PaCO<sub>2</sub> (mm Hg)</td>
<td align="center" valign="top">41.0 (38.0, 44.7)</td>
<td align="center" valign="top">46.9 (38.9, 57.3)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Cr (&#x03BC;mol/L)</td>
<td align="center" valign="top">69.0 (60.2, 80.6)</td>
<td align="center" valign="top">70.5 (59.4, 83.5)</td>
<td align="center" valign="top">0.4</td>
</tr>
<tr>
<td align="left" valign="top">UA (&#x03BC;mol/L)</td>
<td align="center" valign="top">307.9 (245.1, 386.4)</td>
<td align="center" valign="top">327.1 (246.2, 420.0)</td>
<td align="center" valign="top">0.003</td>
</tr>
<tr>
<td align="left" valign="top">ALB (g/L)</td>
<td align="center" valign="top">38.0 (35.8, 40.2)</td>
<td align="center" valign="top">37.0 (34.4, 39.3)</td>
<td align="center" valign="top">0.025</td>
</tr>
<tr>
<td align="left" valign="top">CRP (mg/dL)</td>
<td align="center" valign="top">5.0 (0.9, 7.9)</td>
<td align="center" valign="top">1.7 (0.5, 5.0)</td>
<td align="center" valign="top">0.016</td>
</tr>
<tr>
<td align="left" valign="top">NT-proBNP (pg/mL)</td>
<td align="center" valign="top">111.0 (44.3, 263.6)</td>
<td align="center" valign="top">371.0 (95.0, 2205.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">ESR (mm/h)</td>
<td align="center" valign="top">15.0 (5.0, 34.8)</td>
<td align="center" valign="top">10.0 (5.0, 25.0)</td>
<td align="center" valign="top">0.021</td>
</tr>
<tr>
<td align="left" valign="top">MPV (fL)</td>
<td align="center" valign="top">9.9 (9.2, 10.6)</td>
<td align="center" valign="top">10.4 (9.6, 11.1)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">RDW-CV (%)</td>
<td align="center" valign="top">13.3 (12.8, 13.9)</td>
<td align="center" valign="top">13.8 (12.9, 14.6)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">PLT (<inline-formula>
<mml:math id="M1">
<mml:mo>&#x00D7;</mml:mo>
</mml:math>
</inline-formula>10<sup>9</sup>/L)</td>
<td align="center" valign="top">213.0 (173.5, 260.5)</td>
<td align="center" valign="top">190.0 (154.0, 233.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">WBC (<inline-formula>
<mml:math id="M2">
<mml:mo>&#x00D7;</mml:mo>
</mml:math>
</inline-formula>10<sup>9</sup>/L)</td>
<td align="center" valign="top">7.6 (5.7, 10.5)</td>
<td align="center" valign="top">6.9 (5.3, 10.5)</td>
<td align="center" valign="top">0.089</td>
</tr>
<tr>
<td align="left" valign="top">HB (g/L)</td>
<td align="center" valign="top">130.0 (102.0, 142.0)</td>
<td align="center" valign="top">129.5 (95.5, 146.0)</td>
<td align="center" valign="top">0.072</td>
</tr>
<tr>
<td align="left" valign="top">NE (<inline-formula>
<mml:math id="M3">
<mml:mo>&#x00D7;</mml:mo>
</mml:math>
</inline-formula>10<sup>9</sup>/L)</td>
<td align="center" valign="top">3.9 (3.0, 5.4)</td>
<td align="center" valign="top">4.2 (3.1, 5.6)</td>
<td align="center" valign="top">&#x003E;0.9</td>
</tr>
<tr>
<td align="left" valign="top">LY (<inline-formula>
<mml:math id="M4">
<mml:mo>&#x00D7;</mml:mo>
</mml:math>
</inline-formula>10<sup>9</sup>/L)</td>
<td align="center" valign="top">1.6 (1.3, 2.1)</td>
<td align="center" valign="top">1.3 (0.9, 1.9)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">EO (<inline-formula>
<mml:math id="M5">
<mml:mo>&#x00D7;</mml:mo>
</mml:math>
</inline-formula>10<sup>9</sup>/L)</td>
<td align="center" valign="top">0.2 (0.1, 0.2)</td>
<td align="center" valign="top">0.1 (0.1, 0.2)</td>
<td align="center" valign="top">0.006</td>
</tr>
<tr>
<td align="left" valign="top">NE/LY</td>
<td align="center" valign="top">2.4 (1.7, 3.6)</td>
<td align="center" valign="top">3.0 (2.1, 5.0)</td>
<td align="center" valign="top">0.008</td>
</tr>
<tr>
<td align="left" valign="top">FEV<sub>1</sub>pred (%)</td>
<td align="center" valign="top">59.0 (39.4, 76.0)</td>
<td align="center" valign="top">47.5 (30.3, 68.9)</td>
<td align="center" valign="top">0.001</td>
</tr>
<tr>
<td align="left" valign="top">FVCpred (%)</td>
<td align="center" valign="top">90.1&#x202F;&#x00B1;&#x202F;20.1</td>
<td align="center" valign="top">76.3&#x202F;&#x00B1;&#x202F;23.0</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">FEV<sub>1</sub>/FVC</td>
<td align="center" valign="top">51.0 (37.0, 61.0)</td>
<td align="center" valign="top">51.2 (38.0, 61.0)</td>
<td align="center" valign="top">&#x003E;0.9</td>
</tr>
<tr>
<td align="left" valign="top">DLCO SB (mmol/min/kPa)</td>
<td align="center" valign="top">71.6&#x202F;&#x00B1;&#x202F;21.6</td>
<td align="center" valign="top">50.9&#x202F;&#x00B1;&#x202F;22.4</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">RA TD (mm)</td>
<td align="center" valign="top">32.0 (29.0, 35.0)</td>
<td align="center" valign="top">37.5 (32.0, 44.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">RA LD (mm)</td>
<td align="center" valign="top">43.0 (41.0, 47.0)</td>
<td align="center" valign="top">49.5 (44.3, 54.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">RVD (mm)</td>
<td align="center" valign="top">30.0 (28.0, 33.0)</td>
<td align="center" valign="top">36.0 (31.0, 42.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">PA (mm)</td>
<td align="center" valign="top">23.0 (21.0, 25.0)</td>
<td align="center" valign="top">27.5 (23.0, 30.9)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">EF (%)</td>
<td align="center" valign="top">68.0 (65.0, 71.0)</td>
<td align="center" valign="top">67.5 (63.0, 70.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">IVS (mm)</td>
<td align="center" valign="top">10.0 (9.2, 10.6)</td>
<td align="center" valign="top">10.0 (9.0, 10.7)</td>
<td align="center" valign="top">&#x003E;0.9</td>
</tr>
<tr>
<td align="left" valign="top">AAo (mm)</td>
<td align="center" valign="top">31.0 (29.0, 34.0)</td>
<td align="center" valign="top">33.0 (30.0, 36.0)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">PVmax (cm/s)</td>
<td align="center" valign="top">88.5 (79.8, 100.3)</td>
<td align="center" valign="top">91.5 (76.0, 105.3)</td>
<td align="center" valign="top">0.6</td>
</tr>
<tr>
<td align="left" valign="top">PA/AO</td>
<td align="center" valign="top">0.7 (0.7, 0.8)</td>
<td align="center" valign="top">0.8 (0.7, 0.9)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">TRV (m/s)</td>
<td align="center" valign="top">2.20 (2.01, 2.50)</td>
<td align="center" valign="top">3.75 (3.28, 4.28)</td>
<td align="center" valign="top">&#x003C;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Correlation analysis between the 39 candidate predictors and COPD-PH status (<xref ref-type="fig" rid="fig3">Figure 3</xref>) highlighted right-heart dimensional indices and central pulmonary arterial measures as the most informative features&#x2014;specifically right ventricular end-diastolic diameter (RVD), right atrial transverse diameter (RA TD), pulmonary artery diameter (PA), right atrial longitudinal diameter (RA LD), and the pulmonary artery-to-aorta diameter ratio (PA/AO).</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Correlations between the 39 candidate predictors and COPD-PH status.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Horizontal bar chart showing correlation coefficients between various clinical and laboratory parameters and the presence of pulmonary hypertension in patients with COPD (COPD-PH). Bars extend in both positive and negative directions, with variables like RA TD, RA LD, RVD, and PA having higher positive correlations. Chart highlights statistically significant associations for clinical evaluation.</alt-text>
</graphic>
</fig>
<p>Across 40 baseline variables, 36 (90.0%) showed no statistically significant difference between the training and test sets (<italic>p</italic>&#x202F;&#x2265;&#x202F;0.05; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>). The four variables with statistically significant differences were smoking index, NT-proBNP, RA TD, and RVD. For RA TD and RVD, the median differences were both 1&#x202F;mm (approximately 3% relative difference), with interquartile range (IQR) overlap coefficients of 0.67 and 0.75, respectively, indicating highly overlapping distributions and minimal differences in magnitude. NT-proBNP showed an approximately 12% relative difference in medians and smoking index about 40%; nonetheless, IQR overlap remained substantial (0.52 and 0.80, respectively).</p>
<p>Of the 523 included patients, 176 (33.6%) had COPD-PH and 347 (66.4%) had COPD alone. After random allocation, the training set comprised 423 patients (143 COPD-PH, 280 COPD alone), and the independent test set comprised 100 patients (33 COPD-PH, 67 COPD alone). Overall, the distributions of key predictors and outcomes were similar between the training and test sets, with only small differences in a few variables (e.g., smoking index, NT-proBNP, RA TD, and RVD), supporting the comparability of the development and evaluation datasets.</p>
<p>Among the 176 COPD-PH patients, 133 (75.6%) were diagnosed by echocardiography and 43 (24.4%) were confirmed by RHC. Baseline characteristics differed in part between the RHC-diagnosed and echo-diagnosed subgroups (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 4</xref>). In general, the RHC-diagnosed subgroup showed features consistent with greater disease severity and right-heart/pulmonary-artery remodeling (e.g., lower PaO<sub>2</sub> and DLCO and higher NT-proBNP and uric acid, together with larger right-heart or pulmonary-artery dimensions), which likely reflects catheterization referral patterns rather than contradictory diagnostic definitions.</p>
<p>The flow of participants through the study, including numbers screened, excluded, and finally included with and without COPD-PH, is shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Flow diagram of patient screening, exclusions, and inclusion in the analysis dataset.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart showing patient selection for a COPD study: potential inpatients are assessed for inclusion and exclusion criteria, with 523 included. They are divided into COPD-PH (176) and COPD alone (347), then randomly assigned to a training set (423) and an independent test set (100) for model development and evaluation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec20">
<label>3.2</label>
<title>Model development and performance comparison</title>
<p>Eight candidate models were compared using seven core performance metrics. Overall cross-validated performance in the training set is summarized in <xref ref-type="table" rid="tab3">Table 3</xref>, and fold-specific results are provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 5</xref>. The ROC curves from five-fold cross-validation in the training set are shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>, illustrating the consistency of discriminative performance across validation folds.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Overall cross-validated performance of the candidate algorithms in the training set (mean and 95% CI across the five validation folds).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">AUC</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Sensitivity (Recall)</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">PPV (Precision)</th>
<th align="center" valign="top">NPV</th>
<th align="center" valign="top">F1 Score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">MLP</td>
<td align="center" valign="top">0.745 (95% CI: 0.681, 0.797)</td>
<td align="center" valign="top">0.662 (95% CI: 0.634, 0.709)</td>
<td align="center" valign="top">0.739 (95% CI: 0.514, 0.907)</td>
<td align="center" valign="top">0.586 (95% CI: 0.379, 0.814)</td>
<td align="center" valign="top">0.689 (95% CI: 0.593, 0.793)</td>
<td align="center" valign="top">0.742 (95% CI: 0.653, 0.815)</td>
<td align="center" valign="top">0.669 (95% CI: 0.567, 0.724)</td>
</tr>
<tr>
<td align="left" valign="top">kNN</td>
<td align="center" valign="top">0.794 (95% CI: 0.778, 0.809)</td>
<td align="center" valign="top">0.734 (95% CI: 0.714, 0.755)</td>
<td align="center" valign="top">0.782 (95% CI: 0.757, 0.807)</td>
<td align="center" valign="top">0.686 (95% CI: 0.625, 0.743)</td>
<td align="center" valign="top">0.716 (95% CI: 0.684, 0.748)</td>
<td align="center" valign="top">0.759 (95% CI: 0.747, 0.772)</td>
<td align="center" valign="top">0.747 (95% CI: 0.737, 0.763)</td>
</tr>
<tr>
<td align="left" valign="top">Logistic Regression (LR)</td>
<td align="center" valign="top">0.819 (95% CI: 0.788, 0.853)</td>
<td align="center" valign="top">0.757 (95% CI: 0.707, 0.805)</td>
<td align="center" valign="top">0.739 (95% CI: 0.671, 0.814)</td>
<td align="center" valign="top">0.775 (95% CI: 0.718, 0.821)</td>
<td align="center" valign="top">0.767 (95% CI: 0.716, 0.810)</td>
<td align="center" valign="top">0.752 (95% CI: 0.694, 0.810)</td>
<td align="center" valign="top">0.751 (95% CI: 0.698, 0.805)</td>
</tr>
<tr>
<td align="left" valign="top">AdaBoost</td>
<td align="center" valign="top">0.865 (95% CI: 0.833, 0.891)</td>
<td align="center" valign="top">0.805 (95% CI: 0.786, 0.830)</td>
<td align="center" valign="top">0.821 (95% CI: 0.793, 0.850)</td>
<td align="center" valign="top">0.789 (95% CI: 0.743, 0.846)</td>
<td align="center" valign="top">0.799 (95% CI: 0.764, 0.840)</td>
<td align="center" valign="top">0.816 (95% CI: 0.795, 0.837)</td>
<td align="center" valign="top">0.809 (95% CI: 0.793, 0.832)</td>
</tr>
<tr>
<td align="left" valign="top">GBM</td>
<td align="center" valign="top">0.885 (95% CI: 0.868, 0.901)</td>
<td align="center" valign="top">0.814 (95% CI: 0.798, 0.829)</td>
<td align="center" valign="top">0.839 (95% CI: 0.807, 0.871)</td>
<td align="center" valign="top">0.789 (95% CI: 0.754, 0.818)</td>
<td align="center" valign="top">0.800 (95% CI: 0.779, 0.821)</td>
<td align="center" valign="top">0.833 (95% CI: 0.806, 0.859)</td>
<td align="center" valign="top">0.819 (95% CI: 0.804, 0.834)</td>
</tr>
<tr>
<td align="left" valign="top">Random Forest (RF)</td>
<td align="center" valign="top">0.909 (95% CI: 0.887, 0.928)</td>
<td align="center" valign="top">0.836 (95% CI: 0.818, 0.854)</td>
<td align="center" valign="top">0.836 (95% CI: 0.800, 0.871)</td>
<td align="center" valign="top">0.836 (95% CI: 0.811, 0.861)</td>
<td align="center" valign="top">0.837 (95% CI: 0.814, 0.855)</td>
<td align="center" valign="top">0.837 (95% CI: 0.809, 0.868)</td>
<td align="center" valign="top">0.835 (95% CI: 0.815, 0.855)</td>
</tr>
<tr>
<td align="left" valign="top">XGBoost</td>
<td align="center" valign="top">0.931 (95% CI: 0.904, 0.952)</td>
<td align="center" valign="top">0.868 (95% CI: 0.838, 0.902)</td>
<td align="center" valign="top">0.900 (95% CI: 0.875, 0.925)</td>
<td align="center" valign="top">0.836 (95% CI: 0.789, 0.879)</td>
<td align="center" valign="top">0.847 (95% CI: 0.811, 0.883)</td>
<td align="center" valign="top">0.893 (95% CI: 0.867, 0.921)</td>
<td align="center" valign="top">0.872 (95% CI: 0.844, 0.904)</td>
</tr>
<tr>
<td align="left" valign="top">CatBoost</td>
<td align="center" valign="top">0.934 (95% CI: 0.908, 0.955)</td>
<td align="center" valign="top">0.870 (95% CI: 0.845, 0.893)</td>
<td align="center" valign="top">0.893 (95% CI: 0.875, 0.907)</td>
<td align="center" valign="top">0.846 (95% CI: 0.796, 0.893)</td>
<td align="center" valign="top">0.856 (95% CI: 0.816, 0.894)</td>
<td align="center" valign="top">0.888 (95% CI: 0.873, 0.903)</td>
<td align="center" valign="top">0.873 (95% CI: 0.851, 0.894)</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>ROC curves from five-fold cross-validation in the training set.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Line chart showing ROC curves for eight machine learning models on a training set using five-fold cross-validation, with true positive rate versus false positive rate. XGBoost and CatBoost have the highest mean AUC of zero point nine three, while MLP has the lowest at zero point seven four. A diagonal dashed line represents random performance. A legend identifies each model and its mean AUC.</alt-text>
</graphic>
</fig>
<p>CatBoost demonstrated the best overall internal performance among the eight algorithms. Its fold-specific ROC curves are shown in <xref ref-type="fig" rid="fig6">Figure 6</xref> and indicate stable discrimination across validation folds. In cross-validation, CatBoost achieved an average AUC of 0.9337, accuracy of 0.8696, and F1 score of 0.8732.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Fold-specific ROC curves for the CatBoost model.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Line graph showing five ROC curves for a CatBoost model across five folds, comparing true positive rate versus false positive rate. Fold Area Under Curve values range from zero point eight eight to zero point nine six.</alt-text>
</graphic>
</fig>
<p>Across 196 pairwise model comparisons, effect size analysis using Cohen&#x2019;s d showed large effects (|d|&#x202F;&#x2265;&#x202F;0.8) for CatBoost versus traditional baselines (kNN, MLP), corresponding to 15&#x2013;31% relative improvements in AUC and accuracy. In contrast, effect sizes versus XGBoost were mostly small or negligible (|d|&#x202F;&#x003C;&#x202F;0.5), suggesting broadly comparable predictive performance, although CatBoost was more efficient and easier to tune in practice. Among 49 valid pairwise comparisons favoring CatBoost, 81.6% showed moderate-to-large and 75.5% large effects, supporting its selection as the primary model for subsequent test-set evaluation.</p>
<p>On the independent test set, using the decision threshold fixed during cross-validation, CatBoost maintained superior out-of-sample performance (<xref ref-type="fig" rid="fig7">Figure 7</xref>). Test-set results for all eight models are summarized in <xref ref-type="table" rid="tab4">Table 4</xref>. CatBoost achieved an AUC of 0.848, accuracy of 0.830, F1 score of 0.746, sensitivity of 0.758, and specificity of 0.866, with balanced PPV and NPV. Random Forest ranked second (AUC 0.815, accuracy 0.820, F1 0.719), while logistic regression (AUC 0.806, F1 0.700) and GBM (AUC 0.804, F1 0.684) showed moderate performance; XGBoost was comparatively weaker (AUC 0.756, F1 0.683).</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Receiver operating characteristic (ROC) curves for all candidate models on the test set.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">ROC curve comparison line chart displaying eight machine learning models with their respective AUC values: CatBoost 0.85, Random Forest 0.82, Logistic Regression 0.81, GBM 0.80, XGBoost 0.76, KNN 0.74, MLP 0.73, and AdaBoost 0.71, measuring true positive rate versus false positive rate.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Test-set performance of the eight machine-learning models for predicting PH in COPD.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">AUC</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Sensitivity (Recall)</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">PPV (Precision)</th>
<th align="center" valign="top">NPV</th>
<th align="center" valign="top">F1 Score</th>
<th align="center" valign="top">Best Threshold</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GBM</td>
<td align="center" valign="middle">0.804 (95% CI: 0.707, 0.898)</td>
<td align="center" valign="middle">0.760 (95% CI: 0.680, 0.840)</td>
<td align="center" valign="middle">0.788 (95% CI: 0.647, 0.917)</td>
<td align="center" valign="middle">0.746 (95% CI: 0.643, 0.841)</td>
<td align="center" valign="middle">0.605 (95% CI: 0.458, 0.750)</td>
<td align="center" valign="middle">0.877 (95% CI: 0.784, 0.963)</td>
<td align="center" valign="middle">0.684 (95% CI: 0.552, 0.791)</td>
<td align="center" valign="middle">0.178</td>
</tr>
<tr>
<td align="left" valign="middle">AdaBoost</td>
<td align="center" valign="middle">0.709 (95% CI: 0.587, 0.837)</td>
<td align="center" valign="middle">0.750 (95% CI: 0.660, 0.830)</td>
<td align="center" valign="middle">0.545 (95% CI: 0.379, 0.725)</td>
<td align="center" valign="middle">0.851 (95% CI: 0.767, 0.930)</td>
<td align="center" valign="middle">0.643 (95% CI: 0.458, 0.815)</td>
<td align="center" valign="middle">0.792 (95% CI: 0.692, 0.881)</td>
<td align="center" valign="middle">0.590 (95% CI: 0.428, 0.724)</td>
<td align="center" valign="middle">0.498</td>
</tr>
<tr>
<td align="left" valign="middle">Random Forest (RF)</td>
<td align="center" valign="middle">0.815 (95% CI: 0.718, 0.909)</td>
<td align="center" valign="middle">0.820 (95% CI: 0.750, 0.890)</td>
<td align="center" valign="middle">0.697 (95% CI: 0.531, 0.861)</td>
<td align="center" valign="middle">0.881 (95% CI: 0.800, 0.952)</td>
<td align="center" valign="middle">0.742 (95% CI: 0.581, 0.893)</td>
<td align="center" valign="middle">0.855 (95% CI: 0.769, 0.936)</td>
<td align="center" valign="middle">0.719 (95% CI: 0.576, 0.829)</td>
<td align="center" valign="middle">0.4</td>
</tr>
<tr>
<td align="left" valign="middle">Logistic Regression (LR)</td>
<td align="center" valign="middle">0.806 (95% CI: 0.697, 0.899)</td>
<td align="center" valign="middle">0.820 (95% CI: 0.750, 0.890)</td>
<td align="center" valign="middle">0.636 (95% CI: 0.481, 0.800)</td>
<td align="center" valign="middle">0.910 (95% CI: 0.838, 0.971)</td>
<td align="center" valign="middle">0.778 (95% CI: 0.609, 0.926)</td>
<td align="center" valign="middle">0.836 (95% CI: 0.750, 0.917)</td>
<td align="center" valign="middle">0.700 (95% CI: 0.571, 0.829)</td>
<td align="center" valign="middle">0.5</td>
</tr>
<tr>
<td align="left" valign="middle">XGBoost</td>
<td align="center" valign="middle">0.756 (95% CI: 0.648, 0.855)</td>
<td align="center" valign="middle">0.740 (95% CI: 0.650, 0.830)</td>
<td align="center" valign="middle">0.848 (95% CI: 0.727, 0.969)</td>
<td align="center" valign="middle">0.687 (95% CI: 0.576, 0.794)</td>
<td align="center" valign="middle">0.571 (95% CI: 0.431, 0.711)</td>
<td align="center" valign="middle">0.902 (95% CI: 0.820, 0.979)</td>
<td align="center" valign="middle">0.683 (95% CI: 0.560, 0.783)</td>
<td align="center" valign="middle">0.328</td>
</tr>
<tr>
<td align="left" valign="middle">kNN</td>
<td align="center" valign="middle">0.742 (95% CI: 0.642, 0.842)</td>
<td align="center" valign="middle">0.670 (95% CI: 0.570, 0.770)</td>
<td align="center" valign="middle">0.697 (95% CI: 0.513, 0.850)</td>
<td align="center" valign="middle">0.657 (95% CI: 0.533, 0.770)</td>
<td align="center" valign="middle">0.500 (95% CI: 0.341, 0.640)</td>
<td align="center" valign="middle">0.815 (95% CI: 0.700, 0.912)</td>
<td align="center" valign="middle">0.582 (95% CI: 0.436, 0.703)</td>
<td align="center" valign="middle">0.4</td>
</tr>
<tr>
<td align="left" valign="middle">MLP</td>
<td align="center" valign="middle">0.731 (95% CI: 0.625, 0.838)</td>
<td align="center" valign="middle">0.700 (95% CI: 0.610, 0.790)</td>
<td align="center" valign="middle">0.758 (95% CI: 0.600, 0.897)</td>
<td align="center" valign="middle">0.672 (95% CI: 0.554, 0.784)</td>
<td align="center" valign="middle">0.532 (95% CI: 0.388, 0.667)</td>
<td align="center" valign="middle">0.849 (95% CI: 0.744, 0.936)</td>
<td align="center" valign="middle">0.625 (95% CI: 0.493, 0.732)</td>
<td align="center" valign="middle">0.026</td>
</tr>
<tr>
<td align="left" valign="middle">CatBoost</td>
<td align="center" valign="middle">0.848 (95% CI: 0.766, 0.926)</td>
<td align="center" valign="middle">0.830 (95% CI: 0.750, 0.910)</td>
<td align="center" valign="middle">0.758 (95% CI: 0.586, 0.897)</td>
<td align="center" valign="middle">0.866 (95% CI: 0.784, 0.943)</td>
<td align="center" valign="middle">0.735 (95% CI: 0.577, 0.879)</td>
<td align="center" valign="middle">0.879 (95% CI: 0.797, 0.954)</td>
<td align="center" valign="middle">0.746 (95% CI: 0.620, 0.853)</td>
<td align="center" valign="middle">0.235</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Decision curve analysis in the test set showed that, across an approximate threshold probability range of 0.10&#x2013;0.60, the net benefit of the CatBoost model consistently exceeded that of the other top-performing models (<xref ref-type="fig" rid="fig8">Figure 8</xref>), indicating greater potential clinical decision-making value. Taken together, these findings support CatBoost as the preferred model, with the best overall balance of discrimination and clinical utility for subsequent application.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Decision curve analysis of the three top-performing models on the test set.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g008.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three side-by-side line charts display decision curve analysis with net benefit on the y-axis and threshold probability on the x-axis. Each chart compares a machine learning model&#x2014;CatBoost, Random Forest, and Logistic Regression&#x2014;against "Treat All" and "Treat None" strategies.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec21">
<label>3.3</label>
<title>Model explanation</title>
<p>We used SHAP to interrogate the CatBoost model and quantify feature-level contributions at both cohort and individual levels. In the global importance plot (<xref ref-type="fig" rid="fig9">Figure 9</xref>), right ventricular diameter (RVD) and pulmonary artery diameter were the dominant structural predictors. Arterial blood gases (PaCO<sub>2</sub>, PaO<sub>2</sub>) reflected gas-exchange impairment, while right atrial transverse and longitudinal diameters (RA TD, RA LD) captured right-sided cardiac remodeling. NT-proBNP and the neutrophil-to-lymphocyte ratio (NLR) indexed cardiac strain and systemic inflammation, respectively. Spirometric indices (FEV<sub>1</sub>/FVC, FEV<sub>1</sub>% predicted), hematologic markers (white blood cell and lymphocyte counts, hemoglobin, RDW-CV), and other clinical or imaging variables (creatinine, BMI, PA/AO) contributed further, though more modest, discriminatory information.</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>Features ranked by mean absolute SHAP values.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g009.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">SHAP beeswarm summary plot for the final CatBoost model. Eighteen predictors are ranked on the y-axis and SHAP value (impact on predicted COPD-PH risk) is on the x-axis. Each dot represents one patient; dots to the right increase and to the left decrease predicted risk. Color encodes feature value from low (blue) to high (red).</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="fig10">Figure 10</xref> illustrates case-level feature attributions for representative patients across GOLD 1&#x2013;4 airflow limitation categories. In the GOLD 1 exemplar, enlarged right atrial diameters (RA TD 37&#x202F;mm, RA LD 47&#x202F;mm) and a white blood cell count of 5.7&#x202F;&#x00D7;&#x202F;10<sup>9</sup>/L have positive SHAP contributions, shifting the prediction toward higher PH risk, whereas a supranormal FVC % predicted (133%) and a relatively small main pulmonary artery diameter (24&#x202F;mm) have negative SHAP contributions that attenuate the overall risk estimate.</p>
<fig position="float" id="fig10">
<label>Figure 10</label>
<caption>
<p>Case-level SHAP explanations for GOLD 1&#x2013;4 exemplars. <bold>(A)</bold> GOLD 1 exemplar. <bold>(B)</bold> GOLD 2 exemplar. <bold>(C)</bold> GOLD 3 exemplar. <bold>(D)</bold> GOLD 4 exemplar. In each panel, features with bars extending to the right increase, and those extending to the left decrease, the predicted PH risk.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g010.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Grouped waterfall charts labeled A, B, C, and D display contributions of clinical variables to predicted model outputs. Blue sections indicate factors lowering the output, while red sections indicate factors increasing it. Each variable&#x2019;s value and direction of effect are shown along the bars, with f(x) and base values marked on the x-axes.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="fig11">Figure 11</xref> (SHAP dependence plots) highlights clinically interpretable regions in which the model&#x2019;s predicted PH risk increases, such as pulmonary artery diameter&#x202F;&#x003E;&#x202F;25&#x202F;mm, PaCO<sub>2</sub>&#x202F;&#x003E;&#x202F;50&#x202F;mm Hg, and RVD&#x202F;&#x003E;&#x202F;30&#x202F;mm. These values are not intended as prespecified clinical cut-offs but rather as data-driven inflection points that may inform triage decisions and generate hypotheses for future studies.</p>
<fig position="float" id="fig11">
<label>Figure 11</label>
<caption>
<p>SHAP dependence plots for key predictors. <bold>(A)</bold> Dependence of SHAP values on pulmonary artery diameter. <bold>(B)</bold> Dependence of SHAP values on PaCO<sub>2</sub>. <bold>(C)</bold> Dependence of SHAP values on right ventricular diameter. Each point represents one patient, with the <italic>x</italic>-axis showing the raw feature value and the <italic>y</italic>-axis the corresponding SHAP value; higher SHAP values indicate greater contribution to the predicted PH risk.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g011.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three scatter plots labeled A, B, and C display SHAP values against clinical variables. Plot A shows SHAP value versus PA (millimeters), Plot B shows SHAP value versus PaCO2 (millimeters of mercury), and Plot C shows SHAP value versus RVD (millimeters), each suggesting a positive nonlinear relationship.</alt-text>
</graphic>
</fig>
<p>Interpretability analyses using subgroup SHAP summaries suggested broadly consistent global feature-importance patterns between the RHC-diagnosed and echo-diagnosed subgroups (<xref ref-type="fig" rid="fig12">Figure 12</xref>). Because the RHC-diagnosed subgroup was small, subgroup-specific discrimination metrics were considered exploratory and were not emphasized; we therefore focused on the consistency of model explanations across subgroups.</p>
<fig position="float" id="fig12">
<label>Figure 12</label>
<caption>
<p>SHAP summary plots by PH ascertainment method. <bold>(A)</bold> SHAP summary plot for the RHC-diagnosed PH subgroup. <bold>(B)</bold> SHAP summary plot for the echo-diagnosed PH subgroup.</p>
</caption>
<graphic xlink:href="fmed-13-1752113-g012.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Two side-by-side SHAP beeswarm summary plots comparing interpretability by PH ascertainment method: (A) RHC-diagnosed PH and (B) echo-diagnosed PH. Predictors are ranked on the y-axis and SHAP value is shown on the x-axis. Each dot represents a patient, colored from low (blue) to high (red) feature values.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec22">
<label>4</label>
<title>Discussion</title>
<sec id="sec23">
<label>4.1</label>
<title>Model performance</title>
<p>We developed and internally validated a machine-learning model to estimate PH risk in patients with COPD. Among the eight candidate algorithms, CatBoost provided the strongest overall discrimination and was therefore selected as the final model for downstream evaluation and interpretation.</p>
<p>In clinical risk stratification, CatBoost offers multiple practical advantages: it supports the handling of categorical variables within electronic health records, effectively mitigates overfitting through its symmetric tree structure and ordered boosting mechanism (<xref ref-type="bibr" rid="ref30">30</xref>), and integrates multi-modal data sources without requiring complex feature engineering (<xref ref-type="bibr" rid="ref31">31</xref>). Furthermore, its built-in missing value handling strategy enhances model robustness in real-world datasets containing incomplete observations, establishing CatBoost as a pragmatic and reliable choice for clinical decision support applications (<xref ref-type="bibr" rid="ref32">32</xref>).</p>
</sec>
<sec id="sec24">
<label>4.2</label>
<title>Key predictive factors</title>
<p>Based on SHAP feature ranking of the CatBoost model, the five most important predictors were right ventricular diameter, pulmonary artery diameter, arterial partial pressure of carbon dioxide, right atrial transverse diameter, and age. Together, these variables give information about demographic characteristics, heart structure, and breathing function in patients with chronic obstructive pulmonary disease and pulmonary hypertension (COPD-PH).</p>
<sec id="sec25">
<label>4.2.1</label>
<title>Right ventricular diameter</title>
<p>Injury to the pulmonary blood vessels, such as hypoxic vasoconstriction, loss of vascular bed, and endothelial dysfunction, raises PVR, PAP, and right ventricular afterload. Over time, the right ventricle shifts from an adaptive to a maladaptive state, with increased right ventricular diameter reflecting this transition (<xref ref-type="bibr" rid="ref33">33</xref>, <xref ref-type="bibr" rid="ref34">34</xref>).</p>
</sec>
<sec id="sec26">
<label>4.2.2</label>
<title>Pulmonary artery diameter</title>
<p>Hypoxemia causes the pulmonary artery to constrict by triggering cytokine release and production of reactive oxygen species (ROS). These changes increase pulmonary vascular resistance and then lead to enlargement of the pulmonary artery diameter (<xref ref-type="bibr" rid="ref35">35</xref>).</p>
</sec>
<sec id="sec27">
<label>4.2.3</label>
<title>Carbon dioxide partial pressure</title>
<p>Hypercapnia speeds up the progression of PH. It causes pulmonary vasoconstriction and promotes injury and remodeling of the pulmonary vascular endothelium, which increases pulmonary vascular resistance (<xref ref-type="bibr" rid="ref36">36</xref>).</p>
</sec>
<sec id="sec28">
<label>4.2.4</label>
<title>Right atrial transverse diameter</title>
<p>When pulmonary vascular resistance rises, right ventricular afterload also rises. The right atrium then dilates as a compensatory response. As the disease gets worse, the right atrium becomes larger, right ventricular diastolic function declines, and atrial remodeling increases (<xref ref-type="bibr" rid="ref37">37</xref>).</p>
</sec>
<sec id="sec29">
<label>4.2.5</label>
<title>Age</title>
<p>In older patients, changes in lung tissue reduce the elasticity of the pulmonary vessels. At the same time, long-term systemic inflammation in this group further harms pulmonary vascular endothelial function and raises pulmonary artery pressure. These effects speed up both the onset and the progression of COPD-PH (<xref ref-type="bibr" rid="ref38">38</xref>, <xref ref-type="bibr" rid="ref39">39</xref>).</p>
</sec>
</sec>
<sec id="sec30">
<label>4.3</label>
<title>Strengths of the study</title>
<sec id="sec31">
<label>4.3.1</label>
<title>Data scope</title>
<p>This study used standard diagnostic criteria and a single, consistent data collection process. We enrolled 523 patients. This sample gives enough power to assess COPD-PH risk within the cohort and to find clinically important correlations. The candidate predictors cover five main domains: demographic characteristics, clinical history, laboratory indicators, pulmonary function, and echocardiography. Together, these variables give a broad description of the COPD-PH phenotype.</p>
</sec>
<sec id="sec32">
<label>4.3.2</label>
<title>Data analysis and interpretability</title>
<p>We compared several machine learning models in a systematic way, and we chose the CatBoost model because it performed best on these mixed clinical data. The explanations from SHAP are in line with known pathophysiological findings in COPD-PH.</p>
</sec>
<sec id="sec33">
<label>4.3.3</label>
<title>Practical application potential</title>
<p>The final model uses variables that are routinely collected in clinical practice and gives explanations at the level of each patient. This makes it suitable for real-world use. It can work as a simple risk calculator or be built into decision support tools in electronic health record systems.</p>
</sec>
</sec>
<sec id="sec34">
<label>4.4</label>
<title>Limitations</title>
<p>This study has several limitations. First, the single-center, retrospective design may introduce selection bias, information bias, and unmeasured confounding. Second, we did not perform external validation; therefore, transportability to other case-mix profiles, imaging protocols, and laboratory platforms remains uncertain and should be tested prospectively. Third, PH was primarily ascertained by echocardiography; although appropriate for screening, it may misclassify cases compared with RHC, the hemodynamic gold standard. The RHC-diagnosed subgroup was small, limiting the precision of subgroup-specific estimates; thus, subgroup findings were considered descriptive/exploratory and warrant confirmation in larger cohorts with broader RHC verification. Finally, some clinically relevant candidate predictors&#x2014;most notably DLCO&#x2014;were excluded because their missingness met our prespecified exclusion threshold (&#x2265;30%), which may have limited model generalizability; future studies with standardized diffusion testing should evaluate the incremental value of DLCO.</p>
</sec>
<sec id="sec35">
<label>4.5</label>
<title>Future directions</title>
<p>We plan to improve the model so that it can be built into the electronic medical record (EMR). In this setting, the system will automatically pull routinely collected variables and will ask clinicians to add any missing key predictors before it estimates risk. When non-critical predictors are missing or their values appear implausible, the system should mark these values for checking and either ask for confirmation or continue using only the other confirmed predictors. The main users will be pulmonology and cardiology specialists and residents who manage COPD-PH. They will not need special machine learning skills, only a clear understanding of the COPD-PH risk probabilities and the related risk groups given by the model.</p>
<p>Future work should focus on multicenter collaborative datasets and prospective external validation. This will test how well the model works in different patient groups, imaging protocols, and laboratory platforms. Adding more cases confirmed by right heart catheterization would also improve diagnostic accuracy for pulmonary hypertension. In addition, future studies should examine how model-based risk stratification changes daily clinical care, including diagnostic test requests, referral patterns, treatment choices, and patient-centered outcomes. Ideally, the model should be tested in prospective trials that follow real clinical pathways in different healthcare systems.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec36">
<label>5</label>
<title>Conclusion</title>
<p>Based on routinely collected clinical data, we developed and internally validated a CatBoost model to detect COPD-PH. The model reached an area under the curve (AUC) of 0.848. Decision curve analysis showed that the model gives a net clinical benefit. The main predictive variables were right ventricular diameter, pulmonary artery diameter, arterial partial pressure of carbon dioxide, right atrial transverse diameter, and age. These variables cover structural, hemodynamic, ventilatory, and demographic domains. They match known pathophysiological features of COPD-PH and support bedside risk stratification.</p>
<p>In the future, we plan to test how well the model works in different centers and how it affects clinical outcomes, using multicenter external validation and prospective effectiveness studies. We expect to build the model into electronic health record systems and to create a web-based risk calculator to improve triage for echocardiography and right heart catheterization. This may help make better use of right heart monitoring resources and support more personalized COPD care.</p>
</sec>
<sec id="sec37">
<title>Patient and public involvement</title>
<p>Patients and members of the public were not involved in the design, conduct, reporting, or dissemination plans of this research.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec38">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="sec39">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Institutional Review Board of Beijing Chaoyang Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x2019; legal guardians/next of kin because this was a retrospective study using routinely collected, de-identified clinical data, involving no direct contact with patients and posing minimal risk to participants.</p>
</sec>
<sec sec-type="author-contributions" id="sec40">
<title>Author contributions</title>
<p>RW: Formal analysis, Data curation, Visualization, Validation, Investigation, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Methodology. JT: Formal analysis, Supervision, Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Software, Methodology, Data curation, Validation, Visualization. GL: Supervision, Methodology, Writing &#x2013; review &#x0026; editing, Validation. ZP: Writing &#x2013; review &#x0026; editing, Investigation, Resources. HG: Investigation, Writing &#x2013; review &#x0026; editing, Resources. WS: Investigation, Resources, Writing &#x2013; review &#x0026; editing. JW: Resources, Writing &#x2013; review &#x0026; editing, Validation, Conceptualization, Project administration, Supervision.</p>
</sec>
<sec sec-type="COI-statement" id="sec41">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec42">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec43">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec44">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fmed.2026.1752113/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fmed.2026.1752113/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blanco</surname><given-names>I</given-names></name> <name><surname>Tura-Ceide</surname><given-names>O</given-names></name> <name><surname>Peinado</surname><given-names>VI</given-names></name> <name><surname>Barbera</surname><given-names>JA</given-names></name></person-group>. <article-title>Updated perspectives on pulmonary hypertension in COPD</article-title>. <source>Int J Chron Obstruct Pulmon Dis</source>. (<year>2020</year>) <volume>15</volume>:<fpage>1315</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.2147/COPD.S211841</pub-id>, <pub-id pub-id-type="pmid">32606641</pub-id></mixed-citation></ref>
<ref id="ref2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sysol</surname><given-names>JR</given-names></name> <name><surname>Machado</surname><given-names>RF</given-names></name></person-group>. <article-title>Classification and pathophysiology of pulmonary hypertension</article-title>. <source>Contin Cardiol Educ</source>. (<year>2018</year>) <volume>4</volume>:<fpage>2</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1002/cce2.71</pub-id></mixed-citation></ref>
<ref id="ref3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Olsson</surname><given-names>KM</given-names></name> <name><surname>Corte</surname><given-names>TJ</given-names></name> <name><surname>Kamp</surname><given-names>JC</given-names></name> <name><surname>Montani</surname><given-names>D</given-names></name> <name><surname>Nathan</surname><given-names>SD</given-names></name> <name><surname>Neubert</surname><given-names>L</given-names></name> <etal/></person-group>. <article-title>Pulmonary hypertension associated with lung disease: new insights into pathomechanisms, diagnosis, and management</article-title>. <source>Lancet Respir Med</source>. (<year>2023</year>) <volume>11</volume>:<fpage>820</fpage>&#x2013;<lpage>35</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2213-2600(23)00259-X</pub-id>, <pub-id pub-id-type="pmid">37591300</pub-id></mixed-citation></ref>
<ref id="ref4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chaouat</surname><given-names>A</given-names></name> <name><surname>Naeije</surname><given-names>R</given-names></name> <name><surname>Weitzenblum</surname><given-names>E</given-names></name></person-group>. <article-title>Pulmonary hypertension in COPD</article-title>. <source>Eur Respir J</source>. (<year>2008</year>) <volume>32</volume>:<fpage>1371</fpage>&#x2013;<lpage>85</lpage>. doi: <pub-id pub-id-type="doi">10.1183/09031936.00015608</pub-id>, <pub-id pub-id-type="pmid">18978137</pub-id></mixed-citation></ref>
<ref id="ref5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hoffmann</surname><given-names>J</given-names></name> <name><surname>Wilhelm</surname><given-names>J</given-names></name> <name><surname>Olschewski</surname><given-names>A</given-names></name> <name><surname>Kwapiszewska</surname><given-names>G</given-names></name></person-group>. <article-title>Microarray analysis in pulmonary hypertension</article-title>. <source>Eur Respir J</source>. (<year>2016</year>) <volume>48</volume>:<fpage>229</fpage>&#x2013;<lpage>41</lpage>. doi: <pub-id pub-id-type="doi">10.1183/13993003.02030-2015</pub-id></mixed-citation></ref>
<ref id="ref6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wrobel</surname><given-names>JP</given-names></name> <name><surname>Thompson</surname><given-names>BR</given-names></name> <name><surname>Williams</surname><given-names>TJ</given-names></name></person-group>. <article-title>Mechanisms of pulmonary hypertension in chronic obstructive pulmonary disease: a pathophysiologic review</article-title>. <source>J Heart Lung Transplant</source>. (<year>2012</year>) <volume>31</volume>:<fpage>557</fpage>&#x2013;<lpage>64</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.healun.2012.02.029</pub-id>, <pub-id pub-id-type="pmid">22502811</pub-id></mixed-citation></ref>
<ref id="ref7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname><given-names>D</given-names></name> <name><surname>Liu</surname><given-names>C</given-names></name> <name><surname>Wang</surname><given-names>L</given-names></name> <name><surname>Li</surname><given-names>J</given-names></name> <name><surname>Zhao</surname><given-names>Y</given-names></name> <name><surname>Deng</surname><given-names>Z</given-names></name> <etal/></person-group>. <article-title>Prediction of clinical risk assessment and survival in chronic obstructive pulmonary disease with pulmonary hypertension</article-title>. <source>Clin Transl Med</source>. (<year>2024</year>) <volume>14</volume>:<fpage>e1702</fpage>. doi: <pub-id pub-id-type="doi">10.1002/ctm2.1702</pub-id>, <pub-id pub-id-type="pmid">38861300</pub-id></mixed-citation></ref>
<ref id="ref8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barber&#x00E0;</surname><given-names>JA</given-names></name> <name><surname>Blanco</surname><given-names>I</given-names></name></person-group>. <article-title>Pulmonary hypertension in patients with chronic obstructive pulmonary disease: advances in pathophysiology and management</article-title>. <source>Drugs</source>. (<year>2009</year>) <volume>69</volume>:<fpage>1153</fpage>&#x2013;<lpage>71</lpage>. doi: <pub-id pub-id-type="doi">10.2165/00003495-200969090-00002</pub-id>, <pub-id pub-id-type="pmid">19537834</pub-id></mixed-citation></ref>
<ref id="ref9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nugraha</surname><given-names>IBA</given-names></name> <name><surname>Deviani</surname><given-names>IAPP</given-names></name></person-group>. <article-title>Management of pulmonal hypertension in chronic obstructive pulmonary disease</article-title>. <source>Jurnal Kedokteran Raflesia</source>. (<year>2023</year>) <volume>9</volume>:<fpage>36</fpage>&#x2013;<lpage>47</lpage>. doi: <pub-id pub-id-type="doi">10.33369/juke.v9i1.28838</pub-id></mixed-citation></ref>
<ref id="ref10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Grimminger</surname><given-names>J</given-names></name> <name><surname>Ghofrani</surname><given-names>HA</given-names></name> <name><surname>Weissmann</surname><given-names>N</given-names></name> <name><surname>Klose</surname><given-names>H</given-names></name> <name><surname>Grimminger</surname><given-names>F</given-names></name></person-group>. <article-title>COPD-associated pulmonary hypertension: clinical implications and current methods for treatment</article-title>. <source>Expert Rev Respir Med</source>. (<year>2016</year>) <volume>10</volume>:<fpage>755</fpage>&#x2013;<lpage>66</lpage>. doi: <pub-id pub-id-type="doi">10.1080/17476348.2016.1190275</pub-id>, <pub-id pub-id-type="pmid">27212458</pub-id></mixed-citation></ref>
<ref id="ref11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hoeper</surname><given-names>MM</given-names></name> <name><surname>Humbert</surname><given-names>M</given-names></name> <name><surname>Souza</surname><given-names>R</given-names></name> <name><surname>Idrees</surname><given-names>M</given-names></name> <name><surname>Kawut</surname><given-names>SM</given-names></name> <name><surname>Sliwa-Hahnle</surname><given-names>K</given-names></name> <etal/></person-group>. <article-title>A global view of pulmonary hypertension</article-title>. <source>Lancet Respir Med</source>. (<year>2016</year>) <volume>4</volume>:<fpage>306</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2213-2600(15)00543-3</pub-id>, <pub-id pub-id-type="pmid">26975810</pub-id></mixed-citation></ref>
<ref id="ref12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kovacs</surname><given-names>G</given-names></name> <name><surname>Bartolome</surname><given-names>S</given-names></name> <name><surname>Denton</surname><given-names>CP</given-names></name> <name><surname>Gatzoulis</surname><given-names>MA</given-names></name> <name><surname>Gu</surname><given-names>S</given-names></name> <name><surname>Khanna</surname><given-names>D</given-names></name> <etal/></person-group>. <article-title>Definition, classification and diagnosis of pulmonary hypertension</article-title>. <source>Eur Respir J</source>. (<year>2024</year>) <volume>64</volume>:<fpage>2401324</fpage>. doi: <pub-id pub-id-type="doi">10.1183/13993003.01324-2024</pub-id>, <pub-id pub-id-type="pmid">39209475</pub-id></mixed-citation></ref>
<ref id="ref13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>D'Alto</surname><given-names>M</given-names></name> <name><surname>Dimopoulos</surname><given-names>K</given-names></name> <name><surname>Coghlan</surname><given-names>JG</given-names></name> <name><surname>Kovacs</surname><given-names>G</given-names></name> <name><surname>Rosenkranz</surname><given-names>S</given-names></name> <name><surname>Naeije</surname><given-names>R</given-names></name></person-group>. <article-title>Right heart catheterization for the diagnosis of pulmonary hypertension: controversies and practical issues</article-title>. <source>Heart Fail Clin</source>. (<year>2018</year>) <volume>14</volume>:<fpage>467</fpage>&#x2013;<lpage>77</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.hfc.2018.03.011</pub-id></mixed-citation></ref>
<ref id="ref14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rosenkranz</surname><given-names>S</given-names></name> <name><surname>Preston</surname><given-names>IR</given-names></name></person-group>. <article-title>Right heart catheterisation: best practice and pitfalls in pulmonary hypertension</article-title>. <source>Eur Respir Rev</source>. (<year>2015</year>) <volume>24</volume>:<fpage>642</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1183/16000617.0062-2015</pub-id>, <pub-id pub-id-type="pmid">26621978</pub-id></mixed-citation></ref>
<ref id="ref15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Habib</surname><given-names>G</given-names></name> <name><surname>Torbicki</surname><given-names>A</given-names></name></person-group>. <article-title>The role of echocardiography in the diagnosis and management of patients with pulmonary hypertension</article-title>. <source>Eur Respir Rev</source>. (<year>2010</year>) <volume>19</volume>:<fpage>288</fpage>&#x2013;<lpage>99</lpage>. doi: <pub-id pub-id-type="doi">10.1183/09059180.00008110</pub-id>, <pub-id pub-id-type="pmid">21119187</pub-id></mixed-citation></ref>
<ref id="ref16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>D'Alto</surname><given-names>M</given-names></name> <name><surname>Bossone</surname><given-names>E</given-names></name> <name><surname>Opotowsky</surname><given-names>AR</given-names></name> <name><surname>Ghio</surname><given-names>S</given-names></name> <name><surname>Rudski</surname><given-names>LG</given-names></name> <name><surname>Naeije</surname><given-names>R</given-names></name></person-group>. <article-title>Strengths and weaknesses of echocardiography for the diagnosis of pulmonary hypertension</article-title>. <source>Int J Cardiol</source>. (<year>2018</year>) <volume>263</volume>:<fpage>177</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijcard.2018.04.024</pub-id>, <pub-id pub-id-type="pmid">29655950</pub-id></mixed-citation></ref>
<ref id="ref17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Boutros</surname><given-names>P</given-names></name> <name><surname>Kassem</surname><given-names>N</given-names></name> <name><surname>Boudo</surname><given-names>V</given-names></name> <name><surname>Si&#x00E9;</surname><given-names>A</given-names></name> <name><surname>Munga</surname><given-names>S</given-names></name> <name><surname>Maggioni</surname><given-names>MA</given-names></name> <etal/></person-group>. <article-title>Understanding the risk factors, burden, and interventions for chronic respiratory diseases in low- and middle-income countries: a scoping review</article-title>. <source>Public Health Rev</source>. (<year>2024</year>) <volume>45</volume>:<fpage>1607339</fpage>. doi: <pub-id pub-id-type="doi">10.3389/phrs.2024.1607339</pub-id>, <pub-id pub-id-type="pmid">39544625</pub-id></mixed-citation></ref>
<ref id="ref18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jutras-Beaudoin</surname><given-names>N</given-names></name> <name><surname>Toro</surname><given-names>V</given-names></name> <name><surname>Lajoie</surname><given-names>AC</given-names></name> <name><surname>Breuils-Bonnet</surname><given-names>S</given-names></name> <name><surname>Paulin</surname><given-names>R</given-names></name> <name><surname>Potus</surname><given-names>F</given-names></name></person-group>. <article-title>Neutrophil-lymphocyte ratio as an independent predictor of survival in pulmonary arterial hypertension: an exploratory study</article-title>. <source>CJC Open</source>. (<year>2022</year>) <volume>4</volume>:<fpage>357</fpage>&#x2013;<lpage>63</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cjco.2021.11.010</pub-id>, <pub-id pub-id-type="pmid">35495856</pub-id></mixed-citation></ref>
<ref id="ref19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harbaum</surname><given-names>L</given-names></name> <name><surname>Baaske</surname><given-names>KM</given-names></name> <name><surname>Simon</surname><given-names>M</given-names></name> <name><surname>Oqueka</surname><given-names>T</given-names></name> <name><surname>Sinning</surname><given-names>C</given-names></name> <name><surname>Glatzel</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Exploratory analysis of the neutrophil to lymphocyte ratio in patients with pulmonary arterial hypertension</article-title>. <source>BMC Pulm Med</source>. (<year>2017</year>) <volume>17</volume>:<fpage>72</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12890-017-0407-5</pub-id>, <pub-id pub-id-type="pmid">28446163</pub-id></mixed-citation></ref>
<ref id="ref20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mohamed</surname><given-names>MF</given-names></name> <name><surname>Ali</surname><given-names>A</given-names></name> <name><surname>Abbas</surname><given-names>A</given-names></name> <name><surname>Awad</surname><given-names>MS</given-names></name> <name><surname>Gouda</surname><given-names>M</given-names></name> <name><surname>Sediq</surname><given-names>AM</given-names></name></person-group>. <article-title>Mean platelet volume as a predictor of pulmonary hypertension in patients with stable COPD</article-title>. <source>Int J Chron Obstruct Pulmon Dis</source>. (<year>2019</year>) <volume>14</volume>:<fpage>1099</fpage>&#x2013;<lpage>108</lpage>. doi: <pub-id pub-id-type="doi">10.2147/COPD.S176413</pub-id>, <pub-id pub-id-type="pmid">31213790</pub-id></mixed-citation></ref>
<ref id="ref21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>J</given-names></name> <name><surname>Liu</surname><given-names>C</given-names></name> <name><surname>Li</surname><given-names>L</given-names></name> <name><surname>Tu</surname><given-names>X</given-names></name> <name><surname>Lu</surname><given-names>Z</given-names></name></person-group>. <article-title>Red blood cell distribution width predicts pulmonary hypertension secondary to chronic obstructive pulmonary disease</article-title>. <source>Can Respir J</source>. (<year>2019</year>) <volume>2019</volume>:<fpage>3853454</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2019/3853454</pub-id></mixed-citation></ref>
<ref id="ref22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Anand</surname><given-names>V</given-names></name> <name><surname>Weston</surname><given-names>AD</given-names></name> <name><surname>Scott</surname><given-names>CG</given-names></name> <name><surname>Kane</surname><given-names>GC</given-names></name> <name><surname>Pellikka</surname><given-names>PA</given-names></name> <name><surname>Carter</surname><given-names>RE</given-names></name></person-group>. <article-title>Machine learning for diagnosis of pulmonary hypertension by echocardiography</article-title>. <source>Mayo Clin Proc</source>. (<year>2024</year>) <volume>99</volume>:<fpage>260</fpage>&#x2013;<lpage>70</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.mayocp.2023.05.006</pub-id>, <pub-id pub-id-type="pmid">38309937</pub-id></mixed-citation></ref>
<ref id="ref23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barbieri</surname><given-names>S</given-names></name> <name><surname>Mehta</surname><given-names>S</given-names></name> <name><surname>Wu</surname><given-names>B</given-names></name> <name><surname>Bharat</surname><given-names>C</given-names></name> <name><surname>Poppe</surname><given-names>K</given-names></name> <name><surname>Jorm</surname><given-names>L</given-names></name> <etal/></person-group>. <article-title>Predicting cardiovascular risk from national administrative databases using a combined survival analysis and deep learning approach</article-title>. <source>Int J Epidemiol</source>. (<year>2022</year>) <volume>51</volume>:<fpage>931</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.1093/ije/dyab258</pub-id>, <pub-id pub-id-type="pmid">34910160</pub-id></mixed-citation></ref>
<ref id="ref24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frasca</surname><given-names>M</given-names></name> <name><surname>La Torre</surname><given-names>D</given-names></name> <name><surname>Pravettoni</surname><given-names>G</given-names></name> <name><surname>Cutica</surname><given-names>I</given-names></name></person-group>. <article-title>Explainable and interpretable artificial intelligence in medicine: a systematic bibliometric review</article-title>. <source>Discov Artif Intell</source>. (<year>2024</year>) <volume>4</volume>:<fpage>15</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s44163-024-00114-7</pub-id></mixed-citation></ref>
<ref id="ref25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shaik</surname><given-names>T</given-names></name> <name><surname>Tao</surname><given-names>X</given-names></name> <name><surname>Li</surname><given-names>L</given-names></name> <name><surname>Xie</surname><given-names>H</given-names></name> <name><surname>Vel&#x00E1;squez</surname><given-names>JD</given-names></name></person-group>. <article-title>A survey of multimodal information fusion for smart healthcare: mapping the journey from data to wisdom</article-title>. <source>Inf Fusion</source>. (<year>2024</year>) <volume>102</volume>:<fpage>102040</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2023.102040</pub-id></mixed-citation></ref>
<ref id="ref26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Agust&#x00ED;</surname><given-names>A</given-names></name> <name><surname>Celli</surname><given-names>BR</given-names></name> <name><surname>Criner</surname><given-names>GJ</given-names></name> <name><surname>Halpin</surname><given-names>D</given-names></name> <name><surname>Anzueto</surname><given-names>A</given-names></name> <name><surname>Barnes</surname><given-names>P</given-names></name> <etal/></person-group>. <article-title>Global initiative for chronic obstructive lung disease 2023 report: GOLD executive summary</article-title>. <source>Eur Respir J</source>. (<year>2023</year>) <volume>61</volume>:<fpage>2300239</fpage>. doi: <pub-id pub-id-type="doi">10.1183/13993003.00239-2023</pub-id>, <pub-id pub-id-type="pmid">36858443</pub-id></mixed-citation></ref>
<ref id="ref27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Humbert</surname><given-names>M</given-names></name> <name><surname>Kovacs</surname><given-names>G</given-names></name> <name><surname>Hoeper</surname><given-names>MM</given-names></name> <name><surname>Badagliacca</surname><given-names>R</given-names></name> <name><surname>Berger</surname><given-names>RMF</given-names></name> <name><surname>Brida</surname><given-names>M</given-names></name> <etal/></person-group>. <article-title>2022 ESC/ERS guidelines for the diagnosis and treatment of pulmonary hypertension</article-title>. <source>Eur Heart J</source>. (<year>2022</year>) <volume>43</volume>:<fpage>3618</fpage>&#x2013;<lpage>731</lpage>. doi: <pub-id pub-id-type="doi">10.1093/eurheartj/ehac237</pub-id>, <pub-id pub-id-type="pmid">36017548</pub-id></mixed-citation></ref>
<ref id="ref28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname><given-names>H</given-names></name> <name><surname>Wang</surname><given-names>X</given-names></name> <name><surname>Zhu</surname><given-names>R</given-names></name></person-group>. <article-title>Feature selection based on mutual information with correlation coefficient</article-title>. <source>Appl Intell</source>. (<year>2021</year>) <volume>52</volume>:<fpage>5457</fpage>&#x2013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10489-021-02524-x</pub-id></mixed-citation></ref>
<ref id="ref29"><label>29.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cohen</surname><given-names>J</given-names></name></person-group>. <source>Statistical power analysis for the behavioral sciences</source>. <edition>2nd</edition> ed. <publisher-loc>Hillsdale, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates</publisher-name> (<year>1988</year>).</mixed-citation></ref>
<ref id="ref30"><label>30.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Prokhorenkova</surname><given-names>L</given-names></name> <name><surname>Gusev</surname><given-names>G</given-names></name> <name><surname>Vorobev</surname><given-names>A</given-names></name> <name><surname>Dorogush</surname><given-names>AV</given-names></name> <name><surname>Gulin</surname><given-names>A</given-names></name></person-group>. "<article-title>CatBoost: unbiased boosting with categorical features</article-title>." In: <person-group person-group-type="editor"><name><surname>Bengio</surname><given-names>S</given-names></name> <name><surname>Wallach</surname><given-names>H</given-names></name> <name><surname>Larochelle</surname><given-names>H</given-names></name> <name><surname>Grauman</surname><given-names>K</given-names></name> <name><surname>Cesa-Bianchi</surname><given-names>N</given-names></name> <name><surname>Garnett</surname><given-names>R</given-names></name></person-group>, editors. <source>Advances in neural information processing systems</source>. <publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates, Inc.</publisher-name> (<year>2018</year>). <volume>31</volume>.</mixed-citation></ref>
<ref id="ref31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cai</surname><given-names>Y</given-names></name> <name><surname>Yuan</surname><given-names>Y</given-names></name> <name><surname>Zhou</surname><given-names>A</given-names></name></person-group>. <article-title>Predictive slope stability early warning model based on CatBoost</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>:<fpage>25727</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-024-77058-6</pub-id></mixed-citation></ref>
<ref id="ref32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rao</surname><given-names>C</given-names></name> <name><surname>Wei</surname><given-names>X</given-names></name> <name><surname>Xiao</surname><given-names>X</given-names></name> <name><surname>Shi</surname><given-names>Y</given-names></name> <name><surname>Goh</surname><given-names>M</given-names></name></person-group>. <article-title>Oversampling method via adaptive double weights and Gaussian kernel function for the transformation of unbalanced data in risk assessment of cardiovascular disease</article-title>. <source>Inf Sci</source>. (<year>2024</year>) <volume>665</volume>:<fpage>120410</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2024.120410</pub-id></mixed-citation></ref>
<ref id="ref33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Celeski</surname><given-names>M</given-names></name> <name><surname>Segreti</surname><given-names>A</given-names></name> <name><surname>Polito</surname><given-names>D</given-names></name> <name><surname>Valente</surname><given-names>D</given-names></name> <name><surname>Vicchio</surname><given-names>L</given-names></name> <name><surname>Di Gioia</surname><given-names>G</given-names></name> <etal/></person-group>. <article-title>Traditional and advanced echocardiographic evaluation in chronic obstructive pulmonary disease: the forgotten relation</article-title>. <source>Am J Cardiol</source>. (<year>2024</year>) <volume>217</volume>:<fpage>102</fpage>&#x2013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.amjcard.2024.02.022</pub-id>, <pub-id pub-id-type="pmid">38412881</pub-id></mixed-citation></ref>
<ref id="ref34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mandoli</surname><given-names>GE</given-names></name> <name><surname>Sciaccaluga</surname><given-names>C</given-names></name> <name><surname>Bandera</surname><given-names>F</given-names></name> <name><surname>Cameli</surname><given-names>P</given-names></name> <name><surname>Esposito</surname><given-names>R</given-names></name> <name><surname>D'Andrea</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Cor pulmonale: the role of traditional and advanced echocardiography in the acute and chronic settings</article-title>. <source>Heart Fail Rev</source>. (<year>2020</year>) <volume>26</volume>:<fpage>263</fpage>&#x2013;<lpage>75</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10741-020-10014-4</pub-id>, <pub-id pub-id-type="pmid">32860180</pub-id></mixed-citation></ref>
<ref id="ref35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kovacs</surname><given-names>G</given-names></name> <name><surname>Avian</surname><given-names>A</given-names></name> <name><surname>Bachmaier</surname><given-names>G</given-names></name> <name><surname>Troester</surname><given-names>N</given-names></name> <name><surname>Tornyos</surname><given-names>A</given-names></name> <name><surname>Douschan</surname><given-names>P</given-names></name> <etal/></person-group>. <article-title>Severe pulmonary hypertension in COPD</article-title>. <source>Chest</source>. (<year>2022</year>) <volume>162</volume>:<fpage>202</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chest.2022.01.031</pub-id>, <pub-id pub-id-type="pmid">35092746</pub-id></mixed-citation></ref>
<ref id="ref36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Balasubramanian</surname><given-names>A</given-names></name> <name><surname>Kolb</surname><given-names>TM</given-names></name> <name><surname>Damico</surname><given-names>RL</given-names></name> <name><surname>Hassoun</surname><given-names>PM</given-names></name> <name><surname>McCormack</surname><given-names>MC</given-names></name> <name><surname>Mathai</surname><given-names>SC</given-names></name></person-group>. <article-title>Diffusing capacity is an independent predictor of outcomes in pulmonary hypertension associated with COPD</article-title>. <source>Chest</source>. (<year>2020</year>) <volume>158</volume>:<fpage>722</fpage>&#x2013;<lpage>34</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chest.2020.02.047</pub-id>, <pub-id pub-id-type="pmid">32184109</pub-id></mixed-citation></ref>
<ref id="ref37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sivakumar</surname><given-names>DK</given-names></name> <name><surname>Anandaraj</surname><given-names>J</given-names></name> <name><surname>Shanavaz</surname><given-names>B</given-names></name> <name><surname>Nirojkumar</surname><given-names>E</given-names></name></person-group>. <article-title>An echocardiographic analysis of the right ventricular morphological changes in pulmonary hypertension</article-title>. <source>Int J Health Sci</source>. (<year>2022</year>) <volume>6</volume>:<fpage>1</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.53730/ijhs.v6n7.10661</pub-id></mixed-citation></ref>
<ref id="ref38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname><given-names>T-C</given-names></name> <name><surname>Wang</surname><given-names>C-M</given-names></name> <name><surname>Ho</surname><given-names>C-H</given-names></name> <name><surname>Chen</surname><given-names>Y-C</given-names></name> <name><surname>Liao</surname><given-names>C-T</given-names></name> <name><surname>Shieh</surname><given-names>J-M</given-names></name> <etal/></person-group>. <article-title>A prevalence study focusing on hospitalized COPD related pulmonary hypertension</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>:<fpage>12426</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-025-96629-9</pub-id>, <pub-id pub-id-type="pmid">40216921</pub-id></mixed-citation></ref>
<ref id="ref39"><label>39.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jarad</surname><given-names>N</given-names></name></person-group>. <article-title>Chronic obstructive pulmonary disease (COPD) and old age?</article-title> <source>Chron Respir Dis</source>. (<year>2011</year>) <volume>8</volume>:<fpage>143</fpage>&#x2013;<lpage>51</lpage>. doi: <pub-id pub-id-type="doi">10.1177/1479972311407218</pub-id>, <pub-id pub-id-type="pmid">21596895</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0002">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1846963/overview">Paolo Scanagatta</ext-link>, ASST Valtellina e Alto Lario, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0003">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1373881/overview">Yuqin Chen</ext-link>, First Affiliated Hospital of Guangzhou Medical University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1504813/overview">Tarik Kivrak</ext-link>, Firat University, T&#x00FC;rkiye</p>
</fn>
</fn-group>
</back>
</article>