<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="review-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Pediatr.</journal-id><journal-title-group>
<journal-title>Frontiers in Pediatrics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Pediatr.</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2296-2360</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fped.2026.1648943</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Systematic Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Artificial intelligence for patent ductus arteriosus&#x2014;a systematic review</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Long</surname><given-names>Sarah Elizabeth</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3008769/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Uden</surname><given-names>Theodor</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3351341/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Peter</surname><given-names>Corinna</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3351344/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Oeltze-Jafra</surname><given-names>Steffen</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3351383/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Beerbaum</surname><given-names>Philipp</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2546470/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Pediatric Cardiology and Intensive Care, Hannover Medical School</institution>, <city>Hannover</city>, <country country="de">Germany</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Pediatric Neonatology, Pulmonology and Allergology, Hannover Medical School</institution>, <city>Hannover</city>, <country country="de">Germany</country></aff>
<aff id="aff3"><label>3</label><institution>Peter L. Reichertz Institute for Medical Informatics, Hannover Medical School</institution>, <city>Hannover</city>, <country country="de">Germany</country></aff>
<aff id="aff4"><label>4</label><institution>CAIMed: Lower Saxony Center for AI &#x0026; Causal Methods in Medicine</institution>, <city>Hannover</city>, <state>Lower Saxony</state>, <country country="de">Germany</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Sarah Elizabeth Long <email xlink:href="mailto:long.sarah@mh-hannover.de">long.sarah@mh-hannover.de</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-30"><day>30</day><month>01</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>14</volume><elocation-id>1648943</elocation-id>
<history>
<date date-type="received"><day>17</day><month>06</month><year>2025</year></date>
<date date-type="rev-recd"><day>25</day><month>12</month><year>2025</year></date>
<date date-type="accepted"><day>07</day><month>01</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Long, Uden, Peter, Oeltze-Jafra and Beerbaum.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Long, Uden, Peter, Oeltze-Jafra and Beerbaum</copyright-holder><license><ali:license_ref start_date="2026-01-30">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Optimal management of patent ductus arteriosus (PDA) remains controversial. Complexity in severity appraisal, high-dimensional data, and the need for longitudinal, individualized assessment make PDA a compelling candidate for Artificial Intelligence (AI)-driven approaches. This systematic review evaluates AI research in the context of PDA, identifying strengths, limitations, and future directions.</p>
</sec><sec><title>Methods</title>
<p>Following PRISMA 2020, databases were searched for peer-reviewed articles from January 1, 2010, to May 31, 2025. Eleven studies met inclusion criteria. Data on design, population, sources, AI methods, performance, validation, limitations, and explainability were extracted. Risk of bias was assessed using the Prediction model Risk of Bias Assessment Tool and Joanna Briggs Institute Critical Appraisal Checklist; reporting quality using the Minimum Information about Clinical AI Modeling checklist. Heterogeneity precluded meta-analysis; therefore, findings were synthesized narratively.</p>
</sec><sec><title>Results</title>
<p>Eleven studies addressed diagnosis/screening (<italic>n</italic>&#x2009;&#x003D;&#x2009;5), treatment-response prediction (<italic>n</italic>&#x2009;&#x003D;&#x2009;2), risk-factor identification (<italic>n</italic>&#x2009;&#x003D;&#x2009;2), treatment-complication prediction (<italic>n</italic>&#x2009;&#x003D;&#x2009;1), and subphenotype analysis (<italic>n</italic>&#x2009;&#x003D;&#x2009;1). Ten were retrospective; nine single-center, one multi-center, and one used a national registry. Sample sizes were mostly &#x003C;500 (range: 66&#x2013;8,369). Definitions of PDA subgroups&#x2014;symptomatic and hemodynamically significant PDA&#x2014;varied significantly. Populations included preterm, neonatal and pediatric cohorts, often excluding other congenital heart disease, pulmonary hypertension, or early mortality. Input data ranged from multimodal parameters to high-dimensional unimodal sources. Ten studies used supervised learning; nine traditional machine learning; five deep learning. No study performed adequate external validation. Diagnostic models achieved AUCs of 0.74&#x2013;0.93; however, risk of bias was high, particularly in analysis, suggesting overfitting. Models addressing other aspects showed modest performance. None of the included studies exhibited low risk of bias. Most studies addressed explainability to some degree; only one addressed clinical utility; none evaluated fairness. Reproducibility was hindered by manual preprocessing and limited sharing of data, models, or code.</p>
</sec><sec><title>Conclusions</title>
<p>Artificial intelligence shows feasibility for supporting PDA risk stratification, diagnosis, severity assessment, and prediction of treatment-related outcomes. However, current applications remain in early, pilot-stage development and are not yet suitable for clinical implementation. Future work should prioritize clinically meaningful tasks, scientifically rigorous and bias-aware methodologies, larger and more representative cohorts, and systematic external validation. Fairness, explainability, and reproducibility must be addressed to support translation. Continued methodological refinement and clinical grounding will be key to unlocking the potential of these technologies for this highly vulnerable patient population in the future.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence (AI)</kwd>
<kwd>deep learning</kwd>
<kwd>ductus arteriosus</kwd>
<kwd>machine learning</kwd>
<kwd>patent ductus arteriosus</kwd>
<kwd>PDA</kwd>
<kwd>PROBAST</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. Sarah Elizabeth Long received funding through her participation in the Clinician Scientist Program &#x201C;TITUS&#x2014;The First Thousand Days of Life&#x201D; (grant no. 2022_EKFK.12), supported by the Else Kr&#x00F6;ner-Fresenius-Stiftung and Hannover Medical School.</funding-statement></funding-group><counts>
<fig-count count="1"/>
<table-count count="5"/><equation-count count="0"/><ref-count count="56"/><page-count count="20"/><word-count count="84524"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Neonatology</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<sec id="s1a"><label>1.1</label><title>The clinical problem</title>
<p>Patent ductus arteriosus (PDA) describes the persistence of a fetal blood vessel connecting the aorta and pulmonary artery beyond 72&#x2005;h after birth, resulting in abnormal circulation (<xref ref-type="bibr" rid="B1">1</xref>).</p>
<p>Despite decades of research efforts, PDA assessment and management remain challenging (<xref ref-type="bibr" rid="B2">2</xref>). Depending on the context, direction and volume of shunting, a PDA may be protective&#x2014;as in duct-dependent congenital heart disease or pulmonary hypertension&#x2014;a benign bystander, or hemodynamically significant. In contexts of early postnatal discharge or limited healthcare resources, a PDA may go undetected, only discovered at neonatal readmission due to complications or incidentally (<xref ref-type="bibr" rid="B3">3</xref>).</p>
<p>In the case of hemodynamically significant PDA, excessive left-to-right shunting can lead to pulmonary overcirculation and systemic steal, which is associated with serious comorbidities in preterm neonates, including chronic lung disease, intraventricular hemorrhage, necrotizing enterocolitis, heart failure, pulmonary hypertension, and increased mortality (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B5">5</xref>).</p>
<p>The hemodynamic impact of a PDA exists on a continuum; while some cases can be readily deemed as insignificant or significant, many fall within intermediate states that defy binary classification. Further complicating clinical assessment, the hemodynamic status of a PDA is inherently dynamic and spontaneous closure can occur. In the first days and weeks of life, shunt direction typically shifts from right-to-left or bidirectional&#x2014;due to elevated pulmonary resistance&#x2014;to left-to-right as pulmonary pressure falls (<xref ref-type="bibr" rid="B2">2</xref>). A PDA that initially appears insignificant may progress to hemodynamic significance, while one that seems likely to require intervention may begin to close spontaneously. About one-third of infants under 1,000&#x2005;g experience spontaneous closure within the first 2&#x2013;6 days (<xref ref-type="bibr" rid="B6">6</xref>), and around 47&#x0025; of very preterm infants within the first year (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<p>As such, longitudinal evaluation is essential to monitor the PDA&#x0027;s evolution over time. Currently, serial echocardiography remains the gold standard for evaluating PDA, providing cross-sectional snapshots that assist clinicians in determining whether the ductus is likely to close spontaneously or progress toward hemodynamic significance (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>To mitigate subjectivity and facilitate longitudinal appraisal, clinicians have traditionally relied on the comparison of unidimensional parameters over time, such as measurements of PDA diameter, LA:Ao ratio and the presence of retrograde diastolic flow in abdominal arteries. However, many of these parameters are subject to considerable intra- and inter-observer variability and demonstrate limited correlation with PDA-associated comorbidities (<xref ref-type="bibr" rid="B9">9</xref>).</p>
<p>As a result, despite decades of research, diagnostic criteria for hemodynamically significant PDA remain non-standardized. Five main categories of echocardiographic parameters can be found in the literature&#x2014;duct assessment, pulmonary overcirculation, systemic hypoperfusion, end-organ perfusion, and myocardial performance&#x2014;with considerable variation in which parameters are selected, how they are weighted, and cut-off thresholds across institutions (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>To support PDA assessment via echocardiography, clinicians also consider diverse multimodal data (<xref ref-type="bibr" rid="B4">4</xref>), including but not limited to ventilator dependency, comorbidities, cardiac stress biomarkers like BNP and N-terminal pro-BNP (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>), and imaging tools for additional insights into systemic perfusion such as renal and cerebral Doppler ultrasound and near-infrared spectroscopy (NIRS) (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>More recent efforts to capture PDA severity in a standardized manner include a variety of echocardiography-based PDA severity scores, summarized in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>. Multiple scores demonstrate high predictive performance on their internal validation sets, but face challenges in clinical adoption, likely due to the inclusion of parameters that are not routinely measured, a lack of external validation and, on the receiving end, heterogeneous institution-specific practices (<xref ref-type="bibr" rid="B10">10</xref>). Furthermore, the frequent exclusion of neonates with additional congenital heart disease from development and validation cohorts, together with reliance on data from high-income countries, limits the generalizability of these scores to neonates with congenital heart disease comorbidities and to populations in low- and middle-income countries.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Echocardiography-based PDA severity scores ordered from left to right based on year of publication.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Score</th>
<th valign="top" align="center">McNamara-Sehgal Score (<xref ref-type="bibr" rid="B52">52</xref>)</th>
<th valign="top" align="center">El-Khuffash et al. PDA Severity Score (PDAsc) (<xref ref-type="bibr" rid="B35">35</xref>)</th>
<th valign="top" align="center">Shaare Zedek Score (<xref ref-type="bibr" rid="B53">53</xref>)</th>
<th valign="top" align="center">Umapathi et al. PDA Severity Score (PDAss) (<xref ref-type="bibr" rid="B36">36</xref>)</th>
<th valign="top" align="center">Iowa PDA Score (<xref ref-type="bibr" rid="B54">54</xref>)</th>
<th valign="top" align="center">PLASE Score (<xref ref-type="bibr" rid="B55">55</xref>)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Predictive Outcome</td>
<td valign="top" align="left">CLD</td>
<td valign="top" align="left">CLD/death</td>
<td valign="top" align="left">CLD/death</td>
<td valign="top" align="left">CLD/death</td>
<td valign="top" align="left">death &#x003C;36 weeks or severe BPD</td>
<td valign="top" align="left">Surgical ligation of PDA</td>
</tr>
<tr>
<td valign="top" align="left">Timepoint of Parameter Acquisition</td>
<td valign="top" align="left">Day of Ibuprofen therapy</td>
<td valign="top" align="left">2nd day of life</td>
<td valign="top" align="left">2nd day of life</td>
<td valign="top" align="left">Within 7 days of life</td>
<td valign="top" align="left">Between 12 and 24&#x2005;h of life</td>
<td valign="top" align="left">3rd day of life</td>
</tr>
<tr>
<td valign="top" align="left">Study Population for Score Development</td>
<td valign="top" align="left">52 preterm neonates with GA&#x2009;&#x003C;&#x2009;32 weeks who received Ibuprofen for PDA closure; 27 developed CLD</td>
<td valign="top" align="left">118 preterm neonates with GA&#x2009;&#x003C;&#x2009;29 weeks and open ductus arteriosus on day 2; 65 developed CLD, 15 died before discharge</td>
<td valign="top" align="left">Derived from common practice at Shaare Zedek Medical Center (SZMC), validated via El-Khuffash cohort</td>
<td valign="top" align="left">98 preterm neonates with GA&#x2009;&#x003C;&#x2009;32 weeks; 34 developed CLD, 2 died before discharge</td>
<td valign="top" align="left">Derived from protocol at University of Iowa, applied in cohort of 73 preterm neonates &#x003C; 24 weeks GA as part of assessment of early hemodynamic screening impact</td>
<td valign="top" align="left">692 preterm neonates with GA&#x2009;&#x003C;&#x2009;30 weeks; 77 required surgical ligation</td>
</tr>
<tr>
<td valign="top" align="left">Country of Origin</td>
<td valign="top" align="left">Australia</td>
<td valign="top" align="left">Ireland, Canada, and Australia</td>
<td valign="top" align="left">Score from Israel, data from Ireland, Canada, Australia</td>
<td valign="top" align="left">USA</td>
<td valign="top" align="left">USA</td>
<td valign="top" align="left">Japan</td>
</tr>
<tr>
<td valign="top" align="left">Parameters</td>
<td valign="top" align="left">PDA diameter; Max PDA velocity; PDA:LPA diameter; PAedv; LPAedv; LA:Ao ratio; LV:Ao ratio; LVO:SVC; Mitral E/A ratio; IVRT</td>
<td valign="top" align="left">GA; PDA diameter; Max PDA velocity; LVO; LV a&#x2019; wave</td>
<td valign="top" align="left">PDA diameter; LA:Ao ratio; DFR; PDA Doppler shunt pattern</td>
<td valign="top" align="left">GA; PPI; LVO; SMA VTI; PV Vd; DFR in dAo</td>
<td valign="top" align="left">PDA diameter: weight; LA:Ao ratio; PV D wave; LVO:RVO; Mitral E/A ratio; IVRT; DFR in dAo, celiac, or middle cerebral artery</td>
<td valign="top" align="left">GA; PDA diameter; LPAedv; LA:Ao</td>
</tr>
<tr>
<td valign="top" align="left">AUC</td>
<td valign="top" align="left">0.91 (95&#x0025; CI: 0.83&#x2013;1.00)</td>
<td valign="top" align="left">0.92 (95&#x0025; CI: 0.86&#x2013;0.97)</td>
<td valign="top" align="left">Not reported<xref ref-type="table-fn" rid="TF2">&#x002A;</xref></td>
<td valign="top" align="left">0.97 (95&#x0025; CI: 0.93&#x2013;0.99)</td>
<td valign="top" align="left">Not reported<xref ref-type="table-fn" rid="TF3">&#x002A;&#x002A;</xref></td>
<td valign="top" align="left">0.827 (0.744&#x2013;0.911)</td>
</tr>
<tr>
<td valign="top" align="left">Sensitivity</td>
<td valign="top" align="left">88.90&#x0025;</td>
<td valign="top" align="left">92&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">94&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">Not reported</td>
</tr>
<tr>
<td valign="top" align="left">Specificity</td>
<td valign="top" align="left">88&#x0025;</td>
<td valign="top" align="left">87&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">93&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">Not reported</td>
</tr>
<tr>
<td valign="top" align="left">PPV</td>
<td valign="top" align="left">88.90&#x0025;</td>
<td valign="top" align="left">92&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">94&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">Not reported</td>
</tr>
<tr>
<td valign="top" align="left">NPV</td>
<td valign="top" align="left">88&#x0025;</td>
<td valign="top" align="left">82&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">93&#x0025;</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">Not reported</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>PDA, Patent ductus arteriosus; CLD, Chronic lung disease; BPD, Bronchopulmonary dysplasia; LPA, Left pulmonary artery; PA, Pulmonary artery; edv, End diastolic velocity; LA, Left atrium; Ao, Aortic root; LV, Left ventricle; GA, Gestational age; PPI, pulmonary perfusion index; LVO, Left ventricular output; SMA VTI, Superior Mesenteric Artery Velocity Time Integral; PV, Pulmonary vein; Vd, Peak diastolic flow velocity; DFR, Reversal of flow in diastole in descending aorta; IVRT, Isovolumetric relaxation time; RVO, Right ventricular output; SVC, Superior vena cava; Mitral E/A ratio, Mitral inflow velocities; a&#x2019; wave, tissue Doppler atrial contraction wave; PV D wave, pulmonary vein diastolic wave.</p></fn>
<fn id="TF2"><label>&#x002A;</label>
<p>Correlation with El-Khuffash PDAsc in El-Khuffash cohort 0.62, <italic>p</italic>&#x2009;&#x003C;&#x2009;0.001; Correlation with CLD/death in El-Khuffash cohort: <italic>p</italic>&#x2009;&#x003D;&#x2009;0.02.</p></fn>
<fn id="TF3"><label>&#x002A;&#x002A;</label>
<p>Utilized as part of an early hemodynamic screening protocol which demonstrated a two-fold reduction in the composite primary outcome of death prior to 36 weeks or severe BPD in the &#x003C;24 weeks GA cohort.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>In attempting to standardize PDA severity assessment&#x2014;whether through individual parameters or scoring systems&#x2014;clinicians must reduce highly complex, multidimensional data into simplified metrics. This process, though necessary for human assessment, risks overlooking the complexity of PDA, is examiner dependent, and may direct clinical attention toward parameters that are not reliably predictive of relevant outcomes.</p>
<p>Establishing evidence-based treatment guidelines requires a body of research that demonstrates methodological consistency for reliable comparison and validity. Without such a foundation, it is unsurprising that consensus is lacking regarding which preterm infants benefit from treatment, when intervention should occur, and which therapeutic strategy is most effective (<xref ref-type="bibr" rid="B13">13</xref>).</p>
<p>Current treatment options include cyclooxygenase (COX) inhibitors (<xref ref-type="bibr" rid="B14">14</xref>), transcatheter occlusion devices, and surgical ligation (<xref ref-type="bibr" rid="B2">2</xref>). Historically, PDA management strategies have included prophylactic (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>), early targeted (<xref ref-type="bibr" rid="B17">17</xref>), and conservative approaches (<xref ref-type="bibr" rid="B18">18</xref>). In recent decades, there has been a marked shift toward expectant management (<xref ref-type="bibr" rid="B19">19</xref>) and transcatheter occlusion devices for definitive closure (<xref ref-type="bibr" rid="B2">2</xref>). This change has been driven by several factors: concerns about adverse effects of COX inhibitors, incomplete response to pharmacological closure attempts, risks associated with surgical intervention, high rates of spontaneous closure (<xref ref-type="bibr" rid="B6">6</xref>), and inconclusive findings from randomized controlled trials (RCTs) examining pharmacologic PDA treatment. Jansen et al. (<xref ref-type="bibr" rid="B20">20</xref>), analyzing 47 RCTs, found a potential reduction in severe IVH with early indomethacin but no consistent benefits otherwise, while Hundscheid et al. (<xref ref-type="bibr" rid="B21">21</xref>), reviewing four RCTs, reported no significant differences in outcomes between conservative and active management. Similarly, Mitra et al. (<xref ref-type="bibr" rid="B22">22</xref>), based on 14 RCTs, concluded that early treatment likely does not reduce mortality or major morbidities. However, reviews were limited by moderate to low certainty of evidence.</p>
<p>Experts hypothesize that treatment may still be beneficial, but only in a well-defined subgroup&#x2014;namely, infants with hemodynamically significant PDA&#x2014;and that the inconclusive results of RCTs likely reflect underlying limitations in study design (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B4">4</xref>). Many trials have relied on binary classifications&#x2014;distinguishing simply between the presence or absence of PDA&#x2014;thereby overlooking the nuanced spectrum of hemodynamic significance. Others, while aiming to focus on hemodynamically significant cases, employed heterogeneous and insufficiently validated criteria to define this subgroup (<xref ref-type="bibr" rid="B23">23</xref>). Furthermore, high rates of rescue therapy in control groups may have further diluted observable differences. In the absence of adequately stratified evidence, current data remain insufficient to support the development of evidence-based treatment guidelines.</p>
<p>This uncertainty is particularly consequential given the high prevalence of PDA in preterm infants&#x2014;affecting roughly 30&#x0025; of those born at 32 weeks&#x2019; gestation and as many as 70&#x0025; at 25 weeks (<xref ref-type="bibr" rid="B24">24</xref>). These extremely vulnerable neonates often present with a complex, individualized constellation of comorbidities. As such, assessing treatment necessity requires careful consideration, not only of the PDA&#x0027;s hemodynamic significance, but also of its impact within each infant&#x0027;s broader physiological context.</p>
<p>Without standardized criteria or guidelines, severity appraisal and treatment decisions continue to rely on case-by-case expert judgment and institution-specific protocols (<xref ref-type="bibr" rid="B25">25</xref>). This contributes to inconsistencies in patient care, which is highly dependent on access to experienced sub-specialists in pediatric cardiology and neonatology&#x2014;ultimately disadvantaging patients in resource-limited settings. Furthermore, the heterogeneity in clinical practice contributes to a self-perpetuating cycle: it undermines the design and comparability of randomized controlled trials, which in turn continues to impede the establishment of evidence-based treatment guidelines (<xref ref-type="bibr" rid="B26">26</xref>).</p>
<p>Viewed as a whole, PDA represents a highly complex condition within an exceptionally vulnerable patient population, requiring individualized, context-aware clinical decision-making. Despite ongoing efforts, current approaches remain limited in their ability to capture the multifactorial, dynamic nature of PDA pathophysiology. It is precisely these challenges&#x2014;diagnostic ambiguity, high-dimensional data, and the need for individualized assessment&#x2014;that have led researchers to consider PDA a compelling candidate for artificial intelligence (AI)&#x2013;based approaches (<xref ref-type="bibr" rid="B27">27</xref>). The application of AI to identify relevant risk factors, support diagnosis, and guide management represents a relatively new and promising frontier in PDA care.</p>
</sec>
<sec id="s1b"><label>1.2</label><title>AI&#x2014;a tool for improvement?</title>
<p>Unlike traditional methods that rely on predefined, low-dimensional feature sets, AI systems can process the full spectrum of available data, without the need for prior dimensionality reduction. In the context of echocardiography, for example, AI can analyze every pixel of every frame in a video sequence, leveraging precise spatial and temporal information, the complexity of which would be inaccessible to human interpretation (<xref ref-type="bibr" rid="B28">28</xref>). This computational depth enables AI to discover complex high-dimensional patterns&#x2014;an attractive capability given the nonlinear and multifactorial nature of PDA and neonatal physiology. By extracting relevant features and capturing temporal dynamics across diverse data modalities, AI has the potential to enhance PDA diagnostic precision, improve early risk stratification, and enable real-time, individualized clinical decision support (<xref ref-type="bibr" rid="B27">27</xref>). Importantly, these systems can do so without the limitations of inter-observer variability or the scarcity of highly specialized clinical expertise (<xref ref-type="bibr" rid="B29">29</xref>).</p>
<p>Despite these promising benefits, it is important to note that most clinical AI systems remain in the early stages of development. Such models are currently limited by oversimplified training data, narrowly scoped tasks, modest performance metrics, limited external validation, and insufficient readiness for real-world clinical integration (<xref ref-type="bibr" rid="B30">30</xref>). Unlike general AI applications, which can leverage vast publicly available datasets (e.g., natural images or text), medical AI development is constrained by limited access to large, high-quality clinical data. This scarcity&#x2014;driven by ethical, legal, and logistical barriers&#x2014;makes model training substantially more challenging (<xref ref-type="bibr" rid="B30">30</xref>). Additionally, the high capital investment required to meet computational demands has led to significant disparity between proprietary industry models and those developed in academia.</p>
<p>Beyond these challenges, there are also fundamental limitations in how AI systems operate. They identify mathematical patterns based on the data they are trained on. As a result, their accuracy and reliability are closely tied to the quality, diversity, and representativeness of the training data (<xref ref-type="bibr" rid="B30">30</xref>). Moreover, the complexity of AI models&#x2014;particularly those based on deep learning architectures&#x2014;often renders their decision pathways non-transparent for humans, limiting interpretability for clinicians. The field of explainable AI (XAI) seeks to address this by developing tools to clarify model behavior, but achieving effective, clinically relevant explainability remains an ongoing challenge (<xref ref-type="bibr" rid="B31">31</xref>).</p>
<p>Nonetheless, despite these current limitations, AI remains a promising avenue for advancing PDA management. To support meaningful progress, it is important to assess the current state of research, evaluate its contributions and shortcomings, and identify priorities for future work.</p>
</sec>
<sec id="s1c"><label>1.3</label><title>Objective</title>
<p>This review aims to systematically synthesize and evaluate current research on the application of AI in the context of PDA, including all aspects of risk assessment, diagnosis and management, identifying trends and directions for future research.</p>
</sec>
<sec id="s1d"><label>1.4</label><title>Existing knowledge</title>
<p>In their 2023 correspondence on McAdams et al.&#x0027;s review (<xref ref-type="bibr" rid="B32">32</xref>) of AI in neonatal care, Sharma et al. (<xref ref-type="bibr" rid="B27">27</xref>) address the underexplored domain of AI for PDA, particularly hemodynamically significant PDA in preterm infants. They suggest that AI could enhance decision-making and outcome prediction, especially for pharmacologic treatments, by integrating imaging data (e.g., echocardiograms)&#x2014;a notable advance over earlier clinical prediction models reliant on logistic regression and basic clinical data. They briefly reference two relevant studies [Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>); Na et al. (<xref ref-type="bibr" rid="B34">34</xref>)] that will be discussed in more detail in this review. Sharma et al. (<xref ref-type="bibr" rid="B27">27</xref>) identify evaluating treatment necessity and predicting therapeutic response as desirable AI applications, noting that current scores for PDA severity assessment [El-Khuffash et al. (<xref ref-type="bibr" rid="B35">35</xref>); Umapathi et al. (<xref ref-type="bibr" rid="B36">36</xref>)] show promise but lack widespread use. The authors argue that AI could refine such scoring systems, enhancing early diagnosis and severity appraisal and paving the way to modeling treatment responses in the future. Concluding their correspondence, Sharma et al. (<xref ref-type="bibr" rid="B27">27</xref>) concur with McAdams et al. (<xref ref-type="bibr" rid="B32">32</xref>) that AI&#x0027;s predictive capabilities in neonatology are still developing. They propose that it holds substantial promise for PDA management and is likely to become as essential to patient care as physical examinations and laboratory testing. As a correspondence rather than a systematic review, Sharma et al. (<xref ref-type="bibr" rid="B27">27</xref>) did not aim to comprehensively cover all studies on AI in the context of PDA. Furthermore, since its publication in January 2023, additional research in this area has emerged.</p>
</sec>
<sec id="s1e"><label>1.5</label><title>Relevant AI background information</title>
<p>To provide context for the subsequent exploration of AI applications in PDA, this section briefly outlines relevant concepts in artificial intelligence.</p>
<p>Artificial Intelligence (AI) refers to computer systems designed to perform tasks that typically require human intelligence. A core method in AI is machine learning (ML), where systems improve their performance on a given task by inferring patterns from training data rather than following explicitly programmed rules. This &#x201C;learning&#x201D; process begins with rough guesses and improves iteratively by adjusting internal parameters to minimize error during training.</p>
<p>At the heart of any machine learning system is a model&#x2014;a mathematical structure that represents the relationships between inputs and outputs, optimized for a specific task. In traditional machine learning, these models are often relatively simple and rely on clearly defined features. For example, a model might use gestational age, ventilator dependency, and lab values to estimate the risk of hemodynamically significant PDA. These inputs are selected and structured by humans, and the model learns how to weigh them based on the data.</p>
<p>One example of a traditional machine learning model is the decision tree, which generates predictions by iteratively splitting data into subsets using decision thresholds optimized to best separate the training data for a given task. At each split, the model selects the feature and threshold that most improve the quality of the split according to a predefined mathematical criterion, forming a hierarchical sequence of if&#x2013;then rules that terminate in leaf nodes representing final classifications or predictions. While simple and interpretable, individual decision trees can be prone to overfitting&#x2014;meaning they may capture noise instead of underlying patterns, reducing their accuracy on new, unseen data. To address this, ensemble methods&#x2014;such as Random Forest, XGBoost (Extreme Gradient Boosting), and Light Gradient Boosting Machine (L-GBM)&#x2014;combine the predictions of multiple decision trees to improve accuracy and generalizability. These methods are particularly effective for structured tabular data and can model complex, nonlinear relationships.</p>
<p>While these models are well-suited for structured input, other types of data&#x2014;such as medical images or unstructured clinical notes&#x2014;require more advanced techniques. This is where deep learning (DL) becomes particularly valuable.</p>
<p>DL is a more advanced approach within ML that uses models with many layers&#x2014;known as neural networks&#x2014;to automatically learn complex features from raw data. Instead of requiring manual feature selection, DL models can process unstructured data directly. For instance, in echocardiography, early layers in a neural network might detect simple visual patterns like edges, while deeper layers recognize chambers, vessels or lesions. This layered structure enables DL models to capture patterns that traditional models might miss.</p>
<p>Before learning can take place, raw data&#x2014;such as medical images, electronic health records, or clinical text&#x2014;must be converted into numerical form in a way that reflects its structure and meaning. The exact method depends on the type of data and the task. For example, the pixels in an image are typically represented as matrices of intensity values; categorical labels like disease types are encoded as numbers; and words in clinical notes are transformed into vectors that capture semantic relationships&#x2014;so that, for instance, &#x201C;fever&#x201D; and &#x201C;temperature&#x201D; are closer in vector space than &#x201C;fever&#x201D; and &#x201C;fracture.&#x201D; These numerical formats enable the model to carry out operations that highlight useful patterns.</p>
<p>As the data passes through each layer of the model, it undergoes transformations: values are multiplied, added, and passed through functions that emphasize relevant features while suppressing noise. The model contains many parameters, which are gradually adjusted during training to reduce errors, minimizing the difference between a predicted and ground truth value. A model type refers to the general category of ML approach and a model architecture specifies the particular design and structure within that type&#x2014;for example, how many layers it has, how they are connected, and how information flows through them. Hyperparameters are model settings that are defined before training (e.g., learning rate, tree depth) and influence how the model learns. These are distinct from model parameters, which are optimized during training. Methods such as grid search systematically test combinations of hyperparameters to find the optimal configuration for a given model and a given task.</p>
<p>Models are often trained using supervised learning, where they are given input-output pairs&#x2014;for example, echocardiography cine loops labeled with PDA yes or no&#x2014;and learn to associate features in the input with the correct output. In unsupervised learning, the model is given no labels and must find patterns in the data on its own. For instance, analyzing thousands of patient records, it might group patients with similar lab results and symptoms who respond to the same treatments or develop similar complications. These groupings&#x2014;called clusters&#x2014;can reveal unknown disease subtypes or treatment responses without prior instruction.</p>
<p>Once trained, the model is used in inference, where it applies what it has learned to new, unseen data. To ensure reliability, models are evaluated using standard performance metrics&#x2014;quantitative measures of how well they perform. Common metrics include accuracy (how often predictions are correct), precision (how many positive predictions are correct), and recall (how many actual cases are detected). Another widely used metric, AUC (Area Under the Receiver Operating Characteristic Curve), summarizes the model&#x0027;s ability to distinguish between different classes. To assess performance fairly, data is typically split into three parts: a training set to train the model, a validation set to fine-tune it during training, and a test set to evaluate final performance. This approach helps avoid overfitting&#x2014;where a model memorizes training data instead of learning general patterns.</p>
<p>In addition to internal testing, external validation is often performed using entirely new data&#x2014;from different hospitals, populations, or time periods&#x2014;to check how well the model generalizes to real-world clinical settings. Both internal and external validation are central to ensuring the model&#x0027;s predictions are not only accurate but also robust and clinically trustworthy.</p>
<p>A key challenge in clinical AI is explainability&#x2014;the ability to understand how a model arrives at its predictions. This is especially important in medicine, where clinicians must be able to justify decisions and maintain accountability. Traditional ML models, such as decision trees or logistic regression, are often more interpretable because their decision pathways are based on a small set of predefined features. In contrast, DL models learn complex, high-dimensional representations that are distributed across many layers, making their internal logic far less transparent and more difficult to interpret. To address this, the field of explainable AI (XAI) is developing tools to make models more understandable. XAI techniques aim to make complex predictions more interpretable by showing how input data influences the output. These include feature attribution methods, such as Shapley Additive Explanations (SHAP), which assign each input feature a value representing its contribution to a specific prediction. SHAP does this by comparing the model&#x0027;s output across many combinations of features to determine how each one affects the result. For image-based models, outputs can be interpreted using visual explanation methods such as Grad-CAM&#x002B;&#x002B; and other saliency maps, which highlight image regions most influential to a given prediction, offering intuitive, spatial insight into the model&#x0027;s reasoning. While these tools do not fully restore human-level interpretability, they offer valuable insights that can help clinicians assess whether a model is behaving reasonably.</p>
<p>Training effective models in medicine also involves addressing data-specific challenges. One common issue is class imbalance, which occurs when one category (e.g., presence vs. absence of disease) is significantly more frequent than others in the training data. This can bias the model toward the majority class and reduce performance for minority cases. Specialized techniques, such as reweighting or oversampling, are used to mitigate this.</p>
<p>In settings where high-quality labeled data is limited&#x2014;a common challenge in medical AI&#x2014;researchers use techniques designed to improve model performance and generalizability despite data scarcity. One such method is data augmentation, which artificially expands the dataset by generating plausible variations of existing data, such as rotating, flipping, or adding noise to medical images. This encourages the model to learn underlying patterns rather than memorizing specific examples, enhancing its ability to generalize. Another widely used strategy is transfer learning, where a model initially trained on a large, general dataset is adapted to a more specific task. Instead of training a new model from scratch, this approach leverages previously learned representations and fine-tunes them for the target application.</p>
</sec>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<sec id="s2a"><label>2.1</label><title>Literature search strategy</title>
<p>This systematic review followed the Preferred Reporting Items for Systematic reviews and Meta-Analyses (PRISMA) 2020 (<xref ref-type="bibr" rid="B37">37</xref>) guidelines to identify studies of the application of AI in the context of PDA. A comprehensive search was conducted across PubMed (742 results), Cochrane Library (295 results), and IEEE Xplore (135 results), with supplementary searches using tools such as Connected Papers and ResearchRabbit. The final search was completed on June 13, 2025. The search terms combined AI-related keywords (&#x201C;AI&#x201D;, &#x201C;Artificial Intelligence&#x201D;, &#x201C;ML&#x201D;, &#x201C;Machine Learning&#x201D;, &#x201C;DL&#x201D;, &#x201C;Deep Learning&#x201D;, &#x201C;Neural Network&#x201D;, &#x201C;Algorithm&#x201D;, &#x201C;Automated&#x201D; or &#x201C;Computer-Assisted&#x201D;) with PDA-specific terms (&#x201C;PDA&#x201D;, &#x201C;Patent Ductus Arteriosus&#x201D;, &#x201C;Persistent Ductus Arteriosus&#x201D; or &#x201C;Ductus Arteriosus&#x201D;). Filters were applied to include peer-reviewed journal articles published in English between January 1, 2010, and May 31, 2025.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Study selection and eligibility criteria</title>
<p>Titles and abstracts were screened to assess relevance. A total of <italic>n</italic>&#x2009;&#x003D;&#x2009;14 articles describing the use of AI in the context of PDA published in English between January 1, 2010, and May 31, 2025, were identified. Exclusion criteria encompassed non-peer-reviewed articles, correspondence letters, and conference abstracts lacking sufficient methodological detail, leading to the exclusion of <italic>n</italic>&#x2009;&#x003D;&#x2009;2 publications (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B56">56</xref>).</p>
<p>The remaining full-text articles were reviewed for eligibility. One study was excluded due to concerns about the integrity of its methodology and interpretation, including the overinterpretation of weak results (AUC&#x2009;&#x003D;&#x2009;0.53; Relative Error&#x2009;&#x003D;&#x2009;1.08) and a pattern of self-citation (<xref ref-type="bibr" rid="B38">38</xref>). This resulted in a final inclusion of <italic>n</italic>&#x2009;&#x003D;&#x2009;11 studies. A PRISMA Flow Diagram detailing the study selection process is depicted in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>. A list of the excluded studies (<italic>n</italic>&#x2009;&#x003D;&#x2009;3), their citations and corresponding rationales for exclusion can be found in the <xref ref-type="sec" rid="s11">Supplementary Table S3</xref>.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>PRISMA flow diagram illustrating study selection. A list of the excluded studies (<italic>n</italic>&#x2009;&#x003D;&#x2009;3), their citations and corresponding rationales for exclusion can be found in the <xref ref-type="sec" rid="s11">Supplementary Table S3</xref>. This flow diagram template is licensed under CC BY 4.0 [Source: Page et al. (<xref ref-type="bibr" rid="B37">37</xref>)]. To view a copy of this license, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fped-14-1648943-g001.tif"><alt-text content-type="machine-generated">Flowchart illustrating the identification of studies via databases and registers with steps: Identification from PubMed (742), IEEE Xplore (135), Cochrane (295). Records after screening were 1,172, leading to 14 reports sought for retrieval and 11 studies included in review. Exclusions detailed at each stage. Final search on June 13, 2025.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2c"><label>2.3</label><title>Data extraction</title>
<p>Data extraction was performed using a standardized form by a clinician-scientist trained in medicine and computer science, with domain expertise spanning both PDA and AI. Independent verification was performed by a second reviewer, with discrepancies resolved by consensus within the multidisciplinary author team including professors in neonatology, computer science, and pediatric cardiology. Extracted variables included study details, population characteristics, data sources, AI methods, evaluation metrics, validation strategies, performance results, key limitations, and information regarding model explainability and availability. A full summary of the extracted data is presented in the <xref ref-type="sec" rid="s11">Supplementary Table S1</xref>.</p>
</sec>
<sec id="s2d"><label>2.4</label><title>Data synthesis and analysis</title>
<p>Given the small number of eligible studies (<italic>n</italic>&#x2009;&#x003D;&#x2009;11) and their substantial methodological heterogeneity, formal outcome assessments were not feasible. Instead, a narrative synthesis was conducted to compare and summarize the studies across key methodological and performance-related dimensions.</p>
</sec>
<sec id="s2e"><label>2.5</label><title>Risk of bias assessment</title>
<p>The risk of bias for diagnostic and predictive studies was evaluated using the Prediction model Risk Of Bias Assessment Tool (PROBAST) (<xref ref-type="bibr" rid="B39">39</xref>), while analytical cross-sectional studies were assessed using the Joanna Briggs Institute (JBI) Checklist (<xref ref-type="bibr" rid="B40">40</xref>). Reporting quality was assessed using the Minimum Information About Clinical Artificial Intelligence Modeling checklist (MI-CLAIM) (<xref ref-type="bibr" rid="B41">41</xref>).</p>
</sec>
<sec id="s2f"><label>2.6</label><title>Validation</title>
<p>Aspects of the review requiring interdisciplinary judgment, including conceptual design, evidence appraisal, narrative synthesis, and conclusions, were addressed collaboratively by a multidisciplinary author team with expertise in neonatology, computer science, and pediatric cardiology.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<p>A total of <italic>n</italic>&#x2009;&#x003D;&#x2009;11 studies were included in this review, exploring different aspects of PDA detection, diagnosis, risk stratification, or management using some form of AI. Studies varied in design, objectives, approaches, and validation strategies.</p>
<sec id="s3a"><label>3.1</label><title>Aspect of PDA</title>
<p>The included studies addressed five key areas of PDA management: Diagnosis and Screening (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B42">42</xref>&#x2013;<xref ref-type="bibr" rid="B45">45</xref>), Pharmacological Treatment Response Prediction (<xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B47">47</xref>), Risk Factor Identification (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B48">48</xref>), Subphenotype Analysis (<xref ref-type="bibr" rid="B49">49</xref>) and Treatment Complication Prediction (<xref ref-type="bibr" rid="B50">50</xref>).</p>
</sec>
<sec id="s3b"><label>3.2</label><title>Research objectives</title>
<p>The research objectives of the included studies are outlined in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Research objectives of included studies, grouped by the aspect of PDA management addressed, and sorted by year of publication.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Aspect of PDA</th>
<th valign="top" align="center">Research objective</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Na et al. (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="left">Risk Factor Identification</td>
<td valign="top" align="left">To identify risk factors associated with symptomatic PDA and assess the feasibility and performance of AI models versus logistic regression for prediction.</td>
</tr>
<tr>
<td valign="top" align="left">Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>)</td>
<td valign="top" align="left">Risk Factor Identification</td>
<td valign="top" align="left">To evaluate the influence of maternal pathologies, medications, and neonatal factors on PDA risk, and develop predictive models using logistic regression, chi-square tests, Random Forest, and XGBoost.</td>
</tr>
<tr>
<td valign="top" align="left">G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>)</td>
<td valign="top" align="left">Diagnosis/Screening</td>
<td valign="top" align="left">To develop a ML framework for early screening and detection of PDA and congenital heart disease in neonates using phonocardiograms.</td>
</tr>
<tr>
<td valign="top" align="left">Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>)</td>
<td valign="top" align="left">Diagnosis/Screening</td>
<td valign="top" align="left">To assess the feasibility and performance of a lightweight CNN (MobileNet-V2) for detecting PDA in neonatal echocardiograms for potential edge deployment.</td>
</tr>
<tr>
<td valign="top" align="left">Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>)</td>
<td valign="top" align="left">Diagnosis/Screening</td>
<td valign="top" align="left">To create a DL model to classify echocardiographic video clips for PDA presence or absence and evaluate its performance.</td>
</tr>
<tr>
<td valign="top" align="left">Park et al. (<xref ref-type="bibr" rid="B43">43</xref>)</td>
<td valign="top" align="left">Diagnosis/Screening</td>
<td valign="top" align="left">To develop and assess a ML model and AI diagnostic support system for early PDA detection to improve accuracy and timeliness.</td>
</tr>
<tr>
<td valign="top" align="left">Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>)</td>
<td valign="top" align="left">Diagnosis/Screening</td>
<td valign="top" align="left">To define radiographic features of severe PDA in chest x-rays of preterm infants using DL and ML methods and evaluate their predictive value.</td>
</tr>
<tr>
<td valign="top" align="left">Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>)</td>
<td valign="top" align="left">Subphenotype Analysis</td>
<td valign="top" align="left">To apply unsupervised ML to identify subphenotypes of preterm infants with hemodynamically significant PDA for stratified intervention strategies.</td>
</tr>
<tr>
<td valign="top" align="left">Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>)</td>
<td valign="top" align="left">Pharmacological Treatment Response Prediction</td>
<td valign="top" align="left">To develop and validate an interpretable ML model to predict NSAID efficacy in closing hemodynamically significant PDA in preterm infants under 30 weeks.</td>
</tr>
<tr>
<td valign="top" align="left">Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>)</td>
<td valign="top" align="left">Pharmacological Treatment Response Prediction</td>
<td valign="top" align="left">To train and validate a DL model to predict PDA closure likelihood after initial pharmacotherapy in preterm infants.</td>
</tr>
<tr>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>)</td>
<td valign="top" align="left">Treatment Complication Prediction</td>
<td valign="top" align="left">To develop and compare four ML algorithms to identify the optimal model for predicting post-intervention platelet count decline in children with PDA.</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF4"><p>PDA, Patent ductus arteriosus; AI, Artificial intelligence; XGBoost, Extreme Gradient Boosting; ML, Machine learning; CNN, Convolutional neural network; DL, Deep learning; NSAID, Nonsteroidal Anti-Inflammatory Drug.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3c"><label>3.3</label><title>Study design</title>
<p>Ten of the eleven studies reviewed were retrospective, with G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) being the only study that prospectively recruited participants. Most studies were conducted at single centers, except for Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), which sourced data from two hospitals, and Na et al. (<xref ref-type="bibr" rid="B34">34</xref>), which utilized data from a nationwide registry encompassing 74 neonatal intensive care units. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) technically involved two centers, though one of these contributed only three patients (1&#x0025; of the total sample).</p>
</sec>
<sec id="s3d"><label>3.4</label><title>Definitions of study groups</title>
<p>While some studies differentiated between PDA and no PDA as confirmed by echocardiography (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B48">48</xref>), others focused on distinguishing between symptomatic and asymptomatic PDA (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B45">45</xref>). However, there was significant heterogeneity in these definitions across studies. Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) characterized symptomatic PDA based on clinical signs such as murmur, bounding pulse, hypotension, respiratory difficulty, pulmonary congestion, or cardiomegaly, with or without echocardiographic confirmation. In contrast, Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) defined symptomatic PDA as PDA requiring pharmacological or surgical intervention within the first 15 days of life. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) applied echocardiographic criteria, defining symptomatic PDA as a transductal diameter &#x2265;1.5&#x2005;mm and a left atrial-to-aortic diameter ratio &#x2265;1.4:1.</p>
<p>Another term used to classify study groups was hemodynamically significant PDA (hsPDA). Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) studied hsPDA without a control group, defining it solely based on echocardiographic findings&#x2014;specifically, an instance of PDA diameter &#x003E;1.5&#x2005;mm or an LA/Ao ratio &#x003E;1.5 within the first two weeks of life. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) used a combined definition that included both echocardiographic thresholds (PDA diameter &#x2265;1.5&#x2005;mm, LA/Ao ratio &#x2265;1.4) and clinical indicators of systemic ischemia (e.g., hypotension, tachycardia, oliguria) and pulmonary overcirculation (e.g., continuous murmur, apnea, respiratory distress, increased oxygen or ventilation requirements).</p>
<p>In pharmacological treatment response prediction studies, definitions of treatment success varied. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) defined success based on echocardiographic confirmation of PDA closure within 72&#x2005;h after treatment, while Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) defined successful closure as the absence of repeat pharmacotherapy, transcatheter occlusion, or surgical ligation before neonatal intensive care unit (NICU) discharge.</p>
<p>Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>), examining outcomes after transcatheter PDA closure, defined decline in platelet count (DPC) as a platelet count decline of &#x2265;25&#x0025; and NO-DPC as a decline of &#x003C;25&#x0025;.</p>
</sec>
<sec id="s3e"><label>3.5</label><title>Inclusion criteria</title>
<p>The inclusion criteria varied across studies. In retrospective studies, data availability was a primary consideration. For example, Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) included patients who had undergone echocardiography with a ductal view, while Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) required chest x-ray images taken within 4 days before or 3 days after an echocardiogram. Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) focused on patients registered in the Korean Neonatal Network, a nationwide neonatal registry, while Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) included all NICU patients at their institution with complete maternal and neonatal records.</p>
<p>Seven of the eleven studies exclusively focused on preterm infants (gestational age&#x2009;&#x003C;&#x2009;37 weeks) (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B43">43</xref>&#x2013;<xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B49">49</xref>). Among these, three studies further restricted their populations based on specific preterm categories: Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) examined extremely preterm births (&#x2264;28 weeks), Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) included very preterm births (&#x2264;30 weeks), and Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) targeted very low birth weight preterm infants (&#x003C;1,500&#x2005;g).</p>
<p>In contrast, G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) included neonates independent of gestational age, with preterm infants comprising only 9&#x0025; of the study participants, while Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) reported 69&#x0025; preterm and 31&#x0025; term participants. Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) did not report the gestational ages of their cohort.</p>
<p>Additional criteria were specified in some studies depending on their research objectives. For instance, Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) included infants who received pharmacologic treatment for PDA to evaluate treatment response, while Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) included children with PDA who underwent successful transcatheter closure to predict occurrence of post-interventional decline in platelet count. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) focused on the development of a screening method and thus included only clinically asymptomatic newborns. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>), solely examining cases of hemodynamically significant PDA, incorporated this definition into their inclusion criteria.</p>
<p>The retrospective study periods also varied significantly, ranging from 1 year (<xref ref-type="bibr" rid="B44">44</xref>), to 3 years (<xref ref-type="bibr" rid="B48">48</xref>), 5 years (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>), 6 years (<xref ref-type="bibr" rid="B47">47</xref>), 7 years (<xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B50">50</xref>), and up to 10 years (<xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B45">45</xref>, <xref ref-type="bibr" rid="B49">49</xref>).</p>
</sec>
<sec id="s3f"><label>3.6</label><title>Exclusion criteria</title>
<p>Six studies excluded newborns with significant congenital heart disease (CHD) or pulmonary hypertension (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B50">50</xref>). For instance, Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) excluded participants with other cardiac lesions, while Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) specifically excluded subjects with right-to-left PDA flow or complex CHD, except for patent foramen ovale (PFO), atrial septal defect (ASD), ventricular septal defect (VSD), or bicuspid aortic valve. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) similarly excluded cases of complex CHD but allowed for ASD and small VSD. Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) chose to exclude infants with any major congenital anomalies. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>), aiming to develop a screening method for PDA and other CHD, excluded infants with vivid clinical signs of CHD or pulmonary hypertension. Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) excluded other CHD requiring surgery. In contrast, Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) deliberately chose not to exclude congenital heart or lung diseases to create a more clinically realistic study population, acknowledging the high rate of comorbidity between PDA and these conditions.</p>
<p>Death within the first three days of life was another frequent exclusion criterion. Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) and Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) excluded such cases because confirmation or evaluation of PDA would not be feasible. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) excluded infants who died in the delivery room or were transferred to another institution before data collection.</p>
<p>In studies where group classification depended on treatment, additional exclusions were applied to ensure focus on relevant populations. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) excluded infants with spontaneous PDA closure, those with drug contraindications requiring direct surgical ligation, and those with a history of NSAID treatment at other hospitals; Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) excluded infants who received prophylactic or presymptomatic PDA treatment; and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) excluded infants who received indomethacin for IVH prophylaxis. To avoid potential confounding effects on platelet count decline, Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) also excluded patients with bleeding or hematologic disorders, preoperative heparin or chronic antiplatelet use, infective endocarditis or uncontrolled infections, and those with baseline platelet counts &#x003C;100&#x2009;&#x00D7;&#x2009;10<sup>9</sup>/L.</p>
<p>Finally, missing or incomplete data led to exclusions in Na et al. (<xref ref-type="bibr" rid="B34">34</xref>), Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>), Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) and Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>).</p>
</sec>
<sec id="s3g"><label>3.7</label><title>Sample size</title>
<p>After applying exclusion criteria, the study populations were generally small, with most studies including fewer than 500 participants: 66 (<xref ref-type="bibr" rid="B44">44</xref>), 113 (<xref ref-type="bibr" rid="B49">49</xref>), 174 (<xref ref-type="bibr" rid="B47">47</xref>), 182 (<xref ref-type="bibr" rid="B46">46</xref>), 201 (<xref ref-type="bibr" rid="B48">48</xref>), 265 (<xref ref-type="bibr" rid="B42">42</xref>), 300 (<xref ref-type="bibr" rid="B33">33</xref>), 330 (<xref ref-type="bibr" rid="B50">50</xref>) and 409 (<xref ref-type="bibr" rid="B43">43</xref>). Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) and Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) were outliers with 4,617 and 8,369 participants respectively.</p>
</sec>
<sec id="s3h"><label>3.8</label><title>Study population characteristics</title>
<p>Eight of the eleven studies provided information on their study populations (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B46">46</xref>&#x2013;<xref ref-type="bibr" rid="B50">50</xref>). Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>), Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>), and Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>), relying on imaging datasets, omitted detailed population data.</p>
<p>As highlighted by varying inclusion criteria, gestational age varied widely in study populations. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) reported a median of 39 weeks (IQR: 38&#x2013;40) for predominantly term infants, while Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>), Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>), Na et al. (<xref ref-type="bibr" rid="B34">34</xref>), and Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) focused on preterm populations, with medians or means ranging from 26.3 weeks to 29.1 weeks.</p>
<p>Birth weight also differed significantly. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) and Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) reported medians of 980&#x2005;g (IQR: 860&#x2013;1,160) and 715&#x2005;g (IQR: 610&#x2013;840), respectively, while Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) and Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) observed means of around 1,100&#x2005;g.</p>
<p>Males outnumbered females in most studies (49.2&#x0025;&#x2013;66.7&#x0025; male). Additional data reported in some studies included, among others, head circumference, cesarean births, multiple births, small for gestational age (SGA) status, Apgar scores, maternal factors, fetal anomalies, intraventricular hemorrhage (IVH) occurrence, and mortality. Notably, Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) reported a 49.6&#x0025; mortality rate, likely attributable to their focus on preterm neonates with a gestational age of &#x2264;28 weeks.</p>
</sec>
<sec id="s3i"><label>3.9</label><title>Data modality</title>
<p>In terms of input data, two main trends can be noted. Six of eleven studies focused on a multidimensional collection of patient data, ranging in number from six to one hundred eight different variables (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B48">48</xref>&#x2013;<xref ref-type="bibr" rid="B50">50</xref>). These variables were categorized into distinct subgroups dependent on the focus of each study, including demographic factors, maternal factors, prenatal factors, delivery and post-birth factors, postnatal clinical factors, blood pressure-related features, laboratory parameters, pre- and intra-operative factors and echocardiographic measurements. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) and Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) included a balanced number of pre- and postnatal factors, whereas Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) included primarily prenatal and delivery factors, and Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) focused solely on postnatal data collected within the first three days of life.</p>
<p>The remaining five studies focused on high-dimensional unimodal data, including phonocardiogram recordings (<xref ref-type="bibr" rid="B42">42</xref>), echocardiography video frames (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B47">47</xref>), and chest x-rays (<xref ref-type="bibr" rid="B45">45</xref>). Two of these studies, Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), pursued multimodal approaches, combining imaging data with perinatal variables.</p>
<p>All three studies using echocardiograms utilized the ductal view (high left parasternal sagittal), with Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) choosing to work with color Doppler only and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) using frames with and without color Doppler. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) was the only echocardiogram study that reported results of experiments with other views.</p>
</sec>
<sec id="s3j"><label>3.10</label><title>AI methodology</title>
<p>Six of eleven studies focused on a single ML method. The five studies which explored more than one method varied in approach. Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>), Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) and Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) compared two, four and five different ML methods for their prediction models, respectively. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) compared imaging-only and multimodal convolutional neural networks with random forest and logistic regression models based on perinatal data alone. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) initially employed convolutional neural networks (CNNs) to perform feature extraction and subsequently leveraged XGBoost to exploit radiographic and clinical features in an explainable way.</p>
<p>Traditional ML methods were employed more often (nine of eleven studies) than DL methods (five of eleven studies). All studies but one used supervised ML methods with labeled data. The exception, Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>), clustered unlabeled data using agglomerative hierarchical clustering with the Euclidean distance measure and the Ward method, which minimizes intra-cluster variance, on principal components. An overview of the machine learning approaches observed in the included studies is shown in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>.</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Overview of machine learning approaches in the included studies, categorized by learning type, model type, model architectures, frequency, and cited studies.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Category</th>
<th valign="top" align="center">Subcategory</th>
<th valign="top" align="center">Models</th>
<th valign="top" align="center"><italic>n</italic></th>
<th valign="top" align="center">Studies</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2">Supervised</td>
<td valign="top" align="left">Traditional ML</td>
<td valign="top" align="left">RF, XGBoost, LightGBM, SVM, k-NN</td>
<td valign="top" align="center">8</td>
<td valign="top" align="left">Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>), Na et al. (<xref ref-type="bibr" rid="B34">34</xref>), Park et al. (<xref ref-type="bibr" rid="B43">43</xref>), G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>), Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>), Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>), Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">DL</td>
<td valign="top" align="left">ResNet-50 CNN, MobileNet-V2 CNN, ConvNeXt CNN, EchoNet-Pediatric (<xref ref-type="bibr" rid="B51">51</xref>) CNN, MLP</td>
<td valign="top" align="center">5</td>
<td valign="top" align="left">Na et al. (<xref ref-type="bibr" rid="B34">34</xref>), Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>), Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>), Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>), Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Unsupervised</td>
<td valign="top" align="left">Traditional ML</td>
<td valign="top" align="left">Agglomerative Clustering</td>
<td valign="top" align="center">1</td>
<td valign="top" align="left">Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">DL</td>
<td valign="top" align="left">None</td>
<td valign="top" align="center">0</td>
<td valign="top" align="left">None</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF5"><p><italic>n</italic>, Frequency; ML, Machine learning; DL, Deep learning; RF, Random forest; XGBoost, Extreme Gradient Boosting; LightGBM, Light Gradient Boosting Machine; SVM, Support Vector Machine; k-NN, k-Nearest Neighbors; CNN, Convolutional neural network; MLP, Multilayer Perceptron.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Among the supervised models, the most common were decision tree-based ensemble methods, used in eight of ten studies. These included random forest (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B46">46</xref>&#x2013;<xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B50">50</xref>), XGBoost (gradient boosted decision trees) (<xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B45">45</xref>, <xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B50">50</xref>), and Light Gradient Boosting Machine (LightGBM) (<xref ref-type="bibr" rid="B34">34</xref>). The next most common supervised method, present in five of ten studies, was the neural network. This included convolutional neural networks of varying architectures [ResNet-50 (<xref ref-type="bibr" rid="B44">44</xref>), ConvNeXt (<xref ref-type="bibr" rid="B45">45</xref>), MobileNet-V2 (<xref ref-type="bibr" rid="B33">33</xref>), EchoNet-Pediatric (<xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B51">51</xref>)] and an instance of a multilayer perceptron (MLP) (<xref ref-type="bibr" rid="B34">34</xref>). Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) leveraged pretrained ImageNet weights, while Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) utilized transfer learning via a pretrained EchoNet-Pediatric (<xref ref-type="bibr" rid="B51">51</xref>) backbone. Data augmentation to compensate for class imbalance was reported only by Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>). Support vector machine (SVM) and instance-based learning with k-nearest neighbors (k-NN) were each employed in only one study (<xref ref-type="bibr" rid="B34">34</xref>).</p>
</sec>
<sec id="s3k"><label>3.11</label><title>Experiments for model choice/ablation studies</title>
<p>Supplementary methodologies aimed at achieving the most effective model solutions were utilized across all studies and can be categorized into four key areas: feature engineering and optimization, model development and training, data selection and input configuration, and output aggregation and thresholding.</p>
<p>In feature engineering and optimization, G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) utilized feature aggregation to improve classification, while ablation studies identified optimal subsets, such as the Top-15 features for PDA vs. healthy cases. Similarly, Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) compared the outputs of four models to determine the most effective feature sets. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) applied principal component analysis (PCA) to reduce dimensionality, address collinearity, and enhance clustering, alongside normalization and imputation to handle missing values. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) used binary logistic regression to identify factors influencing treatment efficacy and Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) applied a two-step process involving univariate analysis followed by extra-trees for feature selection, using AUC comparisons to evaluate model performance.</p>
<p>For model development and training, G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) applied regularization techniques such as subsampling and shrinkage (learning rate) to control overfitting. Key hyperparameters, including tree depth, subsample ratios, feature selection ratios, and the number of decision trees, were tuned for optimal performance. Similarly, Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) and Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) used grid searches and stratified five-fold cross-validation to optimize hyperparameters. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) compared three vision-based architectures&#x2014;a scratch-built 3D CNN and two pretrained models (Swin3D and EchoNet-Pediatric)&#x2014;ultimately selecting EchoNet-Pediatric (<xref ref-type="bibr" rid="B51">51</xref>) as the backbone for both an imaging-only CNN and a multimodal model incorporating echocardiographic clips and perinatal data.</p>
<p>In the area of data selection and input configuration, Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) tested rule-based filtering and a novel &#x201C;human-guided easy learning&#x201D; approach to curate datasets, with the latter improving performance. In this method, a medical professional was first trained by reviewing 173 chest x-ray (CXR) images&#x2014;balanced between positive and negative sPDA cases&#x2014;and received the correct diagnosis after each review. After this training, the professional evaluated 4,654 additional images, and only the 3,349 images that were correctly classified were used to train the model, based on the assumption that these clearer cases contain more distinct and anatomically relevant features of sPDA. Additionally, Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) evaluated various input types&#x2014;raw images, heart-segmented masks, and thorax-segmented masks&#x2014;with combined inputs yielding the best results. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) evaluated EchoNet-Pediatric (<xref ref-type="bibr" rid="B51">51</xref>) based models across echo view subsets (non-PDA, 2D PDA, Color PDA, Color Compare PDA, and Overall) and benchmarked against perinatal-only logistic regression and random forest models.</p>
<p>Finally, comparisons of output aggregation and thresholding were explored. Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) focused on optimizing classification thresholds using frame- and clip-level criteria, while Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) explored predictions at both clip- and study-levels.</p>
</sec>
<sec id="s3l"><label>3.12</label><title>Performance metrics reported</title>
<p>Across the studies, performance metrics varied. The AUC was the most commonly reported metric, appearing in nine studies (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B44">44</xref>&#x2013;<xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B50">50</xref>), often accompanied by accuracy, sensitivity, and specificity (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B45">45</xref>, <xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B48">48</xref>). Precision, recall, and false negative rate were specific to Park et al.&#x0027;s (<xref ref-type="bibr" rid="B43">43</xref>) evaluation, while Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) uniquely reported positive and negative predictive values (PPV, NPV). Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>), focusing on unsupervised clustering without ground truth, did not provide conventional performance metrics.</p>
<p>Nine studies calculated metrics on a separate test set distinct from the training and validation data (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B43">43</xref>&#x2013;<xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B50">50</xref>). Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) and Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) used 10&#x0025; of the data for testing, Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) approximately 15&#x0025;, Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) and Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) 20&#x0025;, while Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>), and Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) each allocated 30&#x0025;. In contrast, G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) did not use an independent test set, instead relying on outer 10-fold cross-validation to derive performance metrics. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>), employing an unsupervised clustering approach, did not necessitate data splitting.</p>
</sec>
<sec id="s3m"><label>3.13</label><title>External validation</title>
<p>Ten of eleven studies (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>&#x2013;<xref ref-type="bibr" rid="B45">45</xref>, <xref ref-type="bibr" rid="B47">47</xref>&#x2013;<xref ref-type="bibr" rid="B50">50</xref>) did not perform any external validation. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) conducted limited external validation using data from only two patients at a second hospital.</p>
</sec>
<sec id="s3n"><label>3.14</label><title>Results/performance</title>
<p>Reported model performance should be interpreted with caution, as differences in study design limit direct comparability and the presence of methodological bias may contribute to overfitting with artificially inflated performance metrics.</p>
<sec id="s3n1"><label>3.14.1</label><title>Diagnosis and screening</title>
<p>The performance metrics of the best performing models in the category diagnosis and screening are depicted in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>.</p>
<table-wrap id="T4" position="float"><label>Table&#x00A0;4</label>
<caption><p>Performance metrics from the best-performing models in PDA diagnosis and screening studies. Metrics are not directly comparable due to variations in study design. Given the high risk of bias, overfitting with artificial inflation of performance metrics is likely.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">AUC</th>
<th valign="top" align="center">Sensitivity</th>
<th valign="top" align="center">Specificity</th>
<th valign="top" align="center">PROBAST risk of bias</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>)</td>
<td valign="top" align="left">USVN CNN model (study-level prediction)</td>
<td valign="top" align="center">0.93</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<td valign="top" align="left">Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>)</td>
<td valign="top" align="left">MobileNet-V2 CNN model</td>
<td valign="top" align="center">0.88</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">0.87</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<td valign="top" align="left">G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>)</td>
<td valign="top" align="left">XGBoost PDA detection model</td>
<td valign="top" align="center">0.761</td>
<td valign="top" align="center">Not reported</td>
<td valign="top" align="center">Not reported</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<td valign="top" align="left">Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>)</td>
<td valign="top" align="left">XGBoost model (ratio features&#x2009;&#x002B;&#x2009;clinical data)</td>
<td valign="top" align="center">0.74</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Park et al. (<xref ref-type="bibr" rid="B43">43</xref>)</td>
<td valign="top" align="left">Perinatal RF model</td>
<td valign="top" align="center">82&#x0025;</td>
<td valign="top" align="center">38&#x0025;</td>
<td valign="top" align="center">76&#x0025;</td>
<td valign="top" align="left">High</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF6"><p>USVN, Ultrasound Video Network; CNN, Convolutional neural network; XGBoost, Extreme Gradient Boosting; PDA, Patent ductus arteriosus; RF, Random forest.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Most studies achieved reasonable AUC values, demonstrating feasibility for PDA detection. Erno et al.&#x0027;s (<xref ref-type="bibr" rid="B44">44</xref>) study-level predictions reported an AUC of 0.93 (95&#x0025; CI: 0.89&#x2013;0.98), sensitivity of 0.83, and specificity of 0.89. Clip-level predictions achieved an AUC of 0.86 (95&#x0025; CI: 0.83&#x2013;0.90). Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) reported an AUC of 0.88, sensitivity of 0.76, specificity of 0.87, positive predictive value of 0.84, and negative predictive value of 0.80. Each test video clip was processed in approximately seven seconds on an Intel 8-core i7 processor without a graphics processing unit (GPU). G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) reported a validation AUC of 0.76 for PDA detection and 0.77 for other congenital heart disease, with similar test AUCs of 0.74 and 0.78. These results were derived from cross-validation without an independent test set. Chang et al.&#x0027;s (<xref ref-type="bibr" rid="B45">45</xref>) XGBoost models achieved AUCs ranging from 0.72 to 0.75, with specificity ranging from 0.91 to 0.95 and sensitivity from 0.33 to 0.42. The highest AUC of 0.75 was achieved by the model trained using feature vectors from the CNN and clinical data, with specificity of 0.95 and sensitivity of 0.33. The best balance between specificity and sensitivity was achieved by the model trained on extracted ratio features and clinical data, with an AUC of 0.74, specificity of 0.94, and sensitivity of 0.42. Park et al.&#x0027;s (<xref ref-type="bibr" rid="B43">43</xref>) models reported accuracy between 71&#x0025; and 84&#x0025;, precision between 24&#x0025; and 42&#x0025;, and recall between 61&#x0025; and 76&#x0025;. 
Whilst Model_perinatal&#x2009;&#x002B;&#x2009;bp achieved the best accuracy and precision at 84&#x0025; and 42&#x0025; respectively, Model_perinatal achieved the best balance of accuracy (82&#x0025;), precision (38&#x0025;), and recall (76&#x0025;). The AI-based diagnostic support system, employing Model_perinatal, improved NICU professionals&#x2019; performance, with accuracy increasing from 48&#x0025; to 76&#x0025;, precision from 46&#x0025; to 80&#x0025;, and recall from 52&#x0025; to 73&#x0025;. Diagnoses were made 2.5 (nurses and doctors) and 3.1 days (doctors only) earlier with AI compared to 2.1 and 3 days earlier without AI.</p>
</sec>
<sec id="s3n2"><label>3.14.2</label><title>Pharmacological treatment response prediction</title>
<p>Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) reported a 45.6&#x0025; pharmacological PDA closure rate with a predictive model achieving an AUC of 0.792 (95&#x0025; CI: 0.457&#x2013;0.841). Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) observed a 60&#x0025; success rate, using the absence of further PDA interventions as a surrogate outcome. In their model comparisons, an EchoNet-Pediatric (<xref ref-type="bibr" rid="B51">51</xref>) backbone outperformed a scratch-built 3D CNN and pretrained Swin3D for imaging-only prediction. The multimodal model, combining imaging with perinatal data, achieved superior performance (AUC: 0.82, F1: 0.78, sensitivity: 0.76, specificity: 0.70, PPV: 0.79, NPV: 0.66), outperforming both the imaging-only model (AUC: 0.66) and baseline models using perinatal data alone (logistic regression: AUC 0.66; random forest: AUC 0.74).</p>
</sec>
<sec id="s3n3"><label>3.14.3</label><title>Risk factor identification</title>
<p>Both risk factor identification studies showed reasonable predictive performance with AUCs of 0.82 (<xref ref-type="bibr" rid="B34">34</xref>) and 0.87 (<xref ref-type="bibr" rid="B48">48</xref>).</p>
<p>Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) identified gestational age, invasive mechanical ventilation, sepsis, and birth weight as key features for symptomatic PDA (sPDA) prediction and sepsis, supplemental oxygen at birth, noninvasive ventilation, and birth temperature as key features for treatment necessity prediction. In their study comparing 6 ML methods, ensemble models such as Random Forest and Light Gradient Boosting Machine showed modest performance improvements over Multiple Logistic Regression. Light Gradient Boosting Machine achieved an accuracy of 0.77 and an AUC of 0.82, while Multiple Logistic Regression achieved an accuracy of 0.76 and an AUC of 0.81. Multiple Logistic Regression reported a sensitivity of 0.85, compared to 0.65 for Light Gradient Boosting Machine and 0.64 for Random Forest. Light Gradient Boosting Machine reported a specificity of 0.84, compared to 0.83 for Random Forest and 0.60 for Multiple Logistic Regression. For predicting sPDA requiring treatment, all models achieved an accuracy of approximately 0.85. Random Forest reported a sensitivity of 0.97 and specificity of 0.36, while Multiple Logistic Regression reported a sensitivity of 0.85 and specificity of 0.98, although this may be a typographical error (reported confidence interval of 0.28&#x2013;0.32).</p>
<p>Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) identified prolonged rupture of membranes (PROM) as a strong predictor of PDA (OR: 13.03, <italic>p</italic>&#x2009;&#x003C;&#x2009;0.001), with lower gestational age (OR: 0.85, <italic>p</italic>&#x2009;&#x003D;&#x2009;0.042) and lower birth weight (OR: 0.72, <italic>p</italic>&#x2009;&#x003D;&#x2009;0.029) also showing significant associations (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.05). In predictive modeling, XGBoost outperformed Random Forest with 81.4&#x0025; accuracy, 92.5&#x0025; sensitivity, 57.9&#x0025; specificity, and an AUC of 0.872. Random Forest achieved 76.3&#x0025; accuracy, 47.4&#x0025; sensitivity, and 90&#x0025; specificity.</p>
</sec>
<sec id="s3n4"><label>3.14.4</label><title>Subphenotype analysis</title>
<p>Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) identified two clusters. The inflamed cluster had leukocyte counts of 10.6&#x2009;&#x00D7;&#x2009;10<sup>3</sup>/&#x00B5;L compared to 5.97&#x2009;&#x00D7;&#x2009;10<sup>3</sup>/&#x00B5;L, neutrophil percentages of 69.7&#x0025; compared to 46&#x0025;, neutrophil-to-lymphocyte ratios of 3.23 compared to 1.14, mean corpuscular volume of 108.5&#x2005;fL compared to 115.6&#x2005;fL, and mean corpuscular hemoglobin concentration of 34.9&#x2005;g/dL compared to 33&#x2005;g/dL (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.001 for all comparisons). The respiratory acidosis cluster had pH levels of 7.23 compared to 7.28 (<italic>p</italic>&#x2009;&#x003D;&#x2009;0.005) and pCO<sub>2</sub> levels of 45.5&#x2005;mmHg compared to 38.3&#x2005;mmHg (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.001). No differences were observed in demographics, IVH severity, SGA prevalence, or twin births.</p>
</sec>
<sec id="s3n5"><label>3.14.5</label><title>Treatment complication prediction</title>
<p>Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) identified six key predictors of post-intervention decline in platelet count in children with PDA through univariate analysis and extra-trees feature selection: systolic pulmonary artery pressure, pulmonary valve velocity, age, weight, defect size, and mean pulmonary artery pressure. Among the models based on these predictors, Random Forest achieved the highest performance with a Train-AUC of 0.81 and Test-AUC of 0.71. Logistic Regression, AdaBoost, and XGBoost yielded Train-AUCs of 0.68, 0.71, and 0.70, respectively.</p>
</sec>
</sec>
<sec id="s3o"><label>3.15</label><title>Model examination/explainability</title>
<p>Model explanation and explainability methods varied across studies, ranging from thorough techniques to limited or absent approaches.</p>
<p>Several studies focused on statistical feature importance. Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) used logistic regression and chi-squared tests to rank predictive features. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) validated patterns identified during hierarchical clustering using statistical tests, including Mann&#x2013;Whitney U and chi-square tests. Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) employed variable importance plots to rank plasma albumin level, platelet count, and 24&#x2005;h urine volume as the top predictors for PDA closure success, with marginal effect plots revealing nonlinear relationships.</p>
<p>Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) used Shapley Additive Explanations (SHAP)&#x2014;a method that quantifies how much each input feature contributes to a model&#x0027;s prediction&#x2014;to highlight gestational age, invasive mechanical ventilation, and birth weight as key predictors of symptomatic PDA. Similarly, Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>) used SHAP to show that higher systolic PAP, larger defect size, younger age, lower weight, and increased pulmonary valve velocity were associated with elevated risk of post-interventional platelet count decline. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) also applied SHAP, identifying radiographic features such as the cardiothoracic ratio as influential in their XGBoost model.</p>
<p>Visualization techniques were present in three imaging-based studies. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) used GradCAM&#x002B;&#x002B; to identify thoracic regions associated with PDA-related changes, such as cardiomegaly and lung opacity, aiding the validation of radiographic predictors like the cardiothoracic ratio for symptomatic PDA. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) generated saliency maps to highlight image regions relevant to predictions, publishing one effective and one ineffective example. Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) provided a single figure visualizing frame-level attention changes during one echocardiogram video. Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) did not report any explainability methods.</p>
<p>Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) presented the only study focused on clinical usability, developing an AI-based diagnostic support system featuring interactive dashboards. These dashboards included components for visualizing long-term electronic health record trends, comparing clinical patterns, and integrating classification probabilities with symptom development predictions.</p>
</sec>
<sec id="s3p"><label>3.16</label><title>Fairness</title>
<p>None of the examined studies explicitly mentioned aspects of AI fairness, such as addressing biases in training data or ensuring equitable performance across demographic groups in their publications.</p>
</sec>
<sec id="s3q"><label>3.17</label><title>Risk of bias</title>
<p>The risk of bias across included studies was assessed using the Prediction Model Risk of Bias Assessment Tool (PROBAST) (<xref ref-type="bibr" rid="B39">39</xref>), summarized in <xref ref-type="table" rid="T5">Table&#x00A0;5</xref> and available in more detail in the <xref ref-type="sec" rid="s11">Supplementary Material</xref>. The overall risk of bias across studies was high, with most concerns concentrated in the analysis domain. Common issues included small sample sizes leading to low events-per-predictor ratios, which increased susceptibility to overfitting&#x2014;particularly in high-dimensional models. Many studies selectively reported performance metrics, often limiting evaluation to AUC alone, and employed inadequate internal validation strategies without any attempt at external validation. Additionally, missing data were frequently handled suboptimally, either through case exclusion without imputation or via simplistic imputation methods.</p>
<table-wrap id="T5" position="float"><label>Table&#x00A0;5</label>
<caption><p>A summary of the results of the risk of bias assessment performed using the Prediction Model Risk of Bias Assessment Tool (PROBAST) (<xref ref-type="bibr" rid="B39">39</xref>), comprising four assessment categories: participants, predictors, outcome and analysis. These results are available in more detail in <xref ref-type="sec" rid="s11">Supplementary Table S2</xref>.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">PROBAST assessment category</th>
<th valign="top" align="center" colspan="10">Study</th>
</tr>
<tr>
<th valign="top" align="center">G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>)</th>
<th valign="top" align="center">Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>)</th>
<th valign="top" align="center">Na et al. (<xref ref-type="bibr" rid="B34">34</xref>)</th>
<th valign="top" align="center">Park et al. (<xref ref-type="bibr" rid="B43">43</xref>)</th>
<th valign="top" align="center">Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>)</th>
<th valign="top" align="center">Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>)</th>
<th valign="top" align="center">Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>)</th>
<th valign="top" align="center">Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>)</th>
<th valign="top" align="center">Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>)</th>
<th valign="top" align="center">Zhang et al. (<xref ref-type="bibr" rid="B50">50</xref>)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Participants</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">Unclear</td>
</tr>
<tr>
<td valign="top" align="left">Predictors</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
</tr>
<tr>
<td valign="top" align="left">Outcome</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Unclear</td>
</tr>
<tr>
<td valign="top" align="left">Analysis</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<td valign="top" align="left">Overall Risk of Bias</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Unclear</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As the study by Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) did not involve a predictive model, risk of bias was instead evaluated using the Joanna Briggs Institute (JBI) Checklist (<xref ref-type="bibr" rid="B40">40</xref>). In this instance, while most criteria were met, two items remained unclear: 1) the use of objective, standard criteria for measuring the condition, as echocardiography&#x2014;though considered the gold standard&#x2014;lacks standardized definitions for parameters and cut-offs; and 2) the handling of confounding variables, due to the absence of explicit discussion on potential confounders and the lack of a matched control group.</p>
</sec>
<sec id="s3r"><label>3.18</label><title>Minimum information about clinical artificial intelligence modeling (MI-CLAIM)</title>
<p>While reporting quality was generally sound, several checklist items were consistently unaddressed. Clinical utility metrics, such as positive predictive value (PPV), were only reported by Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), and no study displayed adequate proof of reliability under data distribution shifts via external validation. Additionally, significant variability was observed in baseline comparisons and reproducibility practices.</p>
<sec id="s3r1"><label>3.18.1</label><title>Comparison with baseline</title>
<p>Five of eleven studies compared their models to some form of baseline method (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B48">48</xref>). G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) compared their Top-15 features model to a trained neonatologist analyzing heart sound recordings, with the model outperforming the neonatologist in sensitivity (0.72 vs. 0.62 at fixed specificity) and specificity (0.82 vs. 0.71 at fixed sensitivity). Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) used multiple logistic regression as the baseline, enabling direct comparisons with ML methods, while Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) evaluated the performance of neonatal intensive care unit professionals with and without AI support. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) used random forest and logistic regression models trained on perinatal data alone as baselines for evaluating the added value of multimodal and imaging-only CNN models. Similarly, Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) employed logistic regression and chi-squared tests as traditional statistical baselines, with random forest also serving as a point of comparison for XGBoost.</p>
</sec>
<sec id="s3r2"><label>3.18.2</label><title>Public availability of data, models, or code</title>
<p>The availability of data, models, or code varied across studies, with most citing privacy concerns or omitting details on accessibility. Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) was the only study offering a publicly accessible resource, sharing code at <ext-link ext-link-type="uri" xlink:href="https://github.com/fymatsushita/PDA">https://github.com/fymatsushita/PDA</ext-link>, though no data were made available. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) withheld data for privacy reasons but planned to deploy their models as a cloud-based tool at <ext-link ext-link-type="uri" xlink:href="https://www.hearttone.org">https://www.hearttone.org</ext-link>; however, the website was listed as &#x201C;not found&#x201D; at the time of this review. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) and Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) stated that anonymized data could be requested but were not publicly available due to privacy restrictions, and neither provided code or models. Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) similarly referenced confidentiality policies from the Korean Neonatal Network, restricting data access to approved research activities, with no mention of public code or models. The remaining studies did not report on the availability of data, models, or code.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>This review highlights the emerging but still limited role of artificial intelligence in the context of PDA.</p>
<p>PDA represents a uniquely complex clinical challenge: its hemodynamic impact lies on a continuum, evolves dynamically over time, and predominantly affects a highly vulnerable patient population often burdened with multiple comorbidities. These factors are further complicated by the absence of standardized subgroup definitions, such as &#x201C;symptomatic&#x201D; or &#x201C;hemodynamically significant&#x201D; PDA, as well as the lack of consensus regarding which patients benefit from which interventions. As a result, management relies on institution-specific standards and case-by-case expert appraisal, limiting consistency in care and disadvantaging those in underserviced areas. These factors have led to a fragmented research landscape, where study designs, outcome measures, and clinical relevance are difficult to compare. Against this backdrop, AI holds significant potential to support more consistent, data-driven decision-making by processing high-dimensional clinical data and capturing subtle, temporal patterns beyond human capability. The promise of AI in this setting lies not in replacing clinicians, but in augmenting their ability to interpret complex information and tailor management more precisely to the individual patient. Despite growing interest and demonstrated feasibility across a range of applications, current research remains constrained by methodological, clinical, and ethical limitations. The following discussion critically evaluates these challenges, synthesizes common themes across the included studies, and outlines key directions for future research and clinical translation.</p>
<p>Given the novelty of AI for PDA and the subsequent lack of precedent, investigators of included studies were forced to make methodological decisions within an uncharted research landscape. Despite these obstacles, authors not only showcased feasibility but also made commendable efforts in areas such as model interpretability, edge deployment, and clinical integration. Nevertheless, the studies included in this review are not without their limitations, including high risk of bias, heterogeneous non-standardized study group definitions, issues with generalizability, methodological challenges in machine learning approaches, gaps in model evaluation and validation, insufficient focus on explainability, clinical utility and fairness, and limitations in transparency and reproducibility.</p>
<sec id="s4a"><label>4.1</label><title>Performance metrics do not equate to clinical applicability</title>
<p>Although many of the included studies report AUCs in the moderate-to-high range, these results must be interpreted with caution and should not be mistaken for evidence of clinical applicability. As highlighted by the PROBAST evaluation, all but one study [Na et al. (<xref ref-type="bibr" rid="B34">34</xref>); risk of bias &#x2018;unclear&#x2019;] exhibit a high risk of bias, meaning that seemingly strong model performance rests on methodologically fragile foundations: most are developed from retrospective, single-center datasets, rely on limited event numbers relative to model complexity, and all lack adequate external validation. Under such conditions, high AUCs likely reflect overfitting to idiosyncrasies of the development dataset, inflating internal performance, rather than genuine clinical signal.</p>
<p>G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) derived their AUC exclusively from cross-validation without an independent test set, Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) tested performance on only 11 patients, while others relied on surrogate study group and outcome definitions, such as treatment initiation (<xref ref-type="bibr" rid="B43">43</xref>) or the absence of subsequent intervention (<xref ref-type="bibr" rid="B47">47</xref>), that do not necessarily represent true ductal physiology or clinically meaningful endpoints.</p>
<p>Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) was the only analysis not deemed high risk of bias according to PROBAST. While their Light Gradient Boosting Machine model showed reasonable performance (AUC 0.82; sensitivity 0.65; specificity 0.84), the model&#x0027;s target outcome warrants scrutiny. &#x201C;Symptomatic PDA&#x201D; was anchored to treatment necessity rather than objective clinical or echocardiographic criteria, rendering it a surrogate of limited validity&#x2014;particularly in the context of a retrospective 5-year cohort during which PDA management strategies shifted substantially from early targeted treatment toward more conservative, expectant approaches.</p>
<p>Furthermore, closer examination of the reported metrics&#x2014;such as diagnostic models achieving high specificity but very low sensitivity (<xref ref-type="bibr" rid="B45">45</xref>), treatment response models reporting moderate discrimination but extremely wide confidence intervals (<xref ref-type="bibr" rid="B46">46</xref>), or PDA detection models with 24&#x0025;&#x2013;42&#x0025; precision (<xref ref-type="bibr" rid="B43">43</xref>)&#x2014;highlights that superficially moderate to high performing models are not necessarily generalizable or aligned with clinically meaningful decision thresholds, where missed diagnoses or misclassified treatment candidates carry significant consequences.</p>
<p>The absence of external validation and prospective assessment precludes any meaningful appraisal of model performance in heterogeneous real-world settings, which significantly affect model behavior.</p>
<p>For AI to influence PDA care in a reliable and ethically defensible manner, future studies must adopt larger populations and multicenter designs; employ standardized study group and outcome definitions; incorporate robust internal and external validation frameworks with subsequent prospective evaluation; and demonstrate value beyond established clinical assessment pathways. Only through such methodology can performance metrics be translated into clinically applicable and beneficial AI tools.</p>
</sec>
<sec id="s4b"><label>4.2</label><title>Heterogeneous study group definitions</title>
<p>Significant heterogeneity in diagnostic criteria for PDA and its hemodynamically significant or symptomatic subgroups was observed across the studies included in this review, leading to inconsistent study groups. For example, Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) relied on clinical signs for PDA diagnosis without echocardiographic confirmation and Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) defined &#x2018;symptomatic&#x2019; PDA groups based on treatment decisions rather than symptoms. Matsushita et al.&#x0027;s (<xref ref-type="bibr" rid="B49">49</xref>) use of static measures like worst diameter and LA/Ao ratios overlooked longitudinal hemodynamic changes important for guiding treatment. This variability reflects the ongoing lack of consensus that challenges PDA research and clinical practice more broadly, impeding both the comparability of results and their translation into clinical care. Furthermore, the exclusion of neonates with other congenital heart disease or pulmonary hypertension created artificially clean datasets that fail to represent the clinical reality of high comorbidity rates (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B47">47</xref>, <xref ref-type="bibr" rid="B50">50</xref>). Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) was the exception, better accounting for such complexities. While Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) utilized echocardiographic confirmation, Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) defined successful ductal closure based solely on the absence of further treatment, rendering the treatment response prediction models incomparable and raising concerns about possible misclassification. 
These limitations hinder the interpretability, comparability, and clinical utility of the included studies, underscoring the general need for standardized criteria defining hemodynamically significant or symptomatic subgroups.</p>
</sec>
<sec id="s4c"><label>4.3</label><title>Generalizability</title>
<p>The generalizability of findings was constrained by the single-center design of all but three studies (<xref ref-type="bibr" rid="B34">34</xref>, <xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B47">47</xref>). In G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>), one of two hospitals contributed only three patients, rendering Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) the only valid examples of multi-center design. External validation was rare, with Liu et al. (<xref ref-type="bibr" rid="B46">46</xref>) offering the sole attempt, albeit on just two patients, rendering results effectively non-generalizable. Small study populations and event rates were another limitation, with the exception of Na et al. (<xref ref-type="bibr" rid="B34">34</xref>) and Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>). Imaging-focused studies (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B45">45</xref>), except for Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>), compounded these issues by omitting population demographics, likely due to the resource-intensive process of matching imaging data with clinical records. The reliance on retrospective datasets in all studies except G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) introduced additional challenges to generalizability. These datasets spanned 1&#x2013;10 years, during which advances in technology and shifts in treatment practices likely affected data consistency and relevance. Na et al.&#x0027;s (<xref ref-type="bibr" rid="B34">34</xref>) dataset, while large, lacked PDA-specific clinical details, and Matsushita et al.&#x0027;s (<xref ref-type="bibr" rid="B49">49</xref>) use of averaged laboratory values may have obscured clinically relevant fluctuations. 
These challenges underscore the need for multicenter, prospectively designed studies with diverse, well-characterized populations and external validation.</p>
</sec>
<sec id="s4d"><label>4.4</label><title>Methodological challenges in machine learning approaches</title>
<p>Traditional ML methods dominated (nine of eleven studies), often reflecting constraints of small datasets, while DL approaches and modern state-of-the-art architectures were underutilized. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) made this decision consciously, prioritizing explainability and edge deployment, respectively. Unsupervised ML, valuable for discovering patterns in unlabeled data, particularly helpful in the medical domain where data sharing and expert knowledge limit available labeled datasets, was employed only by Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>). Echocardiography studies utilized data from single time points, missing the insights of serial appraisal. Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) and Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) failed to capture temporal dynamics, relying on clip-level thresholds of frame-independent analysis, restricting their ability to detect dynamic features relevant to PDA. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) did not explore a perinatal-only CNN model, which could have provided a fairer benchmark for comparison against their proposed multimodal CNN, potentially overestimating the added value of multimodality. Manual preprocessing steps, such as phonocardiogram segmentation (<xref ref-type="bibr" rid="B42">42</xref>) and frame filtering (<xref ref-type="bibr" rid="B33">33</xref>), introduced labor-intensive processes that reduced scalability and potentially biased training. Overfitting was evident in non-ensemble models (<xref ref-type="bibr" rid="B34">34</xref>), while convolutional neural network architectures were employed where newer methods, such as Vision Transformers, may have improved performance (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B44">44</xref>). 
Techniques such as data augmentation and transfer learning were employed in only one (<xref ref-type="bibr" rid="B33">33</xref>) and three studies (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B44">44</xref>, <xref ref-type="bibr" rid="B47">47</xref>) respectively. The lack of self-supervised pre-training, a promising method for leveraging unlabeled data, was another missed opportunity. Future research should adopt modern, scalable methodologies that balance interpretability with performance.</p>
</sec>
<sec id="s4e"><label>4.5</label><title>Model evaluation and validation gaps</title>
<p>Model evaluation and validation exhibited significant gaps, and overall model performance was generally modest, with the exception of echocardiography-based diagnostic prediction studies by Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) and Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>). However, due to substantial variations in study design&#x2014;including differences in inclusion and exclusion criteria, study populations, and methodologies&#x2014;along with high risk of bias, performance metrics are likely inflated, are not directly comparable and must be interpreted within the context of each study. For example, the highest-performing diagnostic model by Erno et al. (<xref ref-type="bibr" rid="B44">44</xref>) was evaluated on a sample of just eleven patients, limiting confidence in its generalizability. G&#x00F3;mez-Quintana et al. (<xref ref-type="bibr" rid="B42">42</xref>) relied on internal cross-validation without a withheld test set and reported only the AUC, omitting metrics such as sensitivity and specificity. Chang et al. (<xref ref-type="bibr" rid="B45">45</xref>) demonstrated poor sensitivity, while Park et al.&#x0027;s (<xref ref-type="bibr" rid="B43">43</xref>) model exhibited low precision (24&#x0025;&#x2013;42&#x0025;), likely attributable to class imbalance; their additional reporting of the false negative rate (the complement of recall) added little interpretative value. Jura et al. (<xref ref-type="bibr" rid="B48">48</xref>) failed to report the number of patients with and without PDA, making it impossible to evaluate class balance or contextualize sensitivity and specificity. Their reported odds ratios showed wide confidence intervals for some univariate predictors [e.g., PROM OR 13.03 [1.72&#x2013;98.7], SARS-CoV-2 infection OR 2.44 [0.28&#x2013;21.3]], reflecting low event counts. 
Similarly, Liu et al.&#x0027;s (<xref ref-type="bibr" rid="B46">46</xref>) wide AUC confidence interval (0.457&#x2013;0.841) underscored considerable variability and uncertainty in their model&#x0027;s performance. Finally, external validation was either absent or insufficient across all studies. To enhance reliability and generalizability, incorporating independent test sets, carefully selecting metrics, interpreting performance metrics appropriately, and conducting external validation are essential steps forward.</p>
</sec>
<sec id="s4f"><label>4.6</label><title>Explainability and clinical utility</title>
<p>Explainability, while addressed to some extent in most studies, exhibited notable limitations. Some imaging studies incorporated visualization methods, including saliency mapping and attention or activation heatmaps, though these were often demonstrated using only single or illustrative examples. While a few studies applied more advanced techniques like SHAP or GradCAM&#x002B;&#x002B;, others relied solely on global feature importance, which may not translate to individual patient explanations necessary for clinical implementation. Furthermore, explainability methods were not evaluated within the context of clinical workflows, raising uncertainty about their practical utility in supporting accurate clinical decisions.</p>
<p>Clinical utility was also constrained. Many studies employed artificially clean study populations that excluded common comorbidities, thereby failing to reflect the complexity of real-world PDA management. In addition, modest model performance with high risk of bias and a lack of external validation further undermined the clinical applicability of the proposed tools. For example, Park et al. (<xref ref-type="bibr" rid="B43">43</xref>) reported only a marginal improvement in diagnostic timing&#x2014;0.1 days&#x2014;when using their AI-assisted diagnostic workflow compared to clinician performance without AI. Moreover, many studies focused narrowly on binary classification tasks without accounting for disease severity, which reduces the clinical relevance of their outputs. Although echocardiography-based diagnosis prediction models (<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B44">44</xref>) achieved high classification accuracy, they concentrated on identifying PDA presence in preselected ductal views with color Doppler&#x2014;a task that is typically straightforward for clinicians and offers limited value. The more pressing challenge lies in performing comprehensive severity appraisal across multiple full echocardiographic exams and the prediction of clinically relevant outcomes&#x2014;areas where AI could provide meaningful support, as demonstrated by Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>). Similarly, G&#x00F3;mez-Quintana et al.&#x0027;s (<xref ref-type="bibr" rid="B42">42</xref>) model classified PDA but failed to specify severity or co-occurring congenital heart disease (CHD), limiting its clinical benefit. Regarding integration into clinical practice, whilst Lei et al. (<xref ref-type="bibr" rid="B33">33</xref>) opted for a lightweight architecture to accommodate future edge deployment, Park et al. 
(<xref ref-type="bibr" rid="B43">43</xref>) was the only study to present a user interface for real-time model deployment. Moving forward, future research should focus on developing interpretable models that address clinically meaningful tasks and demonstrate tangible benefits at the point of care.</p>
</sec>
<sec id="s4g"><label>4.7</label><title>Fairness</title>
<p>The absence of explicit consideration of AI fairness in the examined studies highlights a critical gap in current research. Addressing biases in training data and ensuring equitable performance across demographic groups is essential for developing inclusive and clinically applicable artificial intelligence models.</p>
</sec>
<sec id="s4h"><label>4.8</label><title>Transparency and reproducibility</title>
<p>Transparency and reproducibility were major shortcomings. None of the studies provided publicly available datasets, reflecting a significant challenge when working with sensitive patient data, and only Matsushita et al. (<xref ref-type="bibr" rid="B49">49</xref>) made code accessible. Manual preprocessing steps, such as frame filtering (<xref ref-type="bibr" rid="B33">33</xref>) and phonocardiogram segmentation (<xref ref-type="bibr" rid="B42">42</xref>), added further barriers to reproducibility. Sharma et al. (<xref ref-type="bibr" rid="B47">47</xref>) did not report essential details regarding the number of frames per clip, subsampling strategies, or how multi-view data were processed, limiting transparency and hindering reproducibility. Addressing these gaps through the sharing of anonymized data, models, and code is an important step toward advancing research and fostering progress in this domain.</p>
</sec>
<sec id="s4i"><label>4.9</label><title>Limitations of the review process</title>
<p>Despite a comprehensive search across multiple databases, only eleven studies met the inclusion criteria. This small sample size, whilst reflecting the emerging nature of the field, limits the generalizability of the findings and the strength of the conclusions that can be drawn. The review was restricted to peer-reviewed articles published in English, excluding non-English publications and grey literature, which may have omitted relevant studies and introduced potential language and publication bias. The included studies exhibited significant heterogeneity in methodologies, data sources, population characteristics, and performance metrics, which precluded formal meta-analysis or quantitative synthesis. Key aspects requiring interdisciplinary judgment&#x2014;including review conceptualization, interpretation of the evidence, narrative synthesis, and formulation of conclusions&#x2014;were addressed by consensus within a multidisciplinary author team comprising experts in neonatology, computer science, and pediatric cardiology. However, data extraction was performed by a single reviewer, which may have introduced subjective interpretation or oversight; this risk was partially mitigated through independent verification by a second reviewer and consensus-based resolution of discrepancies within the author team. While the Prediction Model Risk of Bias Assessment Tool (PROBAST) (<xref ref-type="bibr" rid="B39">39</xref>) and Joanna Briggs Institute (JBI) tools (<xref ref-type="bibr" rid="B40">40</xref>) were used to assess risk of bias, their traditional design for conventional prediction models and cross-sectional studies does not fully align with the diverse and complex methodologies of AI-based clinical research, limiting their suitability for the included studies. Finally, the reliance on narrative synthesis, although necessary given the heterogeneity of the studies, is inherently less objective and reproducible compared to quantitative meta-analyses.</p>
</sec>
<sec id="s4j"><label>4.10</label><title>Implications for practice, policy, and future research</title>
<p>The results of this review offer several implications for practice, policy, and future research.</p>
<sec id="s4j1"><label>4.10.1</label><title>Practice</title>
<p>The approaches discussed show potential to support clinicians in diagnosing, screening, and managing PDA, particularly in resource-limited settings with limited specialist expertise. Enhanced diagnostic accuracy and earlier identification of hemodynamically significant PDA may reduce delays in intervention and associated complications in preterm neonates. AI-based risk factor identification may inform the development of preventive strategies and targeted public health initiatives. Additionally, identifying subpopulations and factors influencing treatment outcomes could aid in evaluating treatment efficacy and pave the way for more individualized management strategies. However, as noted by McAdams et al. (<xref ref-type="bibr" rid="B32">32</xref>) and Sharma et al. (<xref ref-type="bibr" rid="B27">27</xref>), the application of AI in neonatology and pediatric cardiology remains in its infancy. The included studies demonstrate feasibility rather than high-performing, validated models ready for clinical integration.</p>
</sec>
<sec id="s4j2"><label>4.10.2</label><title>Policy</title>
<p>Standardizing definitions for PDA study groups, particularly hemodynamically significant PDA, is essential for improving study comparability and enabling model generalization&#x2014;a need emphasized in the broader PDA literature and reinforced by this review. National and international neonatology associations have a key role to play in leading efforts toward such standardization, facilitating the development and validation of severity scores to guide evidence-based criteria that can serve both clinical decision-making and research comparability. To support broader and more representative studies, secure, ethically responsible data sharing should be promoted across institutions, whilst safeguarding patient privacy. The endorsement of open-source collaboration&#x2014;through the sharing of models, code, and anonymized datasets&#x2014;would accelerate innovation and enhance transparency. Regulatory frameworks should guide the clinical validation and approval of artificial intelligence models to establish reliability, mitigate biases, address fairness, and ensure widespread applicability before clinical adoption.</p>
</sec>
<sec id="s4j3"><label>4.10.3</label><title>Future research</title>
<p>Future research should prioritize reaching consensus on standardized definitions for PDA subgroup classification&#x2014;particularly regarding what constitutes hemodynamic significance. Establishing clear and uniform criteria is essential for enabling comparability across studies and providing a reliable ground truth from which AI systems can learn. Given the pilot nature of the included studies, many were designed primarily to demonstrate feasibility and were therefore simplified in ways that limit clinical utility. Common comorbidities such as congenital heart defects and pulmonary hypertension were excluded, and tasks were narrowly defined&#x2014;for example, focusing on binary PDA detection rather than clinically nuanced assessments. With feasibility now established, future work should shift toward addressing more clinically meaningful tasks while striving for improved model performance and broader applicability. Multicenter studies should be implemented to improve generalizability across diverse populations and clinical settings. Larger datasets are needed to reduce the risk of bias and enable advanced DL modeling techniques. Incorporating temporal data, such as changes in cardiac motion or clinical trends, could enhance model accuracy and provide better insights into PDA evolution. Model explainability techniques should be implemented to facilitate clinical trust and usability. External validation on datasets from other institutions is necessary to assess generalizability. Advanced methodologies, including foundation models, vision transformers, self-supervised learning, and unsupervised approaches, have the potential to leverage high-dimensional or unlabeled data effectively and should be explored. Combining multimodal data&#x2014;such as imaging, clinical variables, and biomarkers&#x2014;may improve diagnostic precision and treatment predictions. 
Future studies should address real-world conditions, including class imbalance and population heterogeneity, and evaluate a variety of artificial intelligence-driven solutions against traditional baselines. Ethical concerns, including bias, fairness, and equitable access, require attention, and research should adopt standardized reporting frameworks like the Minimum Information for Clinical Artificial Intelligence Modeling (MI-CLAIM) (<xref ref-type="bibr" rid="B41">41</xref>) to ensure transparency and comparability.</p>
</sec>
</sec>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>Patent ductus arteriosus represents a complex and heterogeneous clinical condition, characterized by dynamic physiology, evolving disease severity, and frequent comorbidities in a highly vulnerable population. The absence of standardized subgroup definitions and consensus on optimal management strategies has contributed to variability in clinical practice and a fragmented research landscape. Within this context, artificial intelligence has been proposed as a potential tool to support more consistent, data-driven decision-making through the integration of high-dimensional clinical information.</p>
<p>Current AI-based approaches in PDA research demonstrate feasibility for supporting diagnostic processes, risk assessment, and prediction of treatment-related outcomes. However, most studies remain exploratory and lack the methodological maturity required for clinical validation or implementation. Key limitations include a high risk of bias, small sample sizes, limited external validation, and reliance on data that may not adequately reflect real-world clinical complexity.</p>
<p>Meaningful progress will depend on the use of comparable and clinically well-defined study populations and larger, more representative sample sizes, ideally achieved through prospective multicenter collaboration. Systematic risk-of-bias assessment and external validation should become standard practice. AI models should target clinically meaningful tasks using data representative of routine clinical practice, with explicit attention to fairness and potential sources of bias. Ongoing benchmarking of state-of-the-art AI methodologies against appropriate baseline methods, alongside the exploration of advanced techniques such as temporal modeling and multimodal data integration, may further enhance predictive performance and clinical relevance. Transparent reporting, sharing of code and model weights, and data-sharing strategies that protect patient privacy are essential for reproducibility and independent validation. Equally, explainability should be embedded in model design to enable clinicians to appropriately evaluate AI outputs and integrate them into clinical workflows, with AI intended to support, rather than replace, clinical judgment.</p>
<p>While the studies reviewed represent important early contributions, existing AI models are not yet positioned to meaningfully influence PDA care. Feasibility has been demonstrated, but further progress will require methodology attentive to bias, external validation and a sustained focus on clinical applicability. Addressing current limitations in future work will be pivotal in guiding AI applications for PDA from early feasibility toward clinically relevant and ethically sound translation. Only through such efforts can the potential of this technology be responsibly realized for this complex and highly vulnerable patient population.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>SL: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. TU: Conceptualization, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing. CP: Conceptualization, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing. SO-J: Conceptualization, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing. PB: Conceptualization, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. Generative AI models developed by OpenAI (specifically o4-mini-2025-04-16 and 4o-2024-11-20) were used during manuscript preparation to support tasks such as generating alternative phrasings, reducing verbosity, and correcting spelling and grammar. The process was heavily interactive and iterative, with the authors engaging in multiple rounds of refinement. All AI-generated content was critically reviewed before integration.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fped.2026.1648943/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fped.2026.1648943/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.xlsx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table2.xlsx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table3.xlsx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pugnaloni</surname> <given-names>F</given-names></name> <name><surname>Doni</surname> <given-names>D</given-names></name> <name><surname>Lucente</surname> <given-names>M</given-names></name> <name><surname>Fiocchi</surname> <given-names>S</given-names></name> <name><surname>Capolupo</surname> <given-names>I</given-names></name></person-group>. <article-title>Ductus arteriosus in fetal and perinatal life</article-title>. <source>J Cardiovasc Dev Dis</source>. (<year>2024</year>) <volume>11</volume>(<issue>4</issue>):<fpage>113</fpage>. <pub-id pub-id-type="doi">10.3390/jcdd11040113</pub-id><pub-id pub-id-type="pmid">38667731</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wren</surname> <given-names>JT</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name> <name><surname>Gillam-Krakauer</surname> <given-names>M</given-names></name></person-group>. <article-title>Contemporary perspectives on the patent ductus arteriosus in preterm neonates: a hemodynamics-driven approach</article-title>. <source>Curr Treat Options Pediatr</source>. (<year>2024</year>) <volume>10</volume>(<issue>3</issue>):<fpage>147</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1007/s40746-024-00296-3</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ro&#x0219;ca</surname> <given-names>I</given-names></name> <name><surname>Constantin</surname> <given-names>AT</given-names></name> <name><surname>Popescu</surname> <given-names>DE</given-names></name> <name><surname>Jura</surname> <given-names>AMC</given-names></name> <name><surname>Miu</surname> <given-names>A</given-names></name> <name><surname>Turenschi</surname> <given-names>A</given-names></name></person-group>. <article-title>Are we able to prevent neonatal readmission? A retrospective analysis from a pediatrics department in Ploie&#x0219;ti, Romania</article-title>. <source>Medicina (Kaunas)</source>. (<year>2024</year>) <volume>60</volume>(<issue>5</issue>):<fpage>705</fpage>. <pub-id pub-id-type="doi">10.3390/medicina60050705</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname> <given-names>A</given-names></name> <name><surname>El-Khuffash</surname> <given-names>AF</given-names></name></person-group>. <article-title>Defining &#x201C;haemodynamic significance&#x201D; of the patent ductus arteriosus: do we have all the answers?</article-title> <source>Neonatology</source>. (<year>2020</year>) <volume>117</volume>(<issue>2</issue>):<fpage>225</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1159/000506988</pub-id><pub-id pub-id-type="pmid">32450558</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Benitz</surname> <given-names>WE</given-names></name> <name><surname>Backes</surname> <given-names>CH</given-names></name></person-group>. <article-title>At a crossroads for early medical treatment of persistent patent ductus arteriosus in preterm infants</article-title>. <source>J Perinatol</source>. (<year>2024</year>) <volume>44</volume>(<issue>10</issue>):<fpage>1534</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1038/s41372-024-02022-1</pub-id><pub-id pub-id-type="pmid">38918573</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Semberova</surname> <given-names>J</given-names></name> <name><surname>Sirc</surname> <given-names>J</given-names></name> <name><surname>Miletin</surname> <given-names>J</given-names></name> <name><surname>Kucera</surname> <given-names>J</given-names></name> <name><surname>Berka</surname> <given-names>I</given-names></name> <name><surname>Sebkova</surname> <given-names>S</given-names></name><etal/></person-group> <article-title>Spontaneous closure of patent ductus arteriosus in infants &#x2264;1500&#x2005;g</article-title>. <source>Pediatrics</source>. (<year>2017</year>) <volume>140</volume>(<issue>2</issue>):<fpage>e20164258</fpage>. <pub-id pub-id-type="doi">10.1542/peds.2016-4258</pub-id><pub-id pub-id-type="pmid">28701390</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tolia</surname> <given-names>VN</given-names></name> <name><surname>Powers</surname> <given-names>GC</given-names></name> <name><surname>Kelleher</surname> <given-names>AS</given-names></name> <name><surname>Walker</surname> <given-names>MW</given-names></name> <name><surname>Herrman</surname> <given-names>KK</given-names></name> <name><surname>Ahmad</surname> <given-names>KA</given-names></name><etal/></person-group> <article-title>Low rate of spontaneous closure in premature infants discharged with a patent ductus arteriosus: a multicenter prospective study</article-title>. <source>J Pediatr</source>. (<year>2022</year>) <volume>240</volume>:<fpage>31</fpage>&#x2013;<lpage>36.e2</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpeds.2021.07.035</pub-id><pub-id pub-id-type="pmid">34293369</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McNamara</surname> <given-names>PJ</given-names></name> <name><surname>Jain</surname> <given-names>A</given-names></name> <name><surname>El-Khuffash</surname> <given-names>A</given-names></name> <name><surname>Giesinger</surname> <given-names>R</given-names></name> <name><surname>Weisz</surname> <given-names>D</given-names></name> <name><surname>Freud</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>Guidelines and recommendations for targeted neonatal echocardiography and cardiac point-of-care ultrasound in the neonatal intensive care unit: an update from the American Society of Echocardiography</article-title>. <source>J Am Soc Echocardiogr</source>. (<year>2024</year>) <volume>37</volume>(<issue>2</issue>):<fpage>171</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1016/j.echo.2023.11.016</pub-id><pub-id pub-id-type="pmid">38309835</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Babla</surname> <given-names>K</given-names></name> <name><surname>Duffy</surname> <given-names>D</given-names></name> <name><surname>Dumitru</surname> <given-names>R</given-names></name> <name><surname>Richards</surname> <given-names>J</given-names></name> <name><surname>Kulkarni</surname> <given-names>A</given-names></name></person-group>. <article-title>Repeatability of PDA diameter measurements on echocardiography</article-title>. <source>Eur J Pediatr</source>. (<year>2022</year>) <volume>181</volume>(<issue>1</issue>):<fpage>403</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1007/s00431-021-04178-w</pub-id><pub-id pub-id-type="pmid">34184120</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>Y</given-names></name> <name><surname>Chan</surname> <given-names>B</given-names></name> <name><surname>Noori</surname> <given-names>S</given-names></name> <name><surname>Ramanathan</surname> <given-names>R</given-names></name></person-group>. <article-title>Narrative review on echocardiographic evaluation of patent ductus arteriosus in preterm infants</article-title>. <source>J Cardiovasc Dev Dis</source>. (<year>2024</year>) <volume>11</volume>(<issue>7</issue>):<fpage>199</fpage>. <pub-id pub-id-type="doi">10.3390/jcdd11070199</pub-id><pub-id pub-id-type="pmid">39057619</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kindler</surname> <given-names>A</given-names></name> <name><surname>Seipolt</surname> <given-names>B</given-names></name> <name><surname>Heilmann</surname> <given-names>A</given-names></name> <name><surname>Range</surname> <given-names>U</given-names></name> <name><surname>R&#x00FC;diger</surname> <given-names>M</given-names></name> <name><surname>Hofmann</surname> <given-names>SR</given-names></name></person-group>. <article-title>Development of a diagnostic clinical score for hemodynamically significant patent ductus arteriosus</article-title>. <source>Front Pediatr</source>. (<year>2017</year>) <volume>5</volume>:<fpage>280</fpage>. <pub-id pub-id-type="doi">10.3389/fped.2017.00280</pub-id><pub-id pub-id-type="pmid">29312911</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gokulakrishnan</surname> <given-names>G</given-names></name> <name><surname>Kulkarni</surname> <given-names>M</given-names></name> <name><surname>He</surname> <given-names>S</given-names></name> <name><surname>Leeflang</surname> <given-names>MM</given-names></name> <name><surname>Cabrera</surname> <given-names>AG</given-names></name> <name><surname>Fernandes</surname> <given-names>CJ</given-names></name><etal/></person-group> <article-title>Brain natriuretic peptide and N-terminal brain natriuretic peptide for the diagnosis of haemodynamically significant patent ductus arteriosus in preterm neonates</article-title>. <source>Cochrane Database Syst Rev</source>. (<year>2022</year>) <volume>12</volume>(<issue>12</issue>):<fpage>CD013129</fpage>. <pub-id pub-id-type="doi">10.1002/14651858.CD013129.pub2</pub-id><pub-id pub-id-type="pmid">36478359</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname> <given-names>A</given-names></name> <name><surname>El-Khuffash</surname> <given-names>A</given-names></name></person-group>. <article-title>Patent ductus arteriosus clinical trials: lessons learned and future directions</article-title>. <source>Children (Basel)</source>. (<year>2021</year>) <volume>8</volume>(<issue>1</issue>):<fpage>47</fpage>. <pub-id pub-id-type="doi">10.3390/children8010047</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mitra</surname> <given-names>S</given-names></name> <name><surname>Florez</surname> <given-names>ID</given-names></name> <name><surname>Tamayo</surname> <given-names>ME</given-names></name> <name><surname>Mbuagbaw</surname> <given-names>L</given-names></name> <name><surname>Vanniyasingam</surname> <given-names>T</given-names></name> <name><surname>Veroniki</surname> <given-names>AA</given-names></name><etal/></person-group> <article-title>Association of placebo, indomethacin, ibuprofen, and acetaminophen with closure of hemodynamically significant patent ductus arteriosus in preterm infants: a systematic review and meta-analysis</article-title>. <source>JAMA</source>. (<year>2018</year>) <volume>319</volume>(<issue>12</issue>):<fpage>1221</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2018.1896</pub-id><pub-id pub-id-type="pmid">29584842</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mosalli</surname> <given-names>R</given-names></name> <name><surname>Alfaleh</surname> <given-names>K</given-names></name></person-group>. <article-title>Prophylactic surgical ligation of patent ductus arteriosus for prevention of mortality and morbidity in extremely low birth weight infants</article-title>. <source>Cochrane Database Syst Rev</source>. (<year>2008</year>) <volume>2008</volume>(<issue>1</issue>):<fpage>CD006181</fpage>. <pub-id pub-id-type="doi">10.1002/14651858.CD006181.pub2</pub-id><pub-id pub-id-type="pmid">18254095</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Reese</surname> <given-names>J</given-names></name> <name><surname>Shelton</surname> <given-names>EL</given-names></name> <name><surname>Slaughter</surname> <given-names>JC</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name></person-group>. <article-title>Prophylactic indomethacin revisited</article-title>. <source>J Pediatr</source>. (<year>2017</year>) <volume>186</volume>:<fpage>11</fpage>&#x2013;<lpage>14.e1</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpeds.2017.03.036</pub-id><pub-id pub-id-type="pmid">28396028</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kluckow</surname> <given-names>M</given-names></name> <name><surname>Jeffery</surname> <given-names>M</given-names></name> <name><surname>Gill</surname> <given-names>A</given-names></name> <name><surname>Evans</surname> <given-names>N</given-names></name></person-group>. <article-title>A randomised placebo-controlled trial of early treatment of the patent ductus arteriosus</article-title>. <source>Arch Dis Child Fetal Neonatal Ed</source>. (<year>2014</year>) <volume>99</volume>(<issue>2</issue>):<fpage>F99</fpage>&#x2013;<lpage>104</lpage>. <pub-id pub-id-type="doi">10.1136/archdischild-2013-304695</pub-id><pub-id pub-id-type="pmid">24317704</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hundscheid</surname> <given-names>T</given-names></name> <name><surname>Onland</surname> <given-names>W</given-names></name> <name><surname>Kooi</surname> <given-names>EMW</given-names></name> <name><surname>Vijlbrief</surname> <given-names>DC</given-names></name> <name><surname>De Vries</surname> <given-names>WB</given-names></name> <name><surname>Dijkman</surname> <given-names>KP</given-names></name><etal/></person-group> <article-title>Expectant management or early ibuprofen for patent ductus arteriosus</article-title>. <source>N Engl J Med</source>. (<year>2023</year>) <volume>388</volume>(<issue>11</issue>):<fpage>980</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa2207418</pub-id><pub-id pub-id-type="pmid">36477458</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>El-Khuffash</surname> <given-names>A</given-names></name> <name><surname>Weisz</surname> <given-names>DE</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name></person-group>. <article-title>Reflections of the changes in patent ductus arteriosus management during the last 10 years</article-title>. <source>Arch Dis Child Fetal Neonatal Ed</source>. (<year>2016</year>) <volume>101</volume>(<issue>5</issue>):<fpage>F474</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1136/archdischild-2014-306214</pub-id><pub-id pub-id-type="pmid">27118761</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jansen</surname> <given-names>EJS</given-names></name> <name><surname>Hundscheid</surname> <given-names>T</given-names></name> <name><surname>Onland</surname> <given-names>W</given-names></name> <name><surname>Kooi</surname> <given-names>EMW</given-names></name> <name><surname>Andriessen</surname> <given-names>P</given-names></name> <name><surname>de Boode</surname> <given-names>WP</given-names></name></person-group>. <article-title>Factors associated with benefit of treatment of patent ductus arteriosus in preterm infants: a systematic review and meta-analysis</article-title>. <source>Front Pediatr</source>. (<year>2021</year>) <volume>9</volume>:<fpage>626262</fpage>. <pub-id pub-id-type="doi">10.3389/fped.2021.626262</pub-id><pub-id pub-id-type="pmid">33634058</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hundscheid</surname> <given-names>T</given-names></name> <name><surname>Jansen</surname> <given-names>EJS</given-names></name> <name><surname>Onland</surname> <given-names>W</given-names></name> <name><surname>Kooi</surname> <given-names>EMW</given-names></name> <name><surname>Andriessen</surname> <given-names>P</given-names></name> <name><surname>de Boode</surname> <given-names>WP</given-names></name></person-group>. <article-title>Conservative management of patent ductus arteriosus in preterm infants&#x2014;a systematic review and meta-analyses assessing differences in outcome measures between randomized controlled trials and cohort studies</article-title>. <source>Front Pediatr</source>. (<year>2021</year>) <volume>9</volume>:<fpage>626261</fpage>. <pub-id pub-id-type="doi">10.3389/fped.2021.626261</pub-id><pub-id pub-id-type="pmid">33718300</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mitra</surname> <given-names>S</given-names></name> <name><surname>Scrivens</surname> <given-names>A</given-names></name> <name><surname>von Kursell</surname> <given-names>AM</given-names></name> <name><surname>Disher</surname> <given-names>T</given-names></name></person-group>. <article-title>Early treatment versus expectant management of hemodynamically significant patent ductus arteriosus for preterm infants</article-title>. <source>Cochrane Database Syst Rev</source>. (<year>2020</year>) <volume>12</volume>(<issue>12</issue>):<fpage>CD013278</fpage>. <pub-id pub-id-type="doi">10.1002/14651858.CD013278.pub2</pub-id><pub-id pub-id-type="pmid">33301630</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zonnenberg</surname> <given-names>I</given-names></name> <name><surname>de Waal</surname> <given-names>K</given-names></name></person-group>. <article-title>The definition of a haemodynamic significant duct in randomized controlled trials: a systematic literature review</article-title>. <source>Acta Paediatr</source>. (<year>2012</year>) <volume>101</volume>(<issue>3</issue>):<fpage>247</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1111/j.1651-2227.2011.02468.x</pub-id><pub-id pub-id-type="pmid">21913976</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Al-Turkait</surname> <given-names>A</given-names></name> <name><surname>Szatkowski</surname> <given-names>L</given-names></name> <name><surname>Choonara</surname> <given-names>I</given-names></name> <name><surname>Ojha</surname> <given-names>S</given-names></name></person-group>. <article-title>Management of patent ductus arteriosus in very preterm infants in England and Wales: a retrospective cohort study</article-title>. <source>BMJ Paediatr Open</source>. (<year>2022</year>) <volume>6</volume>(<issue>1</issue>):<fpage>e001424</fpage>. <pub-id pub-id-type="doi">10.1136/bmjpo-2022-001424</pub-id><pub-id pub-id-type="pmid">36053632</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ambalavanan</surname> <given-names>N</given-names></name> <name><surname>Aucott</surname> <given-names>SW</given-names></name> <name><surname>Salavitabar</surname> <given-names>A</given-names></name> <name><surname>Levy</surname> <given-names>VY</given-names></name></person-group>. <collab>Committee on Fetus and Newborn, Section on Cardiology and Cardiac Surgery</collab>. <article-title>Patent ductus arteriosus in preterm infants</article-title>. <source>Pediatrics</source>. (<year>2025</year>) <volume>155</volume>(<issue>5</issue>):<fpage>e2025071425</fpage>. <pub-id pub-id-type="doi">10.1542/peds.2025-071425</pub-id><pub-id pub-id-type="pmid">40288780</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sehgal</surname> <given-names>A</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name></person-group>. <article-title>International perspective on management of a patent ductus arteriosus: lessons learned</article-title>. <source>Semin Fetal Neonatal Med</source>. (<year>2018</year>) <volume>23</volume>(<issue>4</issue>):<fpage>278</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1016/j.siny.2018.03.002</pub-id><pub-id pub-id-type="pmid">29534972</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sharma</surname> <given-names>P</given-names></name> <name><surname>Beam</surname> <given-names>K</given-names></name> <name><surname>Levy</surname> <given-names>P</given-names></name> <name><surname>Beam</surname> <given-names>AL</given-names></name></person-group>. <article-title>PD(AI): the role of artificial intelligence in the management of patent ductus arteriosus</article-title>. <source>J Perinatol</source>. (<year>2023</year>) <volume>43</volume>(<issue>2</issue>):<fpage>257</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1038/s41372-023-01606-7</pub-id><pub-id pub-id-type="pmid">36646822</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ghorbani</surname> <given-names>A</given-names></name> <name><surname>Ouyang</surname> <given-names>D</given-names></name> <name><surname>Abid</surname> <given-names>A</given-names></name> <name><surname>He</surname> <given-names>B</given-names></name> <name><surname>Chen</surname> <given-names>JH</given-names></name> <name><surname>Harrington</surname> <given-names>RA</given-names></name><etal/></person-group> <article-title>Deep learning interpretation of echocardiograms</article-title>. <source>NPJ Digit Med</source>. (<year>2020</year>) <volume>3</volume>(<issue>1</issue>):<fpage>10</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-019-0216-8</pub-id><pub-id pub-id-type="pmid">31993508</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maturi</surname> <given-names>B</given-names></name> <name><surname>Dulal</surname> <given-names>S</given-names></name> <name><surname>Sayana</surname> <given-names>SB</given-names></name> <name><surname>Ibrahim</surname> <given-names>A</given-names></name> <name><surname>Ramakrishna</surname> <given-names>M</given-names></name> <name><surname>Chinta</surname> <given-names>V</given-names></name><etal/></person-group> <article-title>Revolutionizing cardiology: the role of artificial intelligence in echocardiography</article-title>. <source>J Clin Med</source>. (<year>2025</year>) <volume>14</volume>(<issue>2</issue>):<fpage>625</fpage>. <pub-id pub-id-type="doi">10.3390/jcm14020625</pub-id><pub-id pub-id-type="pmid">39860630</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kelly</surname> <given-names>CJ</given-names></name> <name><surname>Karthikesalingam</surname> <given-names>A</given-names></name> <name><surname>Suleyman</surname> <given-names>M</given-names></name> <name><surname>Corrado</surname> <given-names>G</given-names></name> <name><surname>King</surname> <given-names>D</given-names></name></person-group>. <article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>. <source>BMC Med</source>. (<year>2019</year>) <volume>17</volume>(<issue>1</issue>):<fpage>195</fpage>. <pub-id pub-id-type="doi">10.1186/s12916-019-1426-2</pub-id><pub-id pub-id-type="pmid">31665002</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Markus</surname> <given-names>AF</given-names></name> <name><surname>Kors</surname> <given-names>JA</given-names></name> <name><surname>Rijnbeek</surname> <given-names>PR</given-names></name></person-group>. <article-title>The role of explainability in creating trustworthy artificial intelligence for health care: a comprehensive survey of the terminology, design choices, and evaluation strategies</article-title>. <source>J Biomed Inform</source>. (<year>2021</year>) <volume>113</volume>:<fpage>103655</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103655</pub-id><pub-id pub-id-type="pmid">33309898</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McAdams</surname> <given-names>RM</given-names></name> <name><surname>Kaur</surname> <given-names>R</given-names></name> <name><surname>Sun</surname> <given-names>Y</given-names></name> <name><surname>Bindra</surname> <given-names>H</given-names></name> <name><surname>Cho</surname> <given-names>SJ</given-names></name> <name><surname>Singh</surname> <given-names>H</given-names></name></person-group>. <article-title>Predicting clinical outcomes using artificial intelligence and machine learning in neonatal intensive care units: a systematic review</article-title>. <source>J Perinatol</source>. (<year>2022</year>) <volume>42</volume>(<issue>12</issue>):<fpage>1561</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1038/s41372-022-01392-8</pub-id><pub-id pub-id-type="pmid">35562414</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lei</surname> <given-names>H</given-names></name> <name><surname>Ashrafi</surname> <given-names>A</given-names></name> <name><surname>Chang</surname> <given-names>P</given-names></name> <name><surname>Chang</surname> <given-names>A</given-names></name> <name><surname>Lai</surname> <given-names>W</given-names></name></person-group>. <article-title>Patent ductus arteriosus (PDA) detection in echocardiograms using deep learning</article-title>. <source>Intell Based Med</source>. (<year>2022</year>) <volume>6</volume>:<fpage>100054</fpage>. <pub-id pub-id-type="doi">10.1016/j.ibmed.2022.100054</pub-id></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Na</surname> <given-names>JY</given-names></name> <name><surname>Kim</surname> <given-names>D</given-names></name> <name><surname>Kwon</surname> <given-names>AM</given-names></name> <name><surname>Jeon</surname> <given-names>JY</given-names></name> <name><surname>Kim</surname> <given-names>H</given-names></name> <name><surname>Kim</surname> <given-names>CR</given-names></name><etal/></person-group> <article-title>Artificial intelligence model comparison for risk factor analysis of patent ductus arteriosus in nationwide very low birth weight infants cohort</article-title>. <source>Sci Rep</source>. (<year>2021</year>) <volume>11</volume>(<issue>1</issue>):<fpage>22353</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-01640-5</pub-id><pub-id pub-id-type="pmid">34785709</pub-id></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>El-Khuffash</surname> <given-names>A</given-names></name> <name><surname>James</surname> <given-names>AT</given-names></name> <name><surname>Corcoran</surname> <given-names>JD</given-names></name> <name><surname>Dicker</surname> <given-names>P</given-names></name> <name><surname>Franklin</surname> <given-names>O</given-names></name> <name><surname>Elsayed</surname> <given-names>YN</given-names></name><etal/></person-group> <article-title>A patent ductus arteriosus severity score predicts chronic lung disease or death before discharge</article-title>. <source>J Pediatr</source>. (<year>2015</year>) <volume>167</volume>(<issue>6</issue>):<fpage>1354</fpage>&#x2013;<lpage>1361.e2</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpeds.2015.09.028</pub-id><pub-id pub-id-type="pmid">26474706</pub-id></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Umapathi</surname> <given-names>KK</given-names></name> <name><surname>Muller</surname> <given-names>B</given-names></name> <name><surname>Sosnowski</surname> <given-names>C</given-names></name> <name><surname>Thavamani</surname> <given-names>A</given-names></name> <name><surname>Murphy</surname> <given-names>J</given-names></name> <name><surname>Awad</surname> <given-names>S</given-names></name><etal/></person-group> <article-title>A novel patent ductus arteriosus severity score to predict clinical outcomes in premature neonates</article-title>. <source>J Cardiovasc Dev Dis</source>. (<year>2022</year>) <volume>9</volume>(<issue>4</issue>):<fpage>114</fpage>. <pub-id pub-id-type="doi">10.3390/jcdd9040114</pub-id><pub-id pub-id-type="pmid">35448090</pub-id></mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Page</surname> <given-names>MJ</given-names></name> <name><surname>McKenzie</surname> <given-names>JE</given-names></name> <name><surname>Bossuyt</surname> <given-names>PM</given-names></name> <name><surname>Boutron</surname> <given-names>I</given-names></name> <name><surname>Hoffmann</surname> <given-names>TC</given-names></name> <name><surname>Mulrow</surname> <given-names>CD</given-names></name><etal/></person-group> <article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title>. <source>Br Med J</source>. (<year>2021</year>) <volume>372</volume>:<fpage>n71</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id></mixed-citation></ref>
<ref id="B38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sridharan</surname> <given-names>K</given-names></name> <name><surname>Doss C</surname> <given-names>GP</given-names></name> <name><surname>Cathryn R</surname> <given-names>H</given-names></name> <name><surname>Kumar D</surname> <given-names>T</given-names></name> <name><surname>Al Jufairi</surname> <given-names>M</given-names></name></person-group>. <article-title>Comparative analysis of machine learning algorithms evaluating the single nucleotide polymorphisms of metabolizing enzymes with clinical outcomes following intravenous paracetamol in preterm neonates with patent ductus arteriosus</article-title>. <source>Curr Drug Metab</source>. (<year>2024</year>) <volume>25</volume>(<issue>2</issue>):<fpage>128</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.2174/0113892002289238240222072027</pub-id><pub-id pub-id-type="pmid">38445694</pub-id></mixed-citation></ref>
<ref id="B39"><label>39.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolff</surname> <given-names>RF</given-names></name> <name><surname>Moons</surname> <given-names>KGM</given-names></name> <name><surname>Riley</surname> <given-names>RD</given-names></name> <name><surname>Whiting</surname> <given-names>PF</given-names></name> <name><surname>Westwood</surname> <given-names>M</given-names></name> <name><surname>Collins</surname> <given-names>GS</given-names></name><etal/></person-group> <article-title>PROBAST: a tool to assess the risk of bias and applicability of prediction model studies</article-title>. <source>Ann Intern Med</source>. (<year>2019</year>) <volume>170</volume>(<issue>1</issue>):<fpage>51</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.7326/M18-1376</pub-id><pub-id pub-id-type="pmid">30596875</pub-id></mixed-citation></ref>
<ref id="B40"><label>40.</label><mixed-citation publication-type="other"><collab>JBI</collab>. <comment>Chapter 7: Systematic reviews of etiology and risk. In: <italic>JBI Manual for Evidence Synthesis</italic>. JBI</comment>. (<year>2020</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://jbi-global-wiki.refined.site/space/MANUAL/355863557/Previous+versions?attachment=/download/attachments/355863557/JBI_Reviewers_Manual_2020June.pdf%26type=application/pdf%26filename=JBI_Reviewers_Manual_2020June.pdf#page=217">https://jbi-global-wiki.refined.site/space/MANUAL/355863557/Previous&#x002B;versions?attachment&#x003D;/download/attachments/355863557/JBI_Reviewers_Manual_2020June.pdf&#x0026;type&#x003D;application/pdf&#x0026;filename&#x003D;JBI_Reviewers_Manual_2020June.pdf&#x0023;page&#x003D;217</ext-link> <comment>(Accessed 17 June 2025)</comment>.</mixed-citation></ref>
<ref id="B41"><label>41.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Norgeot</surname> <given-names>B</given-names></name> <name><surname>Quer</surname> <given-names>G</given-names></name> <name><surname>Beaulieu-Jones</surname> <given-names>BK</given-names></name> <name><surname>Torkamani</surname> <given-names>A</given-names></name> <name><surname>Dias</surname> <given-names>R</given-names></name> <name><surname>Gianfrancesco</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Minimum information about clinical artificial intelligence modeling: the MI-CLAIM checklist</article-title>. <source>Nat Med</source>. (<year>2020</year>) <volume>26</volume>(<issue>9</issue>):<fpage>1320</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-020-1041-y</pub-id><pub-id pub-id-type="pmid">32908275</pub-id></mixed-citation></ref>
<ref id="B42"><label>42.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>G&#x00F3;mez-Quintana</surname> <given-names>S</given-names></name> <name><surname>Schwarz</surname> <given-names>CE</given-names></name> <name><surname>Shelevytsky</surname> <given-names>I</given-names></name> <name><surname>Shelevytska</surname> <given-names>V</given-names></name> <name><surname>Semenova</surname> <given-names>O</given-names></name> <name><surname>Factor</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>A framework for AI-assisted detection of patent ductus arteriosus from neonatal phonocardiogram</article-title>. <source>Healthcare</source>. (<year>2021</year>) <volume>9</volume>(<issue>2</issue>):<fpage>169</fpage>. <pub-id pub-id-type="doi">10.3390/healthcare9020169</pub-id></mixed-citation></ref>
<ref id="B43"><label>43.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Park</surname> <given-names>S</given-names></name> <name><surname>Moon</surname> <given-names>J</given-names></name> <name><surname>Eun</surname> <given-names>H</given-names></name> <name><surname>Hong</surname> <given-names>JH</given-names></name> <name><surname>Lee</surname> <given-names>K</given-names></name></person-group>. <article-title>Artificial intelligence-based diagnostic support system for patent ductus arteriosus in premature infants</article-title>. <source>J Clin Med</source>. (<year>2024</year>) <volume>13</volume>(<issue>7</issue>):<fpage>2089</fpage>. <pub-id pub-id-type="doi">10.3390/jcm13072089</pub-id><pub-id pub-id-type="pmid">38610854</pub-id></mixed-citation></ref>
<ref id="B44"><label>44.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Erno</surname> <given-names>J</given-names></name> <name><surname>Gomes</surname> <given-names>T</given-names></name> <name><surname>Baltimore</surname> <given-names>C</given-names></name> <name><surname>Lineberger</surname> <given-names>JP</given-names></name> <name><surname>Smith</surname> <given-names>DH</given-names></name> <name><surname>Baker</surname> <given-names>GH</given-names></name></person-group>. <article-title>Automated identification of patent ductus arteriosus using a computer vision model</article-title>. <source>J Ultrasound Med</source>. (<year>2023</year>) <volume>42</volume>(<issue>12</issue>):<fpage>2707</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1002/jum.16305</pub-id><pub-id pub-id-type="pmid">37449663</pub-id></mixed-citation></ref>
<ref id="B45"><label>45.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname> <given-names>P</given-names></name> <name><surname>Choi</surname> <given-names>HS</given-names></name> <name><surname>Lee</surname> <given-names>J</given-names></name> <name><surname>Kim</surname> <given-names>HH</given-names></name></person-group>. <article-title>Extraction and evaluation of features of preterm patent ductus arteriosus in chest X-ray images using deep learning</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>(<issue>1</issue>):<fpage>29382</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-79361-8</pub-id><pub-id pub-id-type="pmid">39592675</pub-id></mixed-citation></ref>
<ref id="B46"><label>46.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>TX</given-names></name> <name><surname>Zheng</surname> <given-names>JX</given-names></name> <name><surname>Chen</surname> <given-names>Z</given-names></name> <name><surname>Zhang</surname> <given-names>ZC</given-names></name> <name><surname>Li</surname> <given-names>D</given-names></name> <name><surname>Shi</surname> <given-names>LP</given-names></name></person-group>. <article-title>An interpretable machine-learning model for predicting the efficacy of nonsteroidal anti-inflammatory drugs for closing hemodynamically significant patent ductus arteriosus in preterm infants</article-title>. <source>Front Pediatr</source>. (<year>2023</year>) <volume>11</volume>:<fpage>1097950</fpage>. <pub-id pub-id-type="doi">10.3389/fped.2023.1097950</pub-id><pub-id pub-id-type="pmid">37082702</pub-id></mixed-citation></ref>
<ref id="B47"><label>47.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sharma</surname> <given-names>P</given-names></name> <name><surname>Gearhart</surname> <given-names>A</given-names></name> <name><surname>Luo</surname> <given-names>G</given-names></name> <name><surname>Palepu</surname> <given-names>A</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Mayourian</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Development and validation of a novel deep learning model to predict pharmacologic closure of patent ductus arteriosus in premature infants</article-title>. <source>J Am Soc Echocardiogr</source>. (<year>2025</year>) <volume>38</volume>(<issue>7</issue>):<fpage>S0894731725002093</fpage>. <pub-id pub-id-type="doi">10.1016/j.echo.2025.03.018</pub-id></mixed-citation></ref>
<ref id="B48"><label>48.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jura</surname> <given-names>AMC</given-names></name> <name><surname>Popescu</surname> <given-names>DE</given-names></name> <name><surname>C&#x00EE;tu</surname> <given-names>C</given-names></name> <name><surname>Biri&#x0219;</surname> <given-names>M</given-names></name> <name><surname>Pienar</surname> <given-names>C</given-names></name> <name><surname>Paul</surname> <given-names>C</given-names></name><etal/></person-group> <article-title>Predicting risk for patent ductus arteriosus in the neonate: a machine learning analysis</article-title>. <source>Medicina (Kaunas)</source>. (<year>2025</year>) <volume>61</volume>(<issue>4</issue>):<fpage>603</fpage>. <pub-id pub-id-type="doi">10.3390/medicina61040603</pub-id></mixed-citation></ref>
<ref id="B49"><label>49.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Matsushita</surname> <given-names>FY</given-names></name> <name><surname>Krebs</surname> <given-names>VLJ</given-names></name> <name><surname>De Carvalho</surname> <given-names>WB</given-names></name></person-group>. <article-title>Identifying two distinct subphenotypes of patent ductus arteriosus in preterm infants using machine learning</article-title>. <source>Eur J Pediatr</source>. (<year>2023</year>) <volume>182</volume>(<issue>5</issue>):<fpage>2173</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1007/s00431-023-04882-9</pub-id><pub-id pub-id-type="pmid">36853570</pub-id></mixed-citation></ref>
<ref id="B50"><label>50.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>SY</given-names></name> <name><surname>Zhang</surname> <given-names>YD</given-names></name> <name><surname>Li</surname> <given-names>H</given-names></name> <name><surname>Wang</surname> <given-names>QY</given-names></name> <name><surname>Ye</surname> <given-names>QF</given-names></name> <name><surname>Wang</surname> <given-names>XM</given-names></name><etal/></person-group> <article-title>Explainable machine learning model for predicting decline in platelet count after interventional closure in children with patent ductus arteriosus</article-title>. <source>Front Pediatr</source>. (<year>2025</year>) <volume>13</volume>:<fpage>1519002</fpage>. <pub-id pub-id-type="doi">10.3389/fped.2025.1519002</pub-id><pub-id pub-id-type="pmid">39981204</pub-id></mixed-citation></ref>
<ref id="B51"><label>51.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Reddy</surname> <given-names>CD</given-names></name> <name><surname>Lopez</surname> <given-names>L</given-names></name> <name><surname>Ouyang</surname> <given-names>D</given-names></name> <name><surname>Zou</surname> <given-names>JY</given-names></name> <name><surname>He</surname> <given-names>B</given-names></name></person-group>. <article-title>Video-based deep learning for automated assessment of left ventricular ejection fraction in pediatric patients</article-title>. <source>J Am Soc Echocardiogr</source>. (<year>2023</year>) <volume>36</volume>(<issue>5</issue>):<fpage>482</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.echo.2023.01.015</pub-id><pub-id pub-id-type="pmid">36754100</pub-id></mixed-citation></ref>
<ref id="B52"><label>52.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sehgal</surname> <given-names>A</given-names></name> <name><surname>Paul</surname> <given-names>E</given-names></name> <name><surname>Menahem</surname> <given-names>S</given-names></name></person-group>. <article-title>Functional echocardiography in staging for ductal disease severity: role in predicting outcomes</article-title>. <source>Eur J Pediatr</source>. (<year>2013</year>) <volume>172</volume>(<issue>2</issue>):<fpage>179</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/s00431-012-1851-0</pub-id><pub-id pub-id-type="pmid">23052621</pub-id></mixed-citation></ref>
<ref id="B53"><label>53.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fink</surname> <given-names>D</given-names></name> <name><surname>El-Khuffash</surname> <given-names>A</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name> <name><surname>Nitzan</surname> <given-names>I</given-names></name> <name><surname>Hammerman</surname> <given-names>C</given-names></name></person-group>. <article-title>Tale of two patent ductus arteriosus severity scores: similarities and differences</article-title>. <source>Am J Perinatol</source>. (<year>2018</year>) <volume>35</volume>(<issue>1</issue>):<fpage>55</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1055/s-0037-1605576</pub-id><pub-id pub-id-type="pmid">28787748</pub-id></mixed-citation></ref>
<ref id="B54"><label>54.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Giesinger</surname> <given-names>RE</given-names></name> <name><surname>Hobson</surname> <given-names>AA</given-names></name> <name><surname>Bischoff</surname> <given-names>AR</given-names></name> <name><surname>Klein</surname> <given-names>JM</given-names></name> <name><surname>McNamara</surname> <given-names>PJ</given-names></name></person-group>. <article-title>Impact of early screening echocardiography and targeted PDA treatment on neonatal outcomes in &#x201C;22&#x2013;23&#x201D; week and &#x201C;24&#x2013;26&#x201D; infants</article-title>. <source>Semin Perinatol</source>. (<year>2023</year>) <volume>47</volume>(<issue>2</issue>):<fpage>151721</fpage>. <pub-id pub-id-type="doi">10.1016/j.semperi.2023.151721</pub-id><pub-id pub-id-type="pmid">36882362</pub-id></mixed-citation></ref>
<ref id="B55"><label>55.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Masutani</surname> <given-names>S</given-names></name> <name><surname>Isayama</surname> <given-names>T</given-names></name> <name><surname>Kobayashi</surname> <given-names>T</given-names></name> <name><surname>Pak</surname> <given-names>K</given-names></name> <name><surname>Tomotaki</surname> <given-names>S</given-names></name> <name><surname>Iwami</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Generation of PLASE score for patent ductus arteriosus using the PLASE study database</article-title>. <source>Pediatr Res</source>. (<year>2025</year>) <volume>98</volume>:<fpage>152</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1038/s41390-025-03803-w</pub-id><pub-id pub-id-type="pmid">39922923</pub-id></mixed-citation></ref>
<ref id="B56"><label>56.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gearhart</surname> <given-names>A</given-names></name> <name><surname>Elrod</surname> <given-names>M</given-names></name> <name><surname>Gomes</surname> <given-names>T</given-names></name> <name><surname>Golbus</surname> <given-names>A</given-names></name> <name><surname>Baltimore</surname> <given-names>C</given-names></name> <name><surname>Wakser</surname> <given-names>C</given-names></name><etal/></person-group> <article-title>Abstract 4135928: externally validated deep learning model for patent ductus arteriosus detection by echocardiography in preterm infants</article-title>. <source>Circulation</source>. (<year>2024</year>) <volume>150</volume>(<issue>Suppl_1</issue>):<fpage>A4135928</fpage>. <pub-id pub-id-type="doi">10.1161/circ.150.suppl_1.4135928</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/264489/overview">Fernando Caba&#x00F1;as</ext-link>, Quironsalud Madrid University Hospital, Spain</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1485434/overview">Xin Zhang</ext-link>, Capital Medical University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3157434/overview">Ioana Rosca</ext-link>, Carol Davila University of Medicine and Pharmacy, Romania</p></fn>
</fn-group>
</back>
</article>