<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell. Infect. Microbiol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Cellular and Infection Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell. Infect. Microbiol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2235-2988</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcimb.2026.1740707</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Explainable machine learning for early detection of <italic>Escherichia coli</italic> urinary tract infections: integrating SHAP interpretation and bacterial epidemiology</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Zhang</surname><given-names>Jie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Jiang</surname><given-names>Ying-Ying</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Zhu</surname><given-names>Ying</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Pan</surname><given-names>Chu-Ying</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yao</surname><given-names>Ling-Hui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zheng</surname><given-names>Ying-Ying</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname><given-names>Shi-Yan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x2021;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2804534/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Shi</surname><given-names>Jinbao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x2021;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Clinical Laboratory, Fuding Hospital, Fujian University of Traditional Chinese Medicine</institution>, <city>Fuding</city>, <state>Fujian</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Nephrology, Fuding Hospital, Fujian University of Traditional Chinese Medicine</institution>, <city>Fuding</city>, <state>Fujian</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Nephrology, Ningde Hospital of Traditional Chinese Medicine</institution>, <city>Ningde</city>, <state>Fujian</state>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Shi-Yan Zhang, <email xlink:href="mailto:myebox@139.com">myebox@139.com</email>; Jinbao Shi, <email xlink:href="mailto:1301803387@qq.com">1301803387@qq.com</email></corresp>
<fn fn-type="equal" id="fn003">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work</p></fn>
<fn fn-type="other" id="fn004">
<label>&#x2021;</label>
<p>ORCID: Shi-Yan Zhang, <uri xlink:href="https://orcid.org/0000-0003-4305-8213">orcid.org/0000-0003-4305-8213</uri>; Jinbao Shi, <uri xlink:href="https://orcid.org/0009-0009-2663-8030">orcid.org/0009-0009-2663-8030</uri></p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-13">
<day>13</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>16</volume>
<elocation-id>1740707</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>04</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zhang, Jiang, Zhu, Pan, Yao, Zheng, Zhang and Shi.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zhang, Jiang, Zhu, Pan, Yao, Zheng, Zhang and Shi</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-13">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p><italic>Escherichia coli</italic> is the predominant uropathogen in urinary tract infections (UTIs), but culture-based identification is time-consuming. This study aimed to develop an explainable, culture-independent model to distinguish <italic>E. coli</italic> from other uropathogens using routinely collected clinical data.</p>
</sec>
<sec>
<title>Methods</title>
<p>We retrospectively analyzed 308 hospitalized patients with culture-confirmed UTIs at Fuding Hospital, Fujian University of Traditional Chinese Medicine (January&#x2013;December 2023), classified as <italic>E. coli</italic> (n = 158) or non&#x2013;<italic>E. coli</italic> (n = 150). Species identification was performed using an automated microbiology system. Nineteen predictors (sex, urinary leukocyte grade, and 17 routine laboratory variables) were used. Associations with <italic>E. coli</italic> UTI were examined using univariate and multivariable logistic regression. A Random Forest (RF) classifier was developed with SHapley Additive exPlanations (SHAP) for interpretability. Data were split using a stratified 70/30 train&#x2013;test split; 5-fold stratified cross-validation within the training set was used for hyperparameter tuning, and final performance (discrimination and calibration) was reported on the held-out test set. RF was additionally benchmarked against regularized logistic regression, calibrated linear SVM, and gradient boosting using the same protocol.</p>
</sec>
<sec>
<title>Results</title>
<p><italic>E. coli</italic> accounted for 51.3% of isolates, followed by <italic>Enterococcus</italic> spp. (18.5%) and <italic>Klebsiella</italic> spp. (7.8%). Compared with non&#x2013;<italic>E. coli</italic> cases, <italic>E. coli</italic> infections were more common in females and showed higher lymphocyte counts (LYM), alanine aminotransferase (ALT), and albumin (ALB) (all P &lt; 0.05). Multivariable logistic regression identified sex, LYM, and urinary leukocyte grade as independent predictors. On the held-out test set, RF achieved moderate discrimination (ROC-AUC = 0.66; average precision = 0.66) with calibration assessed by Brier score and calibration slope. SHAP highlighted sex, LYM, and ALT as the most influential predictors and revealed patient-level heterogeneity in feature effects.</p>
</sec>
<sec>
<title>Conclusions</title>
<p><italic>E. coli</italic> remains the predominant pathogen among hospitalized UTIs. An explainable RF model using routine laboratory variables provided moderate, reproducible discrimination of <italic>E. coli</italic> vs non&#x2013;<italic>E. coli</italic> UTIs and may support earlier decision-making while awaiting culture results.</p>
</sec>
</abstract>
<kwd-group>
<kwd>biomarkers</kwd>
<kwd><italic>Escherichia coli</italic></kwd>
<kwd>machine learning</kwd>
<kwd>Random Forest</kwd>
<kwd>SHAP</kwd>
<kwd>urinary tract infection</kwd>
<kwd>urine culture</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Project on Clinical Research of Fujian University of Traditional Chinese Medicine, China (Grant/Award Number: XB2024107).</funding-statement>
</funding-group>
<counts>
<fig-count count="9"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="25"/>
<page-count count="16"/>
<word-count count="7445"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Clinical and Diagnostic Microbiology and Immunology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Urinary tract infections (UTIs) are among the most prevalent bacterial infections across all age groups, with particularly high incidence in women and older adults (<xref ref-type="bibr" rid="B1">Mancuso et&#xa0;al., 2023</xref>). Between 1990 and 2019, the global number of UTI cases rose substantially, from approximately 252 million to 405 million (<xref ref-type="bibr" rid="B2">Yang et&#xa0;al., 2022</xref>). <italic>Escherichia coli</italic> remains a leading uropathogen, accounting for a large proportion of uncomplicated community-acquired UTIs and ranking among the most common pathogens in healthcare-associated infections (<xref ref-type="bibr" rid="B3">Zagaglia et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B4">Chowdhury et&#xa0;al., 2024</xref>). Although empirical antibiotic therapy is often effective, timely and accurate pathogen identification is essential to guide targeted treatment, reduce antimicrobial misuse, and prevent serious complications such as pyelonephritis and urosepsis (<xref ref-type="bibr" rid="B5">Chardavoyne and Kasmire, 2020</xref>).</p>
<p>Standard diagnostic approaches, notably urine culture combined with biochemical identification methods, are widely regarded as the gold standard for confirming UTIs (<xref ref-type="bibr" rid="B6">Nelson et&#xa0;al., 2024</xref>). However, the turnaround time is typically 24&#x2013;48 hours for organism identification and may extend to 48&#x2013;72 hours when susceptibility results are included, which can delay clinical decision-making and increase reliance on broad-spectrum empirical antibiotics. Accordingly, diagnostic stewardship strategies have emphasized earlier risk stratification and targeted testing to optimize antimicrobial use (<xref ref-type="bibr" rid="B7">Morado and Wong, 2022</xref>).</p>
<p>In this context, machine learning (ML) techniques have garnered increasing interest for enhancing diagnostic precision by leveraging structured clinical data (<xref ref-type="bibr" rid="B9">Jeng et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B8">Shen et&#xa0;al., 2024</xref>). To support early, culture-independent differentiation of <italic>E. coli</italic> from non&#x2013;<italic>E. coli</italic> UTIs using routinely available laboratory variables, we selected the Random Forest (RF) algorithm as a pragmatic, widely used baseline model because it can capture nonlinear relationships and higher-order feature interactions that are common in clinical laboratory data without requiring prespecified transformations (<xref ref-type="bibr" rid="B10">Barre&#xf1;ada et&#xa0;al., 2024</xref>). RF is well suited to mixed-type predictors and correlated laboratory features, and it integrates naturally with tree-based SHAP (TreeExplainer) to enable clinically interpretable global and patient-level explanations (<xref ref-type="bibr" rid="B10">Barre&#xf1;ada et&#xa0;al., 2024</xref>).</p>
<p>While ML models have been applied to infection-related tasks (e.g., recurrence or antimicrobial resistance prediction), evidence remains relatively limited for culture-independent pathogen differentiation in hospitalized UTI cohorts using only routinely available clinical and laboratory data. For example, a <italic>Scientific Reports</italic> study reported an AUC of 0.88 for identifying <italic>E. coli</italic> infections in elderly sepsis patients (<xref ref-type="bibr" rid="B11">Li et&#xa0;al., 2024</xref>). However, the target condition (sepsis) and clinical context differ substantially from UTIs, and the results are therefore not directly comparable. Similarly, a 2025 cohort study using 8,065 urinalysis and demographic records achieved an AUC of 0.79 for predicting overall urine culture positivity, but it was not designed to distinguish <italic>E. coli</italic> from other uropathogens among confirmed UTI patients (<xref ref-type="bibr" rid="B12">Sergounioti et&#xa0;al., 2025</xref>). These differences in outcome definition, population, and data sources underscore the need for task-specific models in hospitalized UTI settings.</p>
<p>In this study, we developed and internally validated an RF&#x2013;based model to differentiate <italic>E. coli</italic>&#x2013;associated UTIs from those caused by other uropathogens in hospitalized patients, based on retrospectively collected clinical data. We hypothesized that routinely available inflammatory and biochemical markers exhibit distinct profiles between pathogen groups, enabling clinically useful risk stratification prior to culture results. This model is intended to facilitate early, culture-independent decision support and potentially inform more timely and targeted clinical management in patients with suspected UTIs.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="s2_1">
<title>Study design and data source</title>
<p>This retrospective cohort study was conducted at Fuding Hospital, Fujian University of Traditional Chinese Medicine. We consecutively screened hospitalized patients with a diagnosis of UTI and available urine culture results between January and December 2023. For patients with repeated admissions during the study period, only the first eligible admission was retained to avoid within-patient correlation. Records missing the primary outcome (urine culture&#x2013;based pathogen group) were excluded; missing values in predictors were handled as described in the Data preprocessing section (imputation within the modeling pipeline). The overall study design and workflow are summarized in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Study design and patient selection flowchart.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g001.tif">
<alt-text content-type="machine-generated">Flowchart detailing a study on urinary tract infections (UTIs). Patient selection criteria include midstream urine culture results and clinical UTI symptoms. Exclusions cover immunocompromised patients, severe hepatic/renal failure, and more. The study includes 308 patients, with 150 in the non-Escherichia coli group and 158 in the Escherichia coli group. Data collection involves clinical and lab data, demographics, and urine tests. Data analysis methods include descriptive statistics, logistic regression, correlation analysis, machine learning (Random Forest), and performance metrics. Interpretability is assessed using feature importance and SHAP values.</alt-text>
</graphic></fig>
<p>Before analysis, the electronic medical record extract was pseudonymized (de-identified) by the hospital information team. All direct personal identifiers (e.g., name, national ID number, phone number, and address) were removed and each record was assigned a unique study code. The code key linking study codes to patient identities was stored separately by the hospital and was not accessible to the research team. Therefore, the analytic dataset contained no direct identifiers and included only variables required for this study.</p>
<p>The study protocol was approved by the Ethics Committee of Fuding Hospital, Fujian University of Traditional Chinese Medicine (Approval No. 2024015). All data were de-identified (pseudonymized) by the hospital information team prior to analysis, and the linkage key was not accessible to the research team, ensuring compliance with ethical and privacy standards. Due to the retrospective nature of the study, the requirement for written informed consent was waived by the ethics committee. The study was conducted in accordance with applicable local regulations and the Declaration of Helsinki.</p>
<p>Participants were classified into two groups based on urine culture results (midstream clean-catch specimens; catheter specimens were handled according to routine clinical practice, if applicable):</p>
<p>Non&#x2013;<italic>E. coli</italic> group (Group 0, n = 150): Patients infected with uropathogens other than <italic>E. coli</italic>, including <italic>Klebsiella pneumoniae</italic>, <italic>Acinetobacter baumannii</italic>, <italic>Enterococcus</italic> spp., and other bacterial species.</p>
<p><italic>E. coli</italic> group (Group 1, n = 158): Patients infected with <italic>E. coli</italic> identified as the predominant uropathogen.</p>
<p>Inclusion criteria comprised a positive urine culture with bacterial colony counts meeting standard clinical microbiology thresholds: &#x2265;10<sup>5</sup> colony-forming units (CFU)/mL, or 10<sup>4</sup>&#x2013;10<sup>5</sup> CFU/mL accompanied by clinical signs and symptoms consistent with UTIs. Exclusion criteria included: (1) immunocompromised status (e.g., human immunodeficiency virus (HIV) infection or long-term immunosuppressive therapy), (2) severe hepatic or renal failure, (3) disseminated malignancy, (4) concomitant infections outside the urinary tract, or (5) antibiotic use within 2 weeks before hospital admission (excluded to reduce culture-negative misclassification and biomarker distortion).</p>
</sec>
<sec id="s2_2">
<title>Data collection and biomarker assessment</title>
<p>Demographic characteristics (age, sex) and laboratory test results were extracted from the electronic medical records. All laboratory analyses were performed in accordance with standard operating procedures for clinical diagnostics, and the results used were those obtained at or near the time of urine culture collection (index encounter).</p>
</sec>
<sec id="s2_3">
<title>Blood and urine sample collection</title>
<p>Peripheral venous blood samples were collected under sterile conditions according to routine clinical practice as follows:</p>
<p>2.0 mL in EDTA tubes for complete blood count (CBC);</p>
<p>2.0 mL in citrate tubes for coagulation testing (e.g., D-dimer);</p>
<p>5.0 mL in plain tubes for serum biochemical analyses.</p>
<p>Urine specimens for urinalysis and microbial culture were collected using standard clinical procedures. Midstream clean-catch urine was preferred; when clean-catch was not feasible (e.g., catheterized patients), urine was collected via catheter specimen according to hospital protocol. Samples were obtained in sterile containers and transported to the microbiology laboratory promptly for analysis.</p>
</sec>
<sec id="s2_4">
<title>Microbial culture and identification</title>
<p>Urine cultures were performed by inoculating 1 &#x3bc;L of well-mixed urine onto 5% sheep blood agar plates using a calibrated loop. Samples were streaked in a standardized pattern and incubated at 35&#x2013;37&#xb0;C under aerobic conditions for 18&#x2013;24 hours, with extended incubation up to 48 hours when needed. Where appropriate, MacConkey agar was used in parallel for differentiation of Gram-negative bacilli. Significant growth was defined as &#x2265;10<sup>5</sup> CFU/mL, or 10<sup>4</sup>&#x2013;10<sup>5</sup> CFU/mL in symptomatic patients. Urine specimens were primarily midstream clean-catch; catheter specimens were processed according to the same laboratory protocol when applicable.</p>
<p>Preliminary identification was based on colony morphology and Gram staining. Definitive species-level identification was conducted using the VITEK MS mass spectrometry system (bioM&#xe9;rieux, France), following the manufacturer&#x2019;s protocols. Internal quality control was maintained using standard reference strains, such as <italic>Escherichia coli</italic> ATCC 25922.</p>
</sec>
<sec id="s2_5">
<title>Biomarker measurements</title>
<sec id="s2_5_1">
<title>Inflammatory markers</title>
<p>PCT levels were measured using electrochemiluminescence immunoassay (MCL60, Rismay, Nanjing, China); CRP levels were measured using latex-enhanced immunoturbidimetry (BC-7500CS, Mindray, Shenzhen, China).</p>
</sec>
<sec id="s2_5_2">
<title>Hematological parameters</title>
<p>CBC indices, including neutrophils, LYM, hemoglobin (HGB), red cell distribution width (RDW), and platelets (PLT) were analyzed using an automated hematology analyzer (BC-7500CS, Mindray, Shenzhen, China).</p>
</sec>
<sec id="s2_5_3">
<title>Coagulation markers</title>
<p>D-dimer levels were quantified by immunoturbidimetry (ExC810, Mindray, Shenzhen, China).</p>
</sec>
<sec id="s2_5_4">
<title>Biochemical markers</title>
<p>Serum levels of ALB, blood urea nitrogen (BUN), glucose (GLU), total bilirubin (TBIL), high-density lipoprotein (HDL), total cholesterol (CHO), ALT, and aspartate aminotransferase (AST) were measured using the AU5800 automated biochemical analyzer (Beckman Coulter, USA).</p>
</sec>
<sec id="s2_5_5">
<title>Urinalysis</title>
<p>Urinary leukocyte (urinary WBC) counts were assessed using an automated urine analyzer (UF-500i, Sysmex, Japan) and recorded as semi-quantitative grades (ordinal categories) according to the analyzer output, consistent with the modeling strategy.</p>
</sec>
</sec>
<sec id="s2_6">
<title>Random forest modeling</title>
<p>Random forest modeling was performed using Python 3.7. The computational environment incorporated the following libraries: NumPy (v1.21.0) for numerical operations, Pandas (v1.3.0) for data manipulation, Scikit-learn (v0.24.2) for machine learning implementation and performance evaluation, and Matplotlib (v3.4.2) for visualization.</p>
</sec>
<sec id="s2_7">
<title>Data preprocessing</title>
<sec id="s2_7_1">
<title>Missing data handling</title>
<p>Missing values in predictors were imputed using the median within the modeling pipeline (SimpleImputer, strategy = &#x201c;median&#x201d;). Imputation parameters were learned from the training data only; within five-fold cross-validation, the imputer was fit on each fold&#x2019;s training subset and applied to its corresponding validation subset, and the final imputer was then refit on the full training set and applied to the held-out test set to prevent information leakage. Variable-wise missingness rates are reported in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S1</bold></xref>.</p>
</sec>
<sec id="s2_7_2">
<title>Categorical encoding</title>
<p>Sex was coded as a single binary indicator, and urinary leukocyte grade (Urinary_WBC) was treated as an ordinal variable. No nominal categorical predictors were included in the final model; therefore, one-hot encoding was not required.</p>
</sec>
<sec id="s2_7_3">
<title>Feature dimensionality</title>
<p>In total, the RF model used 19 predictors (17 continuous laboratory variables plus Sex and urinary leukocyte grade), yielding a feature matrix of 308 &#xd7; 19 prior to the train/test split.</p>
</sec>
<sec id="s2_7_4">
<title>Model validation</title>
<p>The dataset was split once into a training set (70%) and a held-out test set (30%) using stratification by the outcome. Five-fold stratified cross-validation was performed within the training set for hyperparameter tuning and internal performance estimation. All final discrimination, calibration, and operating-point metrics were reported on the held-out test set.</p>
</sec>
</sec>
<sec id="s2_8">
<title>Model evaluation metrics</title>
<sec id="s2_8_1">
<title>Classification performance</title>
<p>Model performance was assessed using standard classification metrics derived from the confusion matrix, including true positives (TP), false positives (FP), true negatives (TN), and false negatives (FN). The following indices were calculated:</p>
<p>Precision = TP/(TP + FP).</p>
<p>Recall (Sensitivity) = TP/(TP + FN).</p>
<p>F1-score = 2 &#xd7; (Precision &#xd7; Recall)/(Precision + Recall).</p>
<p>ROC curves were plotted to evaluate the model&#x2019;s discriminative capacity. The AUC was computed as a global measure of diagnostic discrimination. Uncertainty for AUC, AP, Brier score, and operating-point metrics was quantified by bootstrap resampling of the held-out test set (1,000 iterations), with 95% CIs defined by the 2.5th and 97.5th percentiles.</p>
<p>In addition, Precision&#x2013;Recall (P&#x2013;R) curves were generated to characterize model performance in identifying the positive class (<italic>E.&#xa0;coli</italic> infection). The AP score was used to summarize the trade-off between precision and recall across thresholds. Although class proportions were similar in this cohort, PR-based evaluation remains informative for assessing positive-class performance and threshold-dependent trade-offs.</p>
<p>Clinically meaningful operating points were reported at the default threshold (0.50) and at a threshold selected on the training set using the Youden index computed from five-fold out-of-fold predicted probabilities, which was then applied to the held-out test set.</p>
</sec>
<sec id="s2_8_2">
<title>Model calibration</title>
<p>Calibration was evaluated using the Brier score (mean squared error between predicted probabilities and observed outcomes) and the calibration slope, estimated by fitting a logistic regression model of the outcome on the logit-transformed predicted probabilities. A calibration curve (reliability diagram) was generated to visually assess agreement between predicted and observed risks.</p>
</sec>
</sec>
<sec id="s2_9">
<title>Statistical analysis</title>
<p>All statistical analyses were performed using SPSS software (version 22.0, IBM Corp., Armonk, NY, USA). The distribution of continuous variables was assessed using the Shapiro&#x2013;Wilk test. Variables conforming to normal distribution were expressed as mean &#xb1; standard deviation (SD). Non-normally distributed variables were reported as median with interquartile range (IQR) and analyzed using the Mann&#x2013;Whitney U test. Categorical variables were compared using the chi-square (&#x3c7;&#xb2;) test.</p>
<p>To identify independent predictors of <italic>E. coli</italic> infection, univariate logistic regression was first conducted. Variables with a P value &lt; 0.20 were subsequently entered into a multivariable logistic regression model using a backward stepwise elimination strategy. Adjusted odds ratios (ORs) and corresponding 95% confidence intervals (CIs) were reported. All statistical tests were two-tailed, and a P value &lt; 0.05 was considered statistically significant.</p>
</sec>
<sec id="s2_10">
<title>Random forest model and hyperparameter tuning</title>
<p>An RF classifier (scikit-learn) was trained to distinguish between <italic>E. coli</italic> (Group = 1) and non-<italic>E. coli</italic> (Group = 0) UTIs. Missing predictor values were imputed using a median strategy within a pipeline. Imputation parameters were estimated from the training set only, and then applied to the held-out test set to prevent information leakage. The dataset was split once using a stratified 70/30 train-test split (random_state = 42).</p>
<p>Hyperparameter tuning was conducted on the training set using RandomizedSearchCV with 5-fold stratified cross-validation, optimizing ROC-AUC. The candidate search space included: n_estimators &#x2208; {300, 500, 800, 1200}, max_depth &#x2208; {None, 3, 5, 7, 10, 15}, min_samples_split &#x2208; {2, 5, 10, 20}, min_samples_leaf &#x2208; {1, 2, 3, 5, 8}, and max_features &#x2208; {&#x201c;sqrt&#x201d;, &#x201c;log2&#x201d;, 0.3, 0.5, 0.8}. To address potential class imbalance, the class_weight parameter was set to &#x201c;balanced&#x201d;. The final RF hyperparameters selected by RandomizedSearchCV were: n_estimators = 300, max_depth = 15, min_samples_split = 10, min_samples_leaf = 8, max_features = &#x201c;log2&#x201d;, with class_weight = &#x201c;balanced&#x201d; and random_state = 42.</p>
</sec>
<sec id="s2_11">
<title>Baseline models and robustness checks</title>
<p>To contextualize the RF model and reduce the risk of model-specific findings, we benchmarked RF against commonly used baseline classifiers trained on the same feature set and data split. These baselines included (i) regularized logistic regression (L2-penalized), (ii) support vector machine (SVM), and (iii) gradient boosting (tree-based boosting). For all models, missing predictors were imputed using the same median-imputation strategy within a training-only pipeline to prevent information leakage; continuous features were standardized for linear models (logistic regression/SVM). Hyperparameters were tuned using stratified cross-validation within the training set, and final performance was reported on the held-out stratified test set using the same discrimination (ROC-AUC, PR-AUC) and calibration metrics (Brier score; calibration intercept/slope).</p>
</sec>
<sec id="s2_12">
<title>Feature importance and SHAP analysis</title>
<p>To interpret the RF model, we used SHAP to quantify each feature&#x2019;s contribution to the predicted probability of <italic>E. coli</italic> at the individual level. SHAP values were computed using TreeExplainer (TreeSHAP) following the framework described by Lundberg et&#xa0;al. (<xref ref-type="bibr" rid="B13">Lundberg et&#xa0;al., 2020</xref>), which provides consistent additive attributions for tree-based models and can reflect nonlinear and interaction-driven effects. Global importance was summarized as the mean absolute SHAP value computed on the held-out test set, and signed SHAP summary (beeswarm) plots were used to visualize directionality and heterogeneity. SHAP analyses were conducted using the Python SHAP package (v0.45.1).</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>Results</title>
<sec id="s3_1">
<title>Normality testing of variables</title>
<p>The Shapiro&#x2013;Wilk test revealed that most continuous variables, including CRP, ALT, AST, PCT, and D-dimer, deviated from a normal distribution in both groups (all P &lt; 0.05). Only HGB and ALB in the non-<italic>E. coli</italic> group demonstrated normal distribution (P&#xa0;= 0.309 and 0.773, respectively). Accordingly, non-parametric tests (e.g., Mann&#x2013;Whitney U test) were employed for intergroup comparisons of non-normally distributed variables.</p>
</sec>
<sec id="s3_2">
<title>Distribution of microorganisms</title>
<p>A total of 308 bacterial isolates were identified from patients with UTIs. As shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, <italic>E. coli</italic> was the most prevalent pathogen, comprising 158 isolates (51.3%), followed by <italic>Enterococcus</italic> spp. (57, 18.5%) and <italic>Klebsiella</italic> spp. (24, 7.8%). Other detected organisms included non-fermenting Gram-negative bacilli (20, 6.5%), <italic>Streptococcus</italic> spp. (14, 4.5%), <italic>Staphylococcus</italic> spp. (2, 0.7%), and other members of the <italic>Enterobacteriaceae</italic> family (33, 10.7%). A detailed breakdown of bacterial species and their respective proportions is presented in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Distribution of bacterial isolates in patients with urinary tract infections: The donut chart illustrates the proportional representation of 308 isolates across taxonomic groups.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g002.tif">
<alt-text content-type="machine-generated">Pie chart showing bacterial distribution in a sample of 308. Escherichia coli represents the largest portion with 158 cases (51.3%), followed by Enterococcus spp. with 57 cases (18.5%), and Klebsiella spp. with 24 cases (7.8%). Other categories include Non-fermenters (6.5%), Streptococcus spp. (4.5%), Staphylococcus spp. (0.7%), and Other Enterobacteriaceae (10.7%). Each category is color-coded.</alt-text>
</graphic></fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Distribution of bacterial isolates identified from midstream urine samples (n = 308).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">No.</th>
<th valign="middle" align="left">Bacterial category</th>
<th valign="middle" align="left">Species</th>
<th valign="middle" align="left">Count</th>
<th valign="middle" align="left">Percentage (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left"><italic>Escherichia coli</italic> (Total = 158)</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">158</td>
<td valign="middle" align="left">51.30</td>
</tr>
<tr>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left"><italic>Enterococcus</italic> spp. (Total = 57)</td>
<td valign="middle" align="left"><italic>Enterococcus faecium</italic></td>
<td valign="middle" align="left">32</td>
<td valign="middle" align="left">10.39</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Enterococcus faecalis</italic></td>
<td valign="middle" align="left">25</td>
<td valign="middle" align="left">8.12</td>
</tr>
<tr>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left"><italic>Klebsiella</italic> spp. (Total = 24)</td>
<td valign="middle" align="left"><italic>Klebsiella pneumoniae</italic></td>
<td valign="middle" align="left">21</td>
<td valign="middle" align="left">6.82</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Citrobacter braakii</italic></td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">0.65</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Citrobacter freundii</italic></td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">0.32</td>
</tr>
<tr>
<td valign="middle" align="left">4</td>
<td valign="middle" align="left">Non-fermenting Gram-negative bacilli (Total = 20)</td>
<td valign="middle" align="left"><italic>Acinetobacter baumannii</italic></td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">2.27</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Stenotrophomonas maltophilia</italic></td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">2.27</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Pseudomonas aeruginosa</italic></td>
<td valign="middle" align="left">5</td>
<td valign="middle" align="left">1.62</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Sphingomonas paucimobilis</italic></td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">0.32</td>
</tr>
<tr>
<td valign="middle" align="left">5</td>
<td valign="middle" align="left"><italic>Streptococcus agalactiae</italic> (Total = 14)</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">14</td>
<td valign="middle" align="left">4.55</td>
</tr>
<tr>
<td valign="middle" align="left">6</td>
<td valign="middle" align="left"><italic>Staphylococcus</italic> spp. (Total = 2)</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">0.65</td>
</tr>
<tr>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">Other <italic>Enterobacteriaceae</italic> (Total = 33)</td>
<td valign="middle" align="left"><italic>Morganella morganii</italic></td>
<td valign="middle" align="left">10</td>
<td valign="middle" align="left">3.25</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Proteus mirabilis</italic></td>
<td valign="middle" align="left">10</td>
<td valign="middle" align="left">3.25</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Enterobacter cloacae</italic></td>
<td valign="middle" align="left">5</td>
<td valign="middle" align="left">1.62</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Hafnia alvei/Edwardsiella tarda</italic></td>
<td valign="middle" align="left">4</td>
<td valign="middle" align="left">1.30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Serratia marcescens</italic></td>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">0.97</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"><italic>Salmonella</italic> spp.</td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">0.32</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Total</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">308</td>
<td valign="middle" align="left">100.00</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bacterial isolates were obtained from midstream urine specimens. Standard culture techniques were applied with internal quality controls using reference strains.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3">
<title>Baseline characteristics and laboratory findings</title>
<p>A total of 308 patients were included: 150 in the non-<italic>E. coli</italic> group and 158 in the <italic>E. coli</italic> group. No significant difference was observed in median age between the groups (68.0 vs. 66.0 years, P&#xa0;=&#xa0;0.301). However, gender distribution differed significantly, with a higher proportion of females in the <italic>E. coli</italic> group (74.1% vs. 46.0%, P&#xa0;&lt; 0.001) (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Demographic characteristics of patients in the <italic>Escherichia coli</italic> and non-<italic>E. coli</italic> urinary tract infection groups.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Variable</th>
<th valign="middle" align="center">Non-<italic>E. coli</italic> group (n = 150)</th>
<th valign="middle" align="center"><italic>E. coli</italic> group (n = 158)</th>
<th valign="middle" align="center">P value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Age (Year)</td>
<td valign="middle" align="center">68.0 (57.8 - 77.0)</td>
<td valign="middle" align="center">66.0 (55.8 - 75.0)</td>
<td valign="middle" align="center">0.301</td>
</tr>
<tr>
<td valign="middle" align="center">Age range</td>
<td valign="middle" align="center">22.0 - 88.0</td>
<td valign="middle" align="center">23.0 - 91.0</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Gender, n (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&lt;0.001</td>
</tr>
<tr>
<td valign="middle" align="center">Female</td>
<td valign="middle" align="center">69 (46.0)</td>
<td valign="middle" align="center">117 (74.1)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Male</td>
<td valign="middle" align="center">81 (54.0)</td>
<td valign="middle" align="center">41 (25.9)</td>
<td valign="middle" align="center"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Data are shown as median (interquartile range, IQR) for age and n (%) for the categorical variable. Group comparisons were performed using the Mann&#x2013;Whitney U test for non-normally distributed continuous data and the chi-square test (&#x3c7;&#xb2;) for the categorical variable.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Compared to the non-<italic>E. coli</italic> group, the <italic>E. coli</italic> group exhibited significantly higher values of HGB (P = 0.004), LYM (P = 0.003), ALT (P = 0.001), and ALB (P = 0.021). In contrast, RDW (P = 0.016) and D-dimer levels (P = 0.014) were lower in the <italic>E. coli</italic> group (<xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparison of laboratory parameters between the <italic>Escherichia coli</italic> and non-<italic>E. coli</italic> urinary tract infection groups.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Variable</th>
<th valign="middle" align="center">Non-<italic>E. coli</italic> group (n = 150)</th>
<th valign="middle" align="center"><italic>E. coli</italic> group (n = 158)</th>
<th valign="middle" align="center">P value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">HGB (g/L)</td>
<td valign="middle" align="center">112.05 &#xb1; 23.26</td>
<td valign="middle" align="center">121.50 (108.75 - 133.00)</td>
<td valign="middle" align="center">0.004</td>
</tr>
<tr>
<td valign="middle" align="left">RDW (%)</td>
<td valign="middle" align="center">14.03 (13.10 - 14.90)</td>
<td valign="middle" align="center">13.90 (12.90 - 14.03)</td>
<td valign="middle" align="center">0.016</td>
</tr>
<tr>
<td valign="middle" align="left">Neutrophils (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center">4.28 (3.20 - 6.16)</td>
<td valign="middle" align="center">3.89 (3.10 - 5.43)</td>
<td valign="middle" align="center">0.382</td>
</tr>
<tr>
<td valign="middle" align="left">LYM (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center">1.33 (0.94 - 1.78)</td>
<td valign="middle" align="center">1.58 (1.17 - 1.92)</td>
<td valign="middle" align="center">0.003</td>
</tr>
<tr>
<td valign="middle" align="left">Platelets (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center">223.00 (180.00 - 292.25)</td>
<td valign="middle" align="center">237.50 (189.50 - 303.25)</td>
<td valign="middle" align="center">0.354</td>
</tr>
<tr>
<td valign="middle" align="left">C-reactive protein (mg/L)</td>
<td valign="middle" align="center">10.57 (4.90 - 26.78)</td>
<td valign="middle" align="center">6.63 (4.90 - 17.93)</td>
<td valign="middle" align="center">0.073</td>
</tr>
<tr>
<td valign="middle" align="left">Procalcitonin (ng/mL)</td>
<td valign="middle" align="center">0.11 (0.05 - 0.78)</td>
<td valign="middle" align="center">0.11 (0.07 - 0.78)</td>
<td valign="middle" align="center">0.426</td>
</tr>
<tr>
<td valign="middle" align="left">ALT (U/L)</td>
<td valign="middle" align="center">15.00 (9.00 - 27.00)</td>
<td valign="middle" align="center">20.00 (13.00 - 31.00)</td>
<td valign="middle" align="center">0.001</td>
</tr>
<tr>
<td valign="middle" align="left">AST (U/L)</td>
<td valign="middle" align="center">20.00 (15.00 - 28.00)</td>
<td valign="middle" align="center">21.00 (17.00 - 28.00)</td>
<td valign="middle" align="center">0.253</td>
</tr>
<tr>
<td valign="middle" align="left">ALB (g/L)</td>
<td valign="middle" align="center">36.07 &#xb1; 5.47</td>
<td valign="middle" align="center">37.90 (33.80 - 41.73)</td>
<td valign="middle" align="center">0.021</td>
</tr>
<tr>
<td valign="middle" align="left">TBIL (&#x3bc;mol/L)</td>
<td valign="middle" align="center">9.20 (6.18 - 12.20)</td>
<td valign="middle" align="center">8.75 (6.70 - 11.93)</td>
<td valign="middle" align="center">0.747</td>
</tr>
<tr>
<td valign="middle" align="left">BUN (mmol/L)</td>
<td valign="middle" align="center">5.16 (3.78 - 8.53)</td>
<td valign="middle" align="center">4.90 (3.90 - 6.64)</td>
<td valign="middle" align="center">0.316</td>
</tr>
<tr>
<td valign="middle" align="left">HDL (mmol/L)</td>
<td valign="middle" align="center">1.02 (0.82 - 1.15)</td>
<td valign="middle" align="center">1.01 (0.82 - 1.16)</td>
<td valign="middle" align="center">0.886</td>
</tr>
<tr>
<td valign="middle" align="left">Glucose (mmol/L)</td>
<td valign="middle" align="center">6.23 (4.97 - 7.79)</td>
<td valign="middle" align="center">6.28 (5.09 - 7.74)</td>
<td valign="middle" align="center">0.839</td>
</tr>
<tr>
<td valign="middle" align="left">Cholesterol (mmol/L)</td>
<td valign="middle" align="center">4.21 (3.39 - 4.63)</td>
<td valign="middle" align="center">4.21 (3.49 - 4.84)</td>
<td valign="middle" align="center">0.198</td>
</tr>
<tr>
<td valign="middle" align="left">D-dimer (ng/mL)</td>
<td valign="middle" align="center">1.17 (0.61 - 1.71)</td>
<td valign="middle" align="center">0.80 (0.41 - 1.71)</td>
<td valign="middle" align="center">0.014</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Data are presented as median (interquartile range, IQR) for non-normally distributed variables or mean &#xb1; standard deviation for normally distributed variables. Non-normally distributed variables were analyzed using the Mann&#x2013;Whitney U test. HGB, hemoglobin; LYM, lymphocytes; RDW, red cell distribution width; ALT, alanine aminotransferase; AST, aspartate aminotransferase; ALB, albumin; BUN, blood urea nitrogen; TBIL, total bilirubin; HDL, high-density lipoprotein cholesterol.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_4">
<title>Logistic regression analysis of risk factors</title>
<p>As shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, univariate logistic regression analysis identified eight variables associated with the presence of <italic>E. coli</italic> UTI, including HGB, RDW, LYM, ALT, ALB, BUN, sex, and urinary WBC (grade 1); of these, RDW (P = 0.061) and BUN (P = 0.064) did not reach P &lt; 0.05 but met the P &lt; 0.20 entry criterion. These variables were subsequently entered into a multivariable logistic regression model to determine independent predictors (<xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>).</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Univariate logistic regression analysis of potential predictors for <italic>Escherichia coli</italic> urinary tract infection.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Variable</th>
<th valign="middle" align="center">Reference group</th>
<th valign="middle" align="center">P value</th>
<th valign="middle" align="center">OR (95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Age (year)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.351</td>
<td valign="middle" align="center">0.993 (0.977 - 1.008)</td>
</tr>
<tr>
<td valign="middle" align="center">Hemoglobin (g/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.009</bold></td>
<td valign="middle" align="center">1.014 (1.003 - 1.025)</td>
</tr>
<tr>
<td valign="middle" align="center">RDW (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.061</bold></td>
<td valign="middle" align="center">0.879 (0.768 - 1.006)</td>
</tr>
<tr>
<td valign="middle" align="center">Neutrophils (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.264</td>
<td valign="middle" align="center">0.962 (0.900 - 1.029)</td>
</tr>
<tr>
<td valign="middle" align="center">LYM (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.006</bold></td>
<td valign="middle" align="center">1.730 (1.168 - 2.563)</td>
</tr>
<tr>
<td valign="middle" align="center">Platelets (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.555</td>
<td valign="middle" align="center">1.001 (0.998 - 1.003)</td>
</tr>
<tr>
<td valign="middle" align="center">CRP (mg/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.246</td>
<td valign="middle" align="center">0.997 (0.991 - 1.002)</td>
</tr>
<tr>
<td valign="middle" align="center">PCT (ng/mL)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.921</td>
<td valign="middle" align="center">0.999 (0.980 - 1.019)</td>
</tr>
<tr>
<td valign="middle" align="center">ALT (U/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.024</bold></td>
<td valign="middle" align="center">1.014 (1.002 - 1.027)</td>
</tr>
<tr>
<td valign="middle" align="center">AST (U/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.293</td>
<td valign="middle" align="center">1.004 (0.996 - 1.013)</td>
</tr>
<tr>
<td valign="middle" align="center">ALB (g/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.045</bold></td>
<td valign="middle" align="center">1.043 (1.001 - 1.088)</td>
</tr>
<tr>
<td valign="middle" align="center">TBIL (&#x3bc;mol/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.236</td>
<td valign="middle" align="center">1.020 (0.987 - 1.055)</td>
</tr>
<tr>
<td valign="middle" align="center">BUN (mmol/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.064</bold></td>
<td valign="middle" align="center">0.955 (0.909 - 1.003)</td>
</tr>
<tr>
<td valign="middle" align="center">HDL (mmol/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.990</td>
<td valign="middle" align="center">0.996 (0.517 - 1.917)</td>
</tr>
<tr>
<td valign="middle" align="center">Glucose (mmol/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.432</td>
<td valign="middle" align="center">1.023 (0.966 - 1.084)</td>
</tr>
<tr>
<td valign="middle" align="center">D-dimer</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.491</td>
<td valign="middle" align="center">0.976 (0.912 - 1.045)</td>
</tr>
<tr>
<td valign="middle" align="center">Cholesterol (mmol/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.127</td>
<td valign="middle" align="center">1.151 (0.961 - 1.380)</td>
</tr>
<tr>
<td valign="middle" align="center">Sex</td>
<td valign="middle" align="center">Female</td>
<td valign="middle" align="center"><bold>&lt;0.001</bold></td>
<td valign="middle" align="center">0.299 (0.185 - 0.482)</td>
</tr>
<tr>
<td valign="middle" align="center">Urinary WBC (Grade 1)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center"><bold>0.002</bold></td>
<td valign="middle" align="center">2.857 (1.462 - 5.583)</td>
</tr>
<tr>
<td valign="middle" align="center">Urinary WBC (Grade 2)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center">0.912</td>
<td valign="middle" align="center">0.964 (0.505 - 1.843)</td>
</tr>
<tr>
<td valign="middle" align="center">Urinary WBC (Grade 3)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center">0.126</td>
<td valign="middle" align="center">1.564 (0.881 - 2.774)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Univariate logistic regression was conducted to evaluate associations between clinical variables and the likelihood of <italic>E. coli</italic> urinary tract infection. OR &gt; 1 indicates increased risk, and OR &lt; 1 indicates decreased risk. Variables with P &lt; 0.20 were considered candidates for multivariate analysis. SE, standard error; OR, odds ratio; CI, confidence interval; HGB, hemoglobin; LYM, lymphocytes; RDW, red cell distribution width; CRP, C-reactive protein; PCT, procalcitonin; ALT, alanine aminotransferase; AST, aspartate aminotransferase; ALB, albumin; TBIL, total bilirubin; BUN, blood urea nitrogen; HDL, high-density lipoprotein; WBC, white blood cells.</p></fn>
<fn>
<p>Bold values indicate statistical significance (P &lt; 0.05).</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Multivariable logistic regression analysis for independent predictors of <italic>Escherichia coli</italic> urinary tract infections.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Variable</th>
<th valign="middle" align="center">Reference group</th>
<th valign="middle" align="center">P-value</th>
<th valign="middle" align="center">OR (95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">RDW (%)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.095</td>
<td valign="middle" align="center">0.880 (0.758 - 1.022)</td>
</tr>
<tr>
<td valign="middle" align="center">LYM (&#xd7;10<sup>9</sup>/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"><bold>0.016</bold></td>
<td valign="middle" align="center">1.690 (1.104 - 2.587)</td>
</tr>
<tr>
<td valign="middle" align="center">ALT (U/L)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.057</td>
<td valign="middle" align="center">1.013 (1.000 - 1.027)</td>
</tr>
<tr>
<td valign="middle" align="center">Sex</td>
<td valign="middle" align="center">Female</td>
<td valign="middle" align="center"><bold>&lt;0.001</bold></td>
<td valign="middle" align="center">0.299 (0.181 - 0.496)</td>
</tr>
<tr>
<td valign="middle" align="left">Urinary WBC (Grade 1)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center"><bold>0.002</bold></td>
<td valign="middle" align="center">3.100 (1.514 - 6.346)</td>
</tr>
<tr>
<td valign="middle" align="left">Urinary WBC (Grade 2)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center">0.771</td>
<td valign="middle" align="center">1.109 (0.553 - 2.224)</td>
</tr>
<tr>
<td valign="middle" align="left">Urinary WBC (Grade 3)</td>
<td valign="middle" align="center">Grade 0</td>
<td valign="middle" align="center">0.080</td>
<td valign="middle" align="center">1.738 (0.936 - 3.226)</td>
</tr>
<tr>
<td valign="middle" align="center">Constant</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">0.466</td>
<td valign="middle" align="center">2.348</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>SE, standard error; OR, odds ratio; CI, confidence interval; RDW, red cell distribution width; LYM, lymphocytes; ALT, alanine aminotransferase; WBC, white blood cells.</p></fn>
<fn>
<p>Bold values indicate statistical significance (P &lt; 0.05).</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The multivariable analysis revealed three statistically significant independent predictors of <italic>E. coli</italic> UTIs: LYM (OR = 1.690, P&#xa0;=&#xa0;0.016); sex (male) (OR = 0.299, P &lt; 0.001); Urinary WBC (Grade 1) (OR = 3.100, P = 0.002) (<xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>).</p>
<p>These results indicate that females are more likely to develop <italic>E.&#xa0;coli</italic>-associated UTIs. Additionally, elevated LYM and the presence of urinary WBC (grade 1) significantly increased the risk of <italic>E. coli</italic> UTIs.</p>
</sec>
<sec id="s3_5">
<title>Correlation analysis</title>
<p>Pearson correlation heatmaps were constructed separately for the non-<italic>E. coli</italic> group (Group 0) and the <italic>E. coli</italic> group (Group 1) to examine inter-variable associations (<xref ref-type="fig" rid="f3"><bold>Figures&#xa0;3A, B</bold></xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Pearson correlation heatmaps of clinical variables in the two groups: <bold>(A)</bold> Non-<italic>Escherichia coli</italic> group (Group 0); <bold>(B)</bold> <italic>Escherichia coli</italic> group (Group 1). The heatmaps illustrate pairwise Pearson correlation coefficients (r) among clinical and biochemical variables. Color gradients reflect the strength and direction of associations, from strong positive (red, r = 1.0) to strong negative (blue, r = &#x2212;1.0).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g003.tif">
<alt-text content-type="machine-generated">Two correlation heatmaps labeled A and B. Heatmap A represents correlations for Group 0, while heatmap B is for Group 1. Both feature variables like Age, RDW, NEU, CRP, and others on the axes. Correlations range from -1.0 to 1.0, shown by a color gradient from blue to red.</alt-text>
</graphic></fig>
<p>In the non-<italic>E. coli</italic> group (Group 0), most variable pairs demonstrated weak correlations (|r| &lt; 0.3). Notable moderate correlations were observed between CRP and neutrophils (r&#xa0;=&#xa0;0.61), between HDL and CHO (r = 0.56), and RDW and HGB (r = 0.56).</p>
<p>In contrast, Group 1 exhibited more pronounced inter-variable relationships. A strong correlation was observed between PCT and AST (r = 0.62), while a moderate correlation was noted between PCT and GLU (r = 0.53).</p>
<p>These distinct patterns suggest that the systemic inflammatory and metabolic responses may differ between <italic>E. coli</italic> and non-<italic>E.&#xa0;coli</italic> UTIs.</p>
</sec>
<sec id="s3_6">
<title>Confusion matrix of the random forest model</title>
<p>The confusion matrix for the random forest model on the test set is shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>. The model correctly identified 30 patients in the non-<italic>E. coli</italic> group and 29 in the <italic>E. coli</italic> group. Misclassifications included 15 false positives (non-<italic>E. coli</italic> cases incorrectly predicted as <italic>E. coli</italic>) and 19 false negatives. This distribution reflects a relatively balanced classification performance, although misclassifications occurred in both classes.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Confusion matrix of the Random Forest model on the held-out test set (n = 93) using a fixed decision threshold of 0.50 for predicting <italic>E. coli</italic> (Group = 1). Cells show counts with row-normalized percentages in parentheses (each row sums to 100%), enabling interpretation of error patterns within each true class. The title reports sensitivity (true positive rate) and specificity (true negative rate) computed on the test set at the same threshold.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g004.tif">
<alt-text content-type="machine-generated">Confusion matrix for a test set with a threshold of 0.50, showing sensitivity of 0.58 and specificity of 0.62. True Non-E. coli is correctly predicted in 28 cases (62.2%) and misclassified in 17 cases (37.8%). True E. coli is correctly predicted in 28 cases (58.3%) and misclassified in 20 cases (41.7%). A color scale on the right indicates row-normalized percentage values.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_7">
<title>Performance of the random forest model</title>
<p>Because the final predictor dimensionality was limited (P = 19) relative to the sample size (n = 308), the model was not exposed to high-dimensional one-hot expansion. Model complexity was further controlled by cross-validated hyperparameter tuning within the training set and evaluated on a held-out test set.</p>
<p>On the held-out test set (n = 93), the RF model achieved an overall accuracy of 0.63. Class-wise precision/recall/F1 were 0.61/0.67/0.64 for the non&#x2013;<italic>E. coli</italic> group and 0.66/0.60/0.63 for the <italic>E. coli</italic> group (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S2</bold></xref>).</p>
</sec>
<sec id="s3_8">
<title>Discrimination and calibration</title>
<p>The model showed moderate discrimination with a ROC-AUC of 0.66 (95% CI 0.54&#x2013;0.77) and a PR-AUC of 0.66 (95% CI 0.54&#x2013;0.81). Calibration was acceptable, with a Brier score of 0.233 (95% CI 0.210&#x2013;0.255) and a calibration slope of 1.130 (intercept 0.124) (<xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>).</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Discrimination and calibration on the held-out test set (n=93).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="left">Estimate (95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">ROC-AUC</td>
<td valign="middle" align="left">0.66 (0.54-0.77)</td>
</tr>
<tr>
<td valign="middle" align="left">PR-AUC (Average precision)</td>
<td valign="middle" align="left">0.66 (0.54-0.81)</td>
</tr>
<tr>
<td valign="middle" align="left">Brier score</td>
<td valign="middle" align="left">0.233 (0.210-0.255)</td>
</tr>
<tr>
<td valign="middle" align="left">Calibration intercept</td>
<td valign="middle" align="left">0.124</td>
</tr>
<tr>
<td valign="middle" align="left">Calibration slope</td>
<td valign="middle" align="left">1.130</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>CI, confidence interval; ROC-AUC, area under the receiver operating characteristic curve; PR-AUC, area under the precision&#x2013;recall curve (average precision, AP). ROC-AUC and PR-AUC were calculated on the held-out test set (n = 93). The Brier score reflects the mean squared error between predicted probabilities and observed outcomes (lower is better). Calibration intercept and slope were estimated by regressing the observed outcome on the logit of predicted probabilities (intercept &#x2248; 0 and slope &#x2248; 1 indicate ideal calibration). 95% confidence intervals for AUC, PR-AUC, and Brier score were obtained by bootstrap resampling of the held-out test set (1,000 iterations).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_9">
<title>Clinically meaningful operating points</title>
<p>At the default threshold of 0.50, the model achieved an accuracy of 0.65 (95% CI 0.55&#x2013;0.74) with sensitivity 0.62 (0.49&#x2013;0.76) and specificity 0.67 (0.52&#x2013;0.80) (<xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>). Using the Youden threshold determined on the training set (0.476) yielded sensitivity 0.65 (0.52&#x2013;0.79) and specificity 0.60 (0.44&#x2013;0.74).</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Clinically meaningful operating points on the held-out test set (n=93, 95% CI).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Threshold</th>
<th valign="middle" align="left">Accuracy</th>
<th valign="middle" align="left">Sensitivity</th>
<th valign="middle" align="left">Specificity</th>
<th valign="middle" align="left">PPV</th>
<th valign="middle" align="left">NPV</th>
<th valign="middle" align="left">F1</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">0.50 (default)</td>
<td valign="middle" align="left">0.65 (0.55&#x2013;0.74)</td>
<td valign="middle" align="left">0.62 (0.49&#x2013;0.76)</td>
<td valign="middle" align="left">0.67 (0.52&#x2013;0.80)</td>
<td valign="middle" align="left">0.67 (0.52&#x2013;0.80)</td>
<td valign="middle" align="left">0.62 (0.49&#x2013;0.76)</td>
<td valign="middle" align="left">0.65 (0.52&#x2013;0.75)</td>
</tr>
<tr>
<td valign="middle" align="left">0.476 (Youden; training OOF)</td>
<td valign="middle" align="left">0.62 (0.53&#x2013;0.72)</td>
<td valign="middle" align="left">0.65 (0.52&#x2013;0.79)</td>
<td valign="middle" align="left">0.60 (0.44&#x2013;0.74)</td>
<td valign="middle" align="left">0.63 (0.50&#x2013;0.76)</td>
<td valign="middle" align="left">0.61 (0.46&#x2013;0.76)</td>
<td valign="middle" align="left">0.64 (0.52&#x2013;0.74)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>PPV, positive predictive value; NPV, negative predictive value; CI, confidence interval; OOF, out-of-fold. Operating-point metrics were evaluated on the held-out test set (n = 93). The default threshold was 0.50. The Youden threshold (0.476) was selected on the training set using 5-fold out-of-fold predicted probabilities by maximizing Youden&#x2019;s J (sensitivity + specificity &#x2212; 1), and then applied unchanged to the test set. 95% confidence intervals were obtained by bootstrap resampling of the held-out test set (1,000 iterations).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_10">
<title>ROC and precision&#x2013;recall curve analysis</title>
<p>The diagnostic performance of the RF model was further evaluated using the ROC curve and the precision&#x2013;recall (P&#x2013;R) curve (<xref ref-type="fig" rid="f5"><bold>Figures&#xa0;5</bold></xref>, <xref ref-type="fig" rid="f6"><bold>6</bold></xref>). As shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, the ROC curve remained above the diagonal reference line across most thresholds, indicating discriminative ability beyond chance. The corresponding ROC-AUC, with the 95% confidence interval, is reported in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>. As shown in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, the P&#x2013;R curve was generally above the baseline precision determined by the positive-class prevalence, supporting clinically meaningful precision&#x2013;recall trade-offs. The corresponding PR-AUC (average precision), with 95% confidence interval, is summarized in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Receiver operating characteristic (ROC) curve of the Random Forest model on the held-out test set (n = 93), constructed from predicted probabilities for <italic>E. coli</italic> (Group = 1). The area under the ROC curve (ROC-AUC) is reported with a 95% confidence interval estimated by bootstrap resampling (1,000 iterations) of the test set.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g005.tif">
<alt-text content-type="machine-generated">ROC curve illustrating the test set performance with a blue line representing the true positive rate against the false positive rate. The area under the curve (AUC) is 0.66, with a confidence interval of 0.54 to 0.77, displayed as a dashed orange line indicating random performance.</alt-text>
</graphic></fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Precision&#x2013;recall (PR) curve of the Random Forest model on the held-out test set (n = 93), constructed from predicted probabilities for <italic>E. coli</italic> (Group = 1). The area under the PR curve (PR-AUC/average precision) is reported with a 95% bootstrap confidence interval (1,000 iterations).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g006.tif">
<alt-text content-type="machine-generated">Precision-recall curve for a test set, displaying an area under the curve (AP) of 0.66 with a ninety-five percent confidence interval of 0.54 to 0.81. The precision values sharply decrease before fluctuating moderately as recall increases from zero to one.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_11">
<title>Benchmarking against baseline models</title>
<p>The RF model&#x2019;s performance was benchmarked against several standard baselines, including regularized logistic regression, SVM, and gradient boosting, using the same stratified held-out evaluation. The RF model demonstrated comparable performance to these baselines, with ROC-AUC values ranging from 0.59 to 0.66 and PR-AUC values from 0.61 to 0.66 (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S3</bold></xref>). Notably, the discrimination remained moderate across all methods, justifying the decision to report conservative performance estimates derived from stratified evaluation rather than potentially optimistic non-stratified splits.</p>
</sec>
<sec id="s3_12">
<title>Feature stability checks</title>
<p>Further stability checks were conducted to compare the top features selected by each model. The overlap of the top-10 features, measured by the Jaccard index, and the rank correlation of feature importance were assessed to evaluate the consistency of the selected biomarkers across different models. <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure S1</bold></xref> presents the Jaccard index of top-10 features across the models, while <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure S2</bold></xref> shows the Spearman rank correlation between the importance rankings of these features.</p>
</sec>
<sec id="s3_13">
<title>Calibration performance</title>
<p>The model showed acceptable calibration on the held-out test set (Brier score = 0.233; calibration intercept = 0.124; calibration slope = 1.130; <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>). The calibration curve is shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Calibration plot (reliability diagram) of the Random Forest model on the held-out test set (n = 93). The dashed line indicates perfect calibration; points represent the observed proportion of <italic>E. coli</italic> (Group = 1) within quantile-based bins of predicted probabilities, plotted against the mean predicted probability in each bin.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g007.tif">
<alt-text content-type="machine-generated">Calibration plot for a test set showing the observed proportion against mean predicted probability. A blue line with markers fluctuates around an orange dashed diagonal line, indicating model calibration.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_14">
<title>Feature importance</title>
<p>Model interpretability was assessed using SHAP. Global SHAP ranking (mean |SHAP| on the test set) identified Sex, LYM, and ALT as the most influential predictors contributing to the model&#x2019;s output for <italic>E. coli</italic> classification (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>).</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Global feature importance based on mean absolute SHAP values (mean |SHAP|) for the Random Forest model, computed on the held-out test set (n = 93). Larger mean |SHAP| indicates a greater average contribution magnitude of the feature to the model&#x2019;s prediction for <italic>E. coli</italic> (Group = 1). RDW, red cell distribution width; CRP, C-reactive protein; PCT, procalcitonin; ALT, alanine aminotransferase; AST, aspartate aminotransferase; TBIL, total bilirubin; BUN, blood urea nitrogen; HDL, high-density lipoprotein; WBC, white blood cells.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g008.tif">
<alt-text content-type="machine-generated">Bar chart showing feature importance in a model according to mean SHAP values. The features, from highest to lowest importance, are Sex, LYM, ALT, AST, PLT, D-dimer, RDW, Urinary_WBC, ALB, TBIL, PCT, HGB, Age, BUN, CRP, HDL, NEU, GLU, and CHO. The x-axis represents the mean absolute SHAP value indicating average impact on model output magnitude, ranging from 0 to 0.06.</alt-text>
</graphic></fig>
<p>The SHAP beeswarm plot further illustrated both directionality and inter-individual heterogeneity (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>). Positive SHAP values indicate an increased predicted probability of <italic>E. coli</italic> (Group = 1), whereas negative values indicate a decreased probability. For Sex (0&#xa0;= female, 1 = male), higher values (male) were predominantly associated with negative SHAP values, while lower values (female) were more often associated with positive SHAP values, suggesting that female sex increased and male sex decreased the model-predicted probability of <italic>E. coli</italic> in this cohort. Wider horizontal dispersion for certain variables indicates greater heterogeneity in effects and possible interaction patterns at the patient level.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>SHAP summary (beeswarm) plot of the Random Forest model. Each dot represents one patient on the held-out test set (n = 93). The x-axis shows signed SHAP values (impact on the prediction of Group = 1 [<italic>E. coli</italic>]); values &gt; 0 indicate an increased contribution toward predicting <italic>E. coli</italic>, whereas values &lt; 0 indicate a decreased contribution. Dot color encodes the feature value (red = high, blue = low). For Sex (0 = female, 1 = male), red indicates male and blue indicates female.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1740707-g009.tif">
<alt-text content-type="machine-generated">SHAP summary plot displaying the impact of various features on model output, with features listed vertically alongside SHAP values on the horizontal axis. Points are color-coded by feature values, ranging from low (blue) to high (pink). Features include Sex, LYM, ALT, AST, and others, showing varied impacts.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<title>Discussion</title>
<p>This study presents a machine learning&#x2013;based approach for the early identification of <italic>E. coli</italic> in patients with UTIs, leveraging routine clinical and laboratory parameters. The RF classifier achieved moderate diagnostic performance, with an AUC of 0.66 and an average precision of 0.66, demonstrating its feasibility as a clinical decision-support tool. Our microbial profiling confirmed <italic>E. coli</italic> as the predominant uropathogen, accounting for over half of all isolates (51.3%), consistent with global epidemiological trends. <italic>Enterococcus</italic> spp. and <italic>Klebsiella</italic> spp. followed as the next most common agents. These findings align with previous reports underscoring the dominance of <italic>E. coli</italic> in both community and nosocomial UTIs (<xref ref-type="bibr" rid="B14">Hossain et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B15">Zhan et&#xa0;al., 2024</xref>).</p>
<p>Consistent with existing literature, <italic>E. coli</italic> infections were significantly more common in women, likely due to anatomical predisposition (<xref ref-type="bibr" rid="B16">Johny et&#xa0;al., 2025</xref>). Our analysis revealed that <italic>E. coli</italic> infections were associated with significantly elevated absolute lymphocyte counts&#x2014;a finding that has received limited attention in prior literature. Most existing studies have focused on total leukocyte counts or composite indices such as the neutrophil-to-lymphocyte ratio (NLR) when characterizing the immune response in UTIs (<xref ref-type="bibr" rid="B17">Saheb Sharif-Askari et&#xa0;al., 2020</xref>). In contrast, the observed lymphocytosis in <italic>E. coli</italic> UTIs may reflect a distinct immunological signature, potentially linked to adaptive immune activation or pathogen-specific host responses (<xref ref-type="bibr" rid="B18">Hou et&#xa0;al., 2025</xref>). To our knowledge, few clinical studies have systematically quantified LYM elevation in <italic>E. coli</italic> UTIs, underscoring the novelty and potential diagnostic relevance of this finding within our cohort, while requiring confirmation in external datasets. Future investigations are warranted to validate this association in larger, multicenter cohorts, to assess whether this signal remains consistent across alternative modeling approaches, and to elucidate its mechanistic basis.</p>
<p>Besides LYM elevation, we also observed modestly increased serum ALT and ALB levels in patients with <italic>E. coli</italic> UTIs. A Korean pediatric study reported that some UTIs exhibited elevated liver enzyme levels, which normalized following infection resolution, indicating transient hepatic involvement associated with UTIs (<xref ref-type="bibr" rid="B19">Lee et&#xa0;al., 2021</xref>). More broadly, systemic infections&#x2014;including UTIs&#x2014;can lead to mild to moderate aminotransferase elevations through immune-mediated hepatic stress. Meanwhile, higher ALB levels have been associated with improved outcomes in infection contexts, reflecting better nutritional or immune status, as demonstrated in studies of febrile infections and postoperative UTIs (<xref ref-type="bibr" rid="B20">Wang et&#xa0;al., 2024</xref>). Collectively, these findings suggest that slight elevations in ALT and ALB may serve as biomarkers of systemic response rather than direct hepatic injury, highlighting their potential utility in distinguishing <italic>E. coli</italic>-driven UTI phenotypes.</p>
<p><italic>E. coli</italic>&#x2013;associated UTIs in our cohort were characterized by lower RDW and reduced D-dimer levels compared to non-<italic>E. coli</italic> infections, suggesting pathogen-specific hematologic and coagulative responses. While elevated RDW and D-dimer are well-documented markers of severe infections&#x2014;particularly Gram-negative bacteremia and sepsis&#x2014;they are more typically associated with greater illness severity and poor prognosis, rather than pathogen-specific differences (<xref ref-type="bibr" rid="B21">Xia et&#xa0;al., 2021</xref>). For example, higher RDW has been linked with adverse outcomes in sepsis, whereas elevated D-dimer reflects systemic inflammation and thrombotic activity, and its increase is nonspecific across bacterial infections (<xref ref-type="bibr" rid="B22">Lee et&#xa0;al., 2018</xref>). The comparatively lower levels of these markers in <italic>E. coli</italic> UTIs may indicate a milder systemic response or less extensive endothelial activation relative to other pathogens. Although these findings require further investigation, they point towards potentially valuable clues for early pathogen differentiation and deserve exploration in larger prospective cohorts.</p>
<p>Interpretability was primarily based on SHAP (TreeSHAP), which provides signed, patient-level attributions and can capture nonlinear and interaction effects (<xref ref-type="bibr" rid="B23">Rodr&#xed;guez-P&#xe9;rez and Bajorath, 2020</xref>). To contextualize the RF model and reduce the risk of model-specific findings, we benchmarked RF against commonly used baseline classifiers, including regularized logistic regression, SVM, and gradient boosting. This benchmarking reduces the risk of model-specific artifacts and allows us to more robustly interpret the features identified as significant. Because routinely collected laboratory variables can be correlated, feature attributions may be shared across correlated predictors; therefore, the top SHAP contributors should be interpreted as candidate predictors rather than definitive pathogen-specific biomarkers. In addition, SHAP provides local explanations for individual predictions, which can support clinical review and improve transparency and trust in model outputs (<xref ref-type="bibr" rid="B25">Janssen et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B24">Bifarin, 2023</xref>).</p>
<p>The RF model achieved moderate discriminative performance (AUC = 0.66), which was confirmed in our benchmarking comparisons. Nevertheless, it offers clinical value due to the rapid and noninvasive nature of the input features. The model also demonstrated balanced precision and recall between <italic>E. coli</italic> and non-<italic>E. coli</italic> UTIs, helping to minimize classification bias. Accordingly, it may serve as a supportive triage tool to prioritize early decision-making while awaiting culture-based confirmation.</p>
<sec id="s4_1">
<title>Limitations and future directions</title>
<p>This study is limited by its single-center, retrospective design, and external validation is needed to confirm generalizability across diverse patient populations. Although multiple models (Random Forest, logistic regression, SVM, and gradient boosting) were compared, the findings should be interpreted within the context of the dataset and applied classifiers. Future research should assess the stability of biomarkers across different algorithms and evaluate model performance in external cohorts. Additionally, prospective validation and temporal testing are needed to assess the model&#x2019;s generalizability and clinical applicability over time.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<title>Conclusions</title>
<p>In this study, we developed an explainable machine learning model to facilitate the early identification of <italic>E. coli</italic> urinary tract infections using routine clinical data. The random forest classifier demonstrated moderate discriminative performance (AUC = 0.66), with balanced precision and recall across pathogen classes, underscoring its potential clinical applicability. SHAP-based interpretability revealed important, and in some cases non-linear, feature interactions&#x2014;most notably the strong predictive role of sex and LYM&#x2014;offering a transparent framework for AI (artificial intelligence)-driven decision support.</p>
<p>Notably, modest elevations in ALT and ALB levels, alongside lower RDW and D-dimer in <italic>E. coli</italic> infections, may represent distinct systemic responses, meriting further investigation. These findings collectively highlight the feasibility of integrating interpretable machine learning with conventional biomarkers to enhance diagnostic efficiency, reduce reliance on empirical therapy, and inform targeted interventions in urinary tract infection management.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Medical Ethics Committee of Fuding Hospital, Fujian University of Traditional Chinese Medicine (Approval Number: Fuding Hospital 2024015). The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x2019; legal guardians/next of kin. Informed consent was waived due to the retrospective nature of the study.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>JZ: Formal analysis, Writing &#x2013; original draft, Visualization, Data curation, Methodology, Conceptualization, Investigation. Y-YJ:&#xa0;Investigation, Writing &#x2013; original draft, Methodology, Data curation, Conceptualization, Formal analysis, Visualization. YZ:&#xa0;Data curation, Conceptualization, Methodology, Visualization, Writing &#x2013; original draft, Formal analysis, Investigation. C-YP: Validation, Conceptualization, Methodology, Writing &#x2013; original draft, Visualization, Software, Formal analysis. L-HY: Writing &#x2013; review &amp; editing, Software, Data curation, Formal analysis, Validation. Y-YZ: Validation, Data curation, Writing &#x2013; review &amp; editing, Visualization, Formal analysis, Software. S-YZ:&#xa0;Conceptualization, Investigation, Supervision, Methodology, Writing &#x2013; review &amp; editing, Funding acquisition, Software, Formal analysis, Writing &#x2013; original draft, Project administration, Visualization, Data curation, Validation, Resources. JS: Data curation, Visualization, Methodology, Supervision, Conceptualization, Validation, Project administration, Investigation, Software, Funding acquisition, Formal analysis, Resources, Writing &#x2013; review &amp; editing, Writing &#x2013; original draft.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>We express our gratitude to the staff of the Department of Clinical Laboratory, Fuding Hospital, Fujian University of Traditional Chinese Medicine for their dedication and assistance in data collection and analysis.</p>
</ack>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. During the preparation of this work the authors used ChatGPT to improve the clarity of the sentences. After using this tool/service, the authors reviewed and edited the content as needed and take full responsibility for the content of the publication.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s13" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcimb.2026.1740707/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcimb.2026.1740707/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Image1.png" id="SM1" mimetype="image/png"/>
<supplementary-material xlink:href="Image2.png" id="SM2" mimetype="image/png"/>
<supplementary-material xlink:href="Table1.docx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Table2.docx" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Table3.xlsx" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/></sec>
<ref-list>
<title>References</title>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Barre&#xf1;ada</surname> <given-names>L.</given-names></name>
<name><surname>Dhiman</surname> <given-names>P.</given-names></name>
<name><surname>Timmerman</surname> <given-names>D.</given-names></name>
<name><surname>Boulesteix</surname> <given-names>A. L.</given-names></name>
<name><surname>Van Calster</surname> <given-names>B</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Understanding overfitting in random forest for probability estimation: a visualization and simulation study</article-title>. <source>Diagn. Progn Res.</source> <volume>8</volume>, <fpage>14</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s41512-024-00177-1</pub-id>, PMID: <pub-id pub-id-type="pmid">39334348</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bifarin</surname> <given-names>O. O.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Interpretable machine learning with tree-based shapley additive explanations: Application to metabolomics datasets for binary classification</article-title>. <source>PLoS One</source> <volume>18</volume>, <elocation-id>e0284315</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0284315</pub-id>, PMID: <pub-id pub-id-type="pmid">37141218</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chardavoyne</surname> <given-names>P. C.</given-names></name>
<name><surname>Kasmire</surname> <given-names>K. E.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Appropriateness of antibiotic prescriptions for urinary tract infections</article-title>. <source>West J. Emerg. Med.</source> <volume>21</volume>, <fpage>633</fpage>&#x2013;<lpage>639</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5811/westjem.2020.1.45944</pub-id>, PMID: <pub-id pub-id-type="pmid">32421512</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chowdhury</surname> <given-names>S. S.</given-names></name>
<name><surname>Tahsin</surname> <given-names>P.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Mosaddek</surname> <given-names>A. S. M.</given-names></name>
<name><surname>Muhamadali</surname> <given-names>H.</given-names></name>
<name><surname>Goodacre</surname> <given-names>R.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Trends in antimicrobial resistance of uropathogens isolated from urinary tract infections in a tertiary care hospital in Dhaka, Bangladesh</article-title>. <source>Antibiot (Basel)</source> <volume>13</volume>, <elocation-id>925</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/antibiotics13100925</pub-id>, PMID: <pub-id pub-id-type="pmid">39452192</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hossain</surname> <given-names>M. J.</given-names></name>
<name><surname>Azad</surname> <given-names>A. K.</given-names></name>
<name><surname>Shahid</surname> <given-names>M. S. B.</given-names></name>
<name><surname>Shahjahan</surname> <given-names>M.</given-names></name>
<name><surname>Ferdous</surname> <given-names>J</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Prevalence, antibiotic resistance pattern for bacteriuria from patients with urinary tract infections</article-title>. <source>Health Sci. Rep.</source> <volume>7</volume>, <elocation-id>e2039</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/hsr2.2039</pub-id>, PMID: <pub-id pub-id-type="pmid">38617042</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hou</surname> <given-names>Y.</given-names></name>
<name><surname>Lv</surname> <given-names>Z.</given-names></name>
<name><surname>Hu</surname> <given-names>Q.</given-names></name>
<name><surname>Zhu</surname> <given-names>A.</given-names></name>
<name><surname>Niu</surname> <given-names>H</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>The immune mechanisms of the urinary tract against infections</article-title>. <source>Front. Cell. Infect. Microbiol.</source> <volume>15</volume>, <elocation-id>1540149</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcimb.2025.1540149</pub-id>, PMID: <pub-id pub-id-type="pmid">40308964</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Janssen</surname> <given-names>A.</given-names></name>
<name><surname>Hoogendoorn</surname> <given-names>M.</given-names></name>
<name><surname>Cnossen</surname> <given-names>M. H.</given-names></name>
<name><surname>Math&#xf4;t</surname> <given-names>R. A. A</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>Application of SHAP values for inferring the optimal functional form of covariates in pharmacokinetic modeling</article-title>. <source>CPT Pharmacom Syst. Pharmacol.</source> <volume>11</volume>, <fpage>1100</fpage>&#x2013;<lpage>1110</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/psp4.12828</pub-id>, PMID: <pub-id pub-id-type="pmid">38100100</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jeng</surname> <given-names>S. L.</given-names></name>
<name><surname>Huang</surname> <given-names>Z. J.</given-names></name>
<name><surname>Yang</surname> <given-names>D. C.</given-names></name>
<name><surname>Teng</surname> <given-names>C. H.</given-names></name>
<name><surname>Wang</surname> <given-names>M. C</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>Machine learning to predict the development of recurrent urinary tract infection related to single uropathogen, <italic>Escherichia coli</italic></article-title>. <source>Sci. Rep.</source> <volume>12</volume>, <fpage>17216</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-18920-3</pub-id>, PMID: <pub-id pub-id-type="pmid">36241875</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Johny</surname> <given-names>V. F.</given-names></name>
<name><surname>Menon</surname> <given-names>V. T. K.</given-names></name>
<name><surname>Georgy</surname> <given-names>S.</given-names></name>
<name><surname>Saju</surname> <given-names>C. R.</given-names></name>
<name><surname>Jini</surname> <given-names>M. P</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>Prevalence of recurrent urinary tract infections and its associated factors in female staff of reproductive age group in a medical college in central Kerala: a cross-sectional study</article-title>. <source>BMC Infect. Dis.</source> <volume>25</volume>, <fpage>276</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12879-025-10634-x</pub-id>, PMID: <pub-id pub-id-type="pmid">40000942</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lee</surname> <given-names>J. W.</given-names></name>
<name><surname>Her</surname> <given-names>S. M.</given-names></name>
<name><surname>Kim</surname> <given-names>J. H.</given-names></name>
<name><surname>Lee</surname> <given-names>K. H.</given-names></name>
<name><surname>Eisenhut</surname> <given-names>M.</given-names></name>
<name><surname>Park</surname> <given-names>S. J.</given-names></name>
<etal/>
</person-group>. (<year>2018</year>). 
<article-title>D-dimer as a marker of acute pyelonephritis in infants younger than 24 months with urinary tract infection</article-title>. <source>Pediatr. Nephrol.</source> <volume>33</volume>, <fpage>631</fpage>&#x2013;<lpage>637</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00467-017-3843-9</pub-id>, PMID: <pub-id pub-id-type="pmid">29306986</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lee</surname> <given-names>Y.</given-names></name>
<name><surname>Yi</surname> <given-names>D. Y.</given-names></name>
<name><surname>Lee</surname> <given-names>Y. M.</given-names></name>
<name><surname>Choi</surname> <given-names>S. Y.</given-names></name>
<name><surname>Choi</surname> <given-names>Y. J.</given-names></name>
<name><surname>Lee</surname> <given-names>K. J.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>A multicenter study of real-world practice for management of abnormal liver function tests in children with acute infectious diseases</article-title>. <source>J. Korean Med. Sci.</source> <volume>36</volume>, <elocation-id>e310</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3346/jkms.2021.36.e310</pub-id>, PMID: <pub-id pub-id-type="pmid">34873882</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>B. R.</given-names></name>
<name><surname>Zhuo</surname> <given-names>Y.</given-names></name>
<name><surname>Jiang</surname> <given-names>Y. Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>S. Y</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Random forest differentiation of <italic>Escherichia coli</italic> in elderly sepsis using biomarkers and infectious sites</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>12973</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-63944-6</pub-id>, PMID: <pub-id pub-id-type="pmid">38839818</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lundberg</surname> <given-names>S. M.</given-names></name>
<name><surname>Erion</surname> <given-names>G.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<name><surname>DeGrave</surname> <given-names>A.</given-names></name>
<name><surname>Prutkin</surname> <given-names>J. M.</given-names></name>
<name><surname>Nair</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>From local explanations to global understanding with explainable AI for trees</article-title>. <source>Nat. Mach. Intell.</source> <volume>2</volume>, <fpage>56</fpage>&#x2013;<lpage>67</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s42256-019-0138-9</pub-id>, PMID: <pub-id pub-id-type="pmid">32607472</pub-id>
</mixed-citation>
</ref>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mancuso</surname> <given-names>G.</given-names></name>
<name><surname>Midiri</surname> <given-names>A.</given-names></name>
<name><surname>Gerace</surname> <given-names>E.</given-names></name>
<name><surname>Marra</surname> <given-names>M.</given-names></name>
<name><surname>Zummo</surname> <given-names>S.</given-names></name>
<name><surname>Biondo</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Urinary tract infections: the current scenario and future prospects</article-title>. <source>Pathogens</source> <volume>12</volume>, <elocation-id>623</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/pathogens12040623</pub-id>, PMID: <pub-id pub-id-type="pmid">37111509</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Morado</surname> <given-names>F.</given-names></name>
<name><surname>Wong</surname> <given-names>D. W.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Applying diagnostic stewardship to proactively optimize the management of urinary tract infections</article-title>. <source>Antibiot (Basel)</source> <volume>11</volume>, <elocation-id>308</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/antibiotics11030308</pub-id>, PMID: <pub-id pub-id-type="pmid">35326771</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nelson</surname> <given-names>Z.</given-names></name>
<name><surname>Aslan</surname> <given-names>A. T.</given-names></name>
<name><surname>Beahm</surname> <given-names>N. P.</given-names></name>
<name><surname>Blyth</surname> <given-names>M.</given-names></name>
<name><surname>Cappiello</surname> <given-names>M.</given-names></name>
<name><surname>Casaus</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Guidelines for the prevention, diagnosis, and management of urinary tract infections in pediatrics and adults: A wikiGuidelines group consensus statement</article-title>. <source>JAMA Netw. Open</source> <volume>7</volume>, <elocation-id>e2444495</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.44495</pub-id>, PMID: <pub-id pub-id-type="pmid">39495518</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rodr&#xed;guez-P&#xe9;rez</surname> <given-names>R.</given-names></name>
<name><surname>Bajorath</surname> <given-names>J.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Interpretation of compound activity predictions from complex machine learning models using local approximations and shapley values</article-title>. <source>J. Med. Chem.</source> <volume>63</volume>, <fpage>8761</fpage>&#x2013;<lpage>8777</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jmedchem.9b01101</pub-id>, PMID: <pub-id pub-id-type="pmid">31512867</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saheb Sharif-Askari</surname> <given-names>F.</given-names></name>
<name><surname>Saheb Sharif-Askari</surname> <given-names>N.</given-names></name>
<name><surname>Guella</surname> <given-names>A.</given-names></name>
<name><surname>Alabdullah</surname> <given-names>A.</given-names></name>
<name><surname>Bashar Al Sheleh</surname> <given-names>H</given-names></name>
</person-group>. (<year>2020</year>). 
<article-title>Blood neutrophil-to-lymphocyte ratio and urine IL-8 levels predict the type of bacterial urinary tract infection in type 2 diabetes mellitus patients</article-title>. <source>Infect. Drug Resist.</source> <volume>13</volume>, <fpage>1961</fpage>&#x2013;<lpage>1970</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2147/IDR.S251966</pub-id>, PMID: <pub-id pub-id-type="pmid">32612372</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sergounioti</surname> <given-names>A.</given-names></name>
<name><surname>Rigas</surname> <given-names>D.</given-names></name>
<name><surname>Zoitopoulos</surname> <given-names>V.</given-names></name>
<name><surname>Kalles</surname> <given-names>D</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>From preliminary urinalysis to decision support: machine learning for UTI prediction in real-world laboratory data</article-title>. <source>J. Pers. Med.</source> <volume>15</volume>, <elocation-id>200</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/jpm15050200</pub-id>, PMID: <pub-id pub-id-type="pmid">40423071</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shen</surname> <given-names>L.</given-names></name>
<name><surname>An</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>N.</given-names></name>
<name><surname>Wu</surname> <given-names>J.</given-names></name>
<name><surname>Yao</surname> <given-names>J.</given-names></name>
<name><surname>Gao</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Artificial intelligence and machine learning applications in urinary tract infections identification and prediction: a systematic review and meta-analysis</article-title>. <source>World J. Urol</source> <volume>42</volume>, <fpage>464</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00345-024-05145-4</pub-id>, PMID: <pub-id pub-id-type="pmid">39088072</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<name><surname>Tang</surname> <given-names>W.</given-names></name>
<name><surname>Yao</surname> <given-names>W.</given-names></name>
<name><surname>Lv</surname> <given-names>Q.</given-names></name>
<name><surname>Ding</surname> <given-names>W</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Glucose-albumin ratio (GAR) as a novel biomarker of postoperative urinary tract infection in elderly hip fracture patients</article-title>. <source>Front. Med. (Lausanne)</source> <volume>11</volume>, <elocation-id>1366012</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmed.2024.1366012</pub-id>, PMID: <pub-id pub-id-type="pmid">39076765</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xia</surname> <given-names>M.</given-names></name>
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Hong</surname> <given-names>Y.</given-names></name>
<name><surname>An</surname> <given-names>L.</given-names></name>
<name><surname>Xiong</surname> <given-names>L.</given-names></name>
<name><surname>Huang</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Red blood cell distribution width may be a new factor that influence the selection of invasive treatment in patients with renal abscess</article-title>. <source>Ann. Palliat Med.</source> <volume>10</volume>, <fpage>3916</fpage>&#x2013;<lpage>3923</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/apm-20-2305</pub-id>, PMID: <pub-id pub-id-type="pmid">33691444</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<name><surname>Zheng</surname> <given-names>Y.</given-names></name>
<name><surname>Qu</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Yi</surname> <given-names>F.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Disease burden and long-term trends of urinary tract infections: A worldwide report</article-title>. <source>Front. Public Health</source> <volume>10</volume>, <elocation-id>888205</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpubh.2022.888205</pub-id>, PMID: <pub-id pub-id-type="pmid">35968451</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zagaglia</surname> <given-names>C.</given-names></name>
<name><surname>Ammendolia</surname> <given-names>M. G.</given-names></name>
<name><surname>Maurizi</surname> <given-names>L.</given-names></name>
<name><surname>Nicoletti</surname> <given-names>M.</given-names></name>
<name><surname>Longhi</surname> <given-names>C</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>Urinary tract infections caused by uropathogenic <italic>Escherichia coli</italic> strains&#x2014;new strategies for an old pathogen</article-title>. <source>Microorganisms</source> <volume>10</volume>, <elocation-id>1425</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/microorganisms10071425</pub-id>, PMID: <pub-id pub-id-type="pmid">35889146</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhan</surname> <given-names>Z. S.</given-names></name>
<name><surname>Shi</surname> <given-names>J.</given-names></name>
<name><surname>Zheng</surname> <given-names>Z. S.</given-names></name>
<name><surname>Zhu</surname> <given-names>X. X.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Zhou</surname> <given-names>X. Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Epidemiological insights into seasonal, sex-specific and age-related distribution of bacterial pathogens in urinary tract infections</article-title>. <source>Exp. Ther. Med.</source> <volume>27</volume>, <elocation-id>140</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3892/etm.2024.12428</pub-id>, PMID: <pub-id pub-id-type="pmid">38476915</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1086696">Zhi-Kai Yang</ext-link>, Guangzhou Medical University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2385849">Pierfrancesco Novielli</ext-link>, National Institute for Nuclear Physics of Bari, Italy</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3138028">Volkan Alparslan</ext-link>, Kocaeli University Faculty of Medicine, T&#xfc;rkiye</p></fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbrev1">
<label>Abbreviations:</label>
<p>UTI, Urinary Tract Infection; <italic>E. coli</italic>, <italic>Escherichia coli</italic>; RF, Random Forest; SHAP, SHapley Additive exPlanations; AUC, Area Under the Receiver Operating Characteristic Curve; AP, Average Precision; CBC, Complete Blood Count; ALT, Alanine Aminotransferase; AST, Aspartate Aminotransferase; ALB, Albumin; CRP, C-Reactive Protein; PCT, Procalcitonin; HGB, Hemoglobin; RDW, Red Cell Distribution Width; LYM, Lymphocyte Count; PLT, Platelet Count; GLU, Glucose; CHO, Cholesterol; BUN, Blood Urea Nitrogen; UA, Uric Acid; D-dimer, D-dimer (Fibrin Degradation Product); ML, Machine Learning; ROC, Receiver Operating Characteristic; TP, True Positive; FP, False Positive; TN, True Negative; FN, False Negative; SD, Standard Deviation; IQR, Interquartile Range; OR, Odds Ratio; CI, Confidence Interval; SVM, Support Vector Machine.</p>
</fn>
</fn-group>
</back>
</article>