<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="brief-report" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell. Infect. Microbiol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Cellular and Infection Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell. Infect. Microbiol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2235-2988</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcimb.2025.1641413</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Brief Research Report</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Multi-cohort ensemble learning framework for vaginal microbiome-based endometrial cancer detection</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Dodani</surname><given-names>Dollina</given-names></name>
<uri xlink:href="https://loop.frontiersin.org/people/3090431/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Talhouk</surname><given-names>Aline</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3090668/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Department of Obstetrics and Gynecology, Division of Gynecologic Oncology, University of British Columbia</institution>, <city>Vancouver</city>, <state>BC</state>,&#xa0;<country country="ca">Canada</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Aline Talhouk, <email xlink:href="mailto:a.talhouk@ubc.ca">a.talhouk@ubc.ca</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-12-08">
<day>08</day>
<month>12</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>15</volume>
<elocation-id>1641413</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>29</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Dodani and Talhouk.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Dodani and Talhouk</copyright-holder>
<license>
<ali:license_ref start_date="2025-12-08">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Endometrial cancer is the most common gynecological malignancy in high-income countries and lacks an established strategy for early detection. Prior studies suggest that the vaginal microbiome may hold diagnostic potential, but inconsistent findings have limited clinical translation.</p>
</sec>
<sec>
<title>Methods</title>
<p>We conducted a systematic review to collect and analyze vaginal 16S rRNA sequencing data from five independent cohorts (n = 265). These studies included women with histologically confirmed endometrial cancer and controls with benign gynecologic conditions. We used these datasets to identify microbial signatures associated with endometrial cancer and to develop a predictive machine learning model.</p>
</sec>
<sec>
<title>Results</title>
<p>Microbial diversity was significantly higher in endometrial cancer samples, and host characteristics influenced community composition. <italic>Peptoniphilus</italic> was reproducibly enriched in cancer samples across cohorts. An ensemble classifier accurately identified endometrial cancer in a held-out test set, achieving an area under the receiver operating characteristic curve of 0.93 (95% CI: 0.71&#x2013;0.93), sensitivity of 1.0 (95% CI: 0.74&#x2013;1.0), and a negative predictive value of 1.0 (95% CI: 0.59&#x2013;1.0).</p>
</sec>
<sec>
<title>Discussion</title>
<p>These findings support the potential of vaginal microbiome profiling as a minimally invasive approach for early detection of endometrial cancer.</p>
</sec>
</abstract>
<kwd-group>
<kwd>endometrial cancer</kwd>
<kwd>16S rRNA</kwd>
<kwd>machine learning</kwd>
<kwd>data integration</kwd>
<kwd>biomarkers</kwd>
<kwd>reproducibility</kwd>
<kwd>vaginal microbiome</kwd>
<kwd>multi-cohort analysis</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared financial support was received for this work and/or its publication. This work was funded by the Sumiko Kobayashi Marks Memorial OVCARE Research Grants (AT) supported by Vancouver General Hospital &amp; University of British Columbia Hospital Foundation. The funders had no involvement in study conception, data collection, data analysis, data interpretation, writing of the report, or publication decisions.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="74"/>
<page-count count="12"/>
<word-count count="5566"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Extra-intestinal Microbiome</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Endometrial cancer (EC) is the most common gynecological malignancy in high-income countries, with incidence rising globally (<xref ref-type="bibr" rid="B11">Bray et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B69">Siegel et&#xa0;al., 2024</xref>), due to increasing obesity rates, sedentary lifestyles, and aging populations (<xref ref-type="bibr" rid="B59">Reeves et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B53">Onstad et&#xa0;al., 2016</xref>). When diagnosed while still confined to the uterus, EC is treatable with hysterectomy, with a 5-year survival rate exceeding 90% (<xref ref-type="bibr" rid="B71">Uterine cancer survival statistics, 2015</xref>). However, survival drops to 17% in metastatic disease, underscoring the need for earlier detection (<xref ref-type="bibr" rid="B71">Uterine cancer survival statistics, 2015</xref>).</p>
<p>Abnormal uterine bleeding (AUB) is the most common first presenting symptom and prompts diagnostic endometrial biopsy, the current gold standard (<xref ref-type="bibr" rid="B56">Qureshi et&#xa0;al., 2018</xref>). While 90% of women diagnosed with EC report AUB, this common symptom during perimenopause lacks specificity, with fewer than 1% of pre-menopausal and 9% of post-menopausal women with AUB diagnosed with EC (<xref ref-type="bibr" rid="B36">Iram et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B17">Clarke et&#xa0;al., 2020</xref>). Moreover, endometrial biopsies are invasive and painful (<xref ref-type="bibr" rid="B48">Marcus et&#xa0;al., 2021</xref>). There is a critical need for minimally invasive tests that can rule out malignancy (<xref ref-type="bibr" rid="B19">Costas et&#xa0;al., 2019</xref>).</p>
<p>Advances in next-generation sequencing and fluid-based sampling techniques have accelerated microbiome research, opening opportunities for minimally invasive biomarker-based cancer screening (<xref ref-type="bibr" rid="B43">&#x141;aniewski et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B14">Chambers et&#xa0;al., 2021</xref>). Unlike gut microbial signatures that have led to early detection of colon cancer (<xref ref-type="bibr" rid="B31">Fusco et&#xa0;al., 2024</xref>), clinical translation of vaginal microbiome signatures remains limited. Several studies using 16S rRNA gene sequencing have identified associations between vaginal bacterial composition and EC, but reproducibility across datasets remains a challenge. Small sample sizes, inter-individual microbiome variability, and inconsistent bioinformatics pipelines contribute to varying results. Cross-study comparisons of bioinformatics pipelines have improved reproducibility in oral and gut microbiomics, but this type of comparison has not yet been conducted for the vaginal microbiome (<xref ref-type="bibr" rid="B40">Kang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B29">Fox et&#xa0;al., 2024</xref>). Additionally, machine learning techniques can be employed to integrate data from multiple cohorts to identify a predictive vaginal microbiome signature.</p>
<p>To address this, we conducted a multicohort analysis of publicly available 16S rRNA gene vaginal microbiome datasets from EC case-control studies. We systematically evaluated and selected a bioinformatics pipeline based on its reproducibility and replicability following the framework proposed by <xref ref-type="bibr" rid="B34">Hejblum et&#xa0;al., 2020</xref>. We also evaluated an additional pipeline that uses recent microbiome processing advancements. The resulting microbiome profiles were used to train a machine-learning classifier that incorporates individual characteristics, such as age, body mass index (BMI), and ethnicity, which influence both the vaginal microbiome and the risk of developing EC and its precursor, atypical endometrial hyperplasia (AEH).</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Dataset search &amp; inclusion</title>
<p>We used PubMed to systematically search for studies that analyzed 16S rRNA amplicon sequence microbiome data from the vaginal microbiome in individuals with EC and control groups (see <xref ref-type="supplementary-material" rid="SF1"><bold>Supplementary Section 1</bold></xref> for search keywords). To reduce variability, we included only those studies that collected specimens <italic>via</italic> vaginal swabs or uterine lavages, used 16S rRNA gene sequencing (any region), and provided publicly available raw sequence data with pathology labels. We excluded studies that used metagenomics or non-amplicon markers, or that sampled endometrial tissue. For studies with longitudinal sampling or multiple anatomical sites, only baseline (cervico)vaginal samples were included. Atypical endometrial hyperplasia was grouped with EC, because they are likely to arise together and are clinically treated similarly. All other pathologies, including simple hyperplasia, were deemed benign. Sequence data and metadata were retrieved from the Sequence Read Archive or materials provided by study authors.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Selection of bioinformatics methods</title>
<p>Our goal was to identify a bioinformatics pipeline that consistently reproduced and replicated key microbiome metrics across various studies, including 1) alpha diversity, which measures diversity within samples; 2) beta diversity, which measures clustering of microbial communities based on their composition; and 3)&#xa0;differentially abundant taxa associated with disease status (<xref ref-type="bibr" rid="B24">Douglas and Langille, 2021</xref>). We define reproducibility as obtaining similar results using the original authors&#x2019; analysis pipeline on their dataset and replicability as achieving comparable results when applying that same pipeline to a different dataset (<xref ref-type="bibr" rid="B34">Hejblum et&#xa0;al., 2020</xref>). We extracted bioinformatics workflows from published manuscripts.</p>
<p>Each dataset was reanalyzed using the original pipeline, the pipelines from other published studies, and the DADA2 pipeline, a high-resolution denoiser suitable for identifying rare taxa, particularly relevant in <italic>Lactobacillus</italic>-dominated vaginal communities (<xref ref-type="bibr" rid="B12">Callahan et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B52">Nearing et&#xa0;al., 2018</xref>). Samples were excluded based on pipeline-specific quality thresholds or incomplete metadata (<xref ref-type="supplementary-material" rid="SF2"><bold>Supplementary Section 2</bold></xref>). Processing steps for the DADA2 pipeline included adapter trimming using BBDuk<xref ref-type="fn" rid="fn1"><sup>1</sup></xref>, quality filtering using Phred scores, and trimming the 3&#x2019; ends of reads where the average quality dropped below 20 (see <xref ref-type="supplementary-material" rid="SF3"><bold>Supplementary Section 3</bold></xref> for read lengths maintained). Forward and reverse reads were denoised and paired-end reads were merged. In studies where there was minimal to no read overlap (&lt;50% samples merging with &gt;12bp overlap; details in <xref ref-type="supplementary-material" rid="SF3"><bold>Supplementary Section 3</bold></xref>), only forward reads were used, as previously done (<xref ref-type="bibr" rid="B67">Sekaran et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B1">Abdill et&#xa0;al., 2025</xref>). After filtering out bimeras, the Amplicon Sequence Variant (ASV) abundance table was normalized by the total number of reads sequenced in each sample.
Identified ASVs were assigned species-level taxonomic information using the na&#xef;ve Bayes classifier implemented in QIIME2 based on 1) the Greengenes (GG) database (v13.8), 2) the Genome Taxonomy Database (GTDB) (vbac120), both with uniform taxonomic distribution, and 3) the GTDB database with an expected species distribution using q2-clawback (<xref ref-type="bibr" rid="B20">DeSantis et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B27">Fettweis et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B7">Bokulich et&#xa0;al., 2018</xref>, <xref ref-type="bibr" rid="B8">Bokulich et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B9">Bolyen et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B39">Kaehler et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B60">Rinke et&#xa0;al., 2021</xref>).</p>
<p>To merge taxonomies from GG and GTDB, we used superstring matching and RESCRIPt to generate a consensus taxonomy based on the last common ancestor (<xref ref-type="bibr" rid="B61">Robeson et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B8">Bokulich et&#xa0;al., 2022</xref>). A phylogenetic tree was built by aligning ASVs with MAFFT, processed with FastTree, and midpoint-rooted using the phangorn package (v2.11.1) in R (<xref ref-type="bibr" rid="B55">Price et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B41">Katoh and Standley, 2013</xref>). <xref ref-type="supplementary-material" rid="SF4"><bold>Supplementary Section 4</bold></xref> outlines the bioinformatics pipelines implemented.</p>
<p>For each dataset, we compared the performance of the previously implemented pipelines with DADA2. We used the Shannon index to measure alpha diversity. For beta diversity, we tested the marginal significance of available participant characteristics and disease status using the PERMANOVA test (without multiple testing correction), along with distance measures reported in individual studies (such as Bray-Curtis, calculated using the relative abundances of taxonomic features, and weighted or unweighted UniFrac, which also considers the phylogeny of the taxonomic features) (<xref ref-type="bibr" rid="B10">Bray and Curtis, 1957</xref>; <xref ref-type="bibr" rid="B47">Lozupone et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B4">Anderson, 2017</xref>). For the DADA2 pipeline, we used the Bray-Curtis, Jaccard (<xref ref-type="bibr" rid="B58">Real and Vargas, 1996</xref>), UniFrac, and Jensen-Shannon Divergence (<xref ref-type="bibr" rid="B30">Fuglede and Topsoe, 2004</xref>) metrics. The weighted UniFrac metric was calculated using the phylogenetic tree generated from ASVs. Both alpha and beta diversity were measured at the ASV level. To identify differentially abundant taxa in EC participants, we aggregated the abundance tables to the species level (or the genus taxonomic level, if unassigned at the species level) and used ANCOM-BC with multiple testing adjustment using the Holm method (<xref ref-type="bibr" rid="B45">Lin and Peddada, 2020</xref>). All microbiome metrics evaluated were adjusted for potential confounding patient factors, including age, BMI, and ethnicity. The pipeline that consistently demonstrated trends reported in the literature for alpha/beta diversity and associated EC taxa was selected for downstream predictive modeling.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Data integration, model development, and selection of validation cohort</title>
<p>We compared several data integration strategies to develop a predictive vaginal microbiome signature for EC (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>). One study (<xref ref-type="bibr" rid="B73">Walther-Ant&#xf3;nio et&#xa0;al., 2016</xref>) was set aside for validation. This study was selected because it was neither too large nor too small and had a balanced number of EC and benign diagnoses. The remaining studies were used for model training. Our <italic>baseline approach</italic> used early integration, where datasets were concatenated into a single high-dimensional matrix (<xref ref-type="bibr" rid="B54">Picard et&#xa0;al., 2021</xref>). In the <italic>batch-corrected</italic> approach, we used ComBat (<xref ref-type="bibr" rid="B38">Johnson et&#xa0;al., 2007</xref>) to adjust for batch effects and assessed clustering by batch versus disease status using the PERMANOVA test (<xref ref-type="bibr" rid="B4">Anderson, 2017</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Depiction of modelling frameworks implementing different integration strategies: 1) Early integration where all datasets are aggregated into a single data frame prior to modelling with <bold>(A)</bold> Non-batch corrected and <bold>(B)</bold> ComBat batch-corrected data. 2) Late integration where a local model is trained on each study data and final predictions are averaged across all studies. Models built using <bold>(C)</bold> microbiome data only, <bold>(D)</bold> patient characteristics only, and <bold>(E)</bold> both microbiome and patient characteristic data. The best-performing ensemble was selected based on internal validation (out of fold) error and evaluated on the held-out dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-15-1641413-g001.tif">
<alt-text content-type="machine-generated">Illustration of modelling frameworks using different integration strategies: Panels A and B depict early integration, where all datasets are combined into a single data frame before modelling with either non-batch-corrected or ComBat batch-corrected data. Panels C, D, and E depict late integration, with models trained on each study separately and final predictions averaged. Models are built using microbiome data only, patient characteristics only, or both. The best-performing ensemble is selected based on internal validation error and evaluated on a held-out dataset.</alt-text>
</graphic></fig>
<p>We also evaluated a <italic>late integration</italic> strategy, where separate classifiers were trained on individual datasets and combined to generate ensemble predictions (<xref ref-type="bibr" rid="B49">Mienye and Sun, 2022</xref>). This allowed the inclusion of patient-level characteristics when available and permitted the use of different algorithms for different datasets (<xref ref-type="bibr" rid="B49">Mienye and Sun, 2022</xref>).</p>
<p>Under late integration, we trained four models: (1) a microbiome-only model, (2) a model using patient characteristics (age, BMI, and ethnicity) only, (3) a model combining patient characteristics and microbiome data, and (4) a model using patient characteristics and vaginal pH as a microbiome biomarker, since vaginal pH is a consequence of the vaginal microbiome composition and associated with EC risk (<xref ref-type="bibr" rid="B73">Walther-Ant&#xf3;nio et&#xa0;al., 2016</xref>).</p>
<p>All models were built with CLR-transformed, genus-level count data, adhering to the Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis + Artificial Intelligence (TRIPOD+AI) guidelines (<xref ref-type="bibr" rid="B18">Collins et&#xa0;al., 2024</xref>) (checklist in <xref ref-type="supplementary-material" rid="SF5"><bold>Supplementary Section 5</bold></xref>). Taxa present in fewer than 5% of samples were filtered out. For each integration strategy, we evaluated the following algorithms: random forest algorithm (randomForest v4.7-1.1), gradient tree boosting algorithm (xgboost v1.7.7.1), and neural network algorithm (nnet v7.3-19) using the tidymodels (v1.2.0) package. We also evaluated ensembles comprising three base models. Hyperparameter tuning was performed using grid search (details in <xref ref-type="supplementary-material" rid="SF6"><bold>Supplementary Section 6</bold></xref>) with a five-fold stratified cross-validation, repeated three times, and optimized for F1 score at a 0.5 classification threshold. Final ensemble models were selected based on out-of-fold performance and tested on a held-out test set. To demonstrate the generalizability of the model that performs the best on the held-out dataset, we implemented a leave-one-study-out (LOSO) validation approach, retraining the models each time to only include the covariates available in the held-out dataset.</p>
<p>We reported sensitivity, specificity, negative predictive value (NPV), positive predictive value (PPV), and area under the receiver operating characteristic curve (AUROC) using the yardstick (v1.3.1) package. For the LOSO validation, we report pooled metrics. Exact 95% confidence intervals were calculated using epiR (v2.0.80). To correct for class imbalance, we used SMOTENC from the themis package (v1.0.3). For datasets with partially missing data, Multivariate Imputation by Chained Equations (mice v3.16.0) was used to impute missing values, conditioned on other participant data and microbiome profile. All analyses were performed in R (v4.3.3) and RStudio (v2023.06.2). Executable code and pipeline parameters are available on GitHub<xref ref-type="fn" rid="fn2"><sup>2</sup></xref>.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Datasets &amp; bioinformatics pipelines</title>
<p>The systematic literature review yielded 248 articles, of which 11 used 16S rRNA amplicon sequencing to assess vaginal microbiome profiles. We excluded 5 studies that relied on invasive tissue sampling and 1 study that did not provide access to sequencing data, resulting in 5 eligible datasets for analysis.</p>
<p>The first study, published by the Mayo Clinic in 2016, included 22 participants and investigated the vaginal microbiome composition and its putative role in EC (<xref ref-type="bibr" rid="B73">Walther-Ant&#xf3;nio et&#xa0;al., 2016</xref>). The study reported significant age differences between EC patients and controls with benign conditions (mean age 62 vs 47&#xa0;years). A follow-up study by the same group in 2019 expanded the cohort to 149 participants and evaluated participant characteristics, including menopausal status, BMI, vaginal pH, and age, and their associations with microbial composition (<xref ref-type="bibr" rid="B72">Walsh et&#xa0;al., 2019</xref>). This second cohort demonstrated differences in EC patients compared to those with benign diagnoses. Notably, EC patients were older and had higher BMI and vaginal pH (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Patient and study characteristics in included cohorts.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Study (Year)</th>
<th valign="middle" align="left">16S rRNA region</th>
<th valign="middle" align="left">Benign (N&#xa0;=&#xa0;130)</th>
<th valign="middle" align="left">EC (N&#xa0;=&#xa0;135)</th>
<th valign="middle" align="left">Total (N&#xa0;=&#xa0;265)</th>
<th valign="middle" align="left">p-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left"><xref ref-type="bibr" rid="B73">Walther-Ant&#xf3;nio et al., 2016</xref></td>
<td valign="middle" align="right">V3-V5</td>
<td valign="middle" align="right">10</td>
<td valign="middle" align="right">12</td>
<td valign="middle" align="right">22</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Age, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">47.1 &#xb1; 9.0</td>
<td valign="middle" align="right">62.3 &#xb1; 8.7</td>
<td valign="middle" align="right">63.6 &#xb1; 8.6</td>
<td valign="middle" align="right"><bold>&lt; 0.05</bold></td>
</tr>
<tr>
<td valign="middle" align="left">BMI, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">29.2 &#xb1; 6.89</td>
<td valign="middle" align="right">34.8 &#xb1; 7.7</td>
<td valign="middle" align="right">35.4 &#xb1; 8.1</td>
<td valign="middle" align="right">0.08</td>
</tr>
<tr>
<td valign="middle" align="left">Vaginal pH</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right">0.17</td>
</tr>
<tr>
<td valign="middle" align="right">&lt;= 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">5 (50%)</td>
<td valign="middle" align="right">2 (16.7%)</td>
<td valign="middle" align="right">7 (31.8%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">&gt; 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">5 (50%)</td>
<td valign="middle" align="right">9 (75%)</td>
<td valign="middle" align="right">14 (63.6%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">NA (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right">1 (8.3%)</td>
<td valign="middle" align="right">1 (4.5%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Ethnicity</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">White (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">10 (100%)</td>
<td valign="middle" align="right">12 (100%)</td>
<td valign="middle" align="right">22 (100%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left"><xref ref-type="bibr" rid="B72">Walsh et&#xa0;al., 2019</xref></td>
<td valign="middle" align="right">V3-V5</td>
<td valign="middle" align="right">80</td>
<td valign="middle" align="right">69</td>
<td valign="middle" align="right">149</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Age, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">50.4 &#xb1; 10.2</td>
<td valign="middle" align="right">61.3 &#xb1; 10.8</td>
<td valign="middle" align="right">55.4 &#xb1; 11.5</td>
<td valign="middle" align="right"><bold>&lt; 0.05</bold></td>
</tr>
<tr>
<td valign="middle" align="left">BMI, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">31.7 &#xb1; 9.0</td>
<td valign="middle" align="right">35.6 &#xb1; 9.6</td>
<td valign="middle" align="right">33.7 &#xb1; 9.4</td>
<td valign="middle" align="right"><bold>&lt; 0.05</bold></td>
</tr>
<tr>
<td valign="middle" align="left">Vaginal pH</td>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="right"><bold>&lt; 0.05</bold></td>
</tr>
<tr>
<td valign="middle" align="right">&lt;= 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">25 (31.3%)</td>
<td valign="middle" align="right">3 (4.3%)</td>
<td valign="middle" align="right">28 (18.8%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">&gt; 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">47 (58.8%)</td>
<td valign="middle" align="right">55 (79.7%)</td>
<td valign="middle" align="right">102 (68.5%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">NA (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">8 (10%)</td>
<td valign="middle" align="right">11 (15.9%)</td>
<td valign="middle" align="right">19 (12.8%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Ethnicity</td>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="right">0.06</td>
</tr>
<tr>
<td valign="middle" align="right">White (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">59 (73.8%)</td>
<td valign="middle" align="right">60 (87%)</td>
<td valign="middle" align="right">119 (79.9%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">Other (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">21 (26.3%)</td>
<td valign="middle" align="right">9 (13%)</td>
<td valign="middle" align="right">30 (20.1%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left"><xref ref-type="bibr" rid="B70">Tsementzi et&#xa0;al., 2020</xref></td>
<td valign="middle" align="right">V4</td>
<td valign="middle" align="right">28</td>
<td valign="middle" align="right">8</td>
<td valign="middle" align="right">36</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Age, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">63.7 &#xb1; 6.4</td>
<td valign="middle" align="right">61.1 &#xb1; 5.4</td>
<td valign="middle" align="right">63.5 &#xb1; 6.3</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">BMI, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">27.1 &#xb1; 6.5</td>
<td valign="middle" align="right">32.0 &#xb1; 8.4</td>
<td valign="middle" align="right">26.9 &#xb1; 6.5</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Vaginal pH</td>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"><bold>&lt; 0.05</bold></td>
</tr>
<tr>
<td valign="middle" align="right">&lt;= 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">4 (14.3%)</td>
<td valign="middle" align="right">1 (12.5%)</td>
<td valign="middle" align="right">5 (13.9%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">&gt; 4.5 (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">21 (75%)</td>
<td valign="middle" align="right">3 (37.5%)</td>
<td valign="middle" align="right">24 (66.7%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">NA (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">3 (10.7%)</td>
<td valign="middle" align="right">4 (50%)</td>
<td valign="middle" align="right">7 (19.4%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Ethnicity</td>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right">0.11</td>
</tr>
<tr>
<td valign="middle" align="right">White (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">17 (60.7%)</td>
<td valign="middle" align="right">2 (25%)</td>
<td valign="middle" align="right">19 (52.8%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="right">Other (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">11 (39.3%)</td>
<td valign="middle" align="right">6 (75%)</td>
<td valign="middle" align="right">17 (47.2%)</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left"><xref ref-type="bibr" rid="B32">Gressel et&#xa0;al., 2021</xref></td>
<td valign="middle" align="right">V4</td>
<td valign="middle" align="right">4</td>
<td valign="middle" align="right">23</td>
<td valign="middle" align="right">27</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left"><xref ref-type="bibr" rid="B15">Chao et&#xa0;al., 2022</xref></td>
<td valign="middle" align="right">V3-V4</td>
<td valign="middle" align="right">8</td>
<td valign="middle" align="right">23</td>
<td valign="middle" align="right">31</td>
<td valign="middle" align="right"/>
</tr>
<tr>
<td valign="middle" align="left">Age, mean &#xb1; SD</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right">44.6 &#xb1; 12.2</td>
<td valign="middle" align="right">44.4 &#xb1; 11.1</td>
<td valign="middle" align="right">43.3 &#xb1; 10.5</td>
<td valign="middle" align="right">0.77</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Significant differences (p &lt; 0.05) are indicated in bold.</p></fn>
</table-wrap-foot>
</table-wrap>
<p><xref ref-type="bibr" rid="B70">Tsementzi et&#xa0;al., 2020</xref> analyzed vaginal microbiome composition in EC and cervical cancer (n=38) compared to participants with benign conditions. EC patients had significantly higher vaginal pH (&gt; 4.5) compared to controls. <xref ref-type="bibr" rid="B32">Gressel et&#xa0;al., 2021</xref>, examined postmenopausal women (n=27), profiling cervicovaginal microbiota to identify taxa associated with EC. Most recently, <xref ref-type="bibr" rid="B15">Chao et&#xa0;al., 2022</xref> used lavage-based vaginal sampling to compare microbial taxa associated with EC and benign diagnoses (n=31). Patient characteristics (age, BMI, ethnicity, and vaginal pH) were available for all cohorts except Chao et&#xa0;al. (age only) and Gressel et&#xa0;al. (no patient metadata). The available demographic and clinical characteristics of the participants are summarized in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<p>Biological specimens across all studies were collected by physicians; all cohorts were recruited in the United States of America except for Chao et&#xa0;al., which enrolled participants in China. Genomic DNA extraction protocol varied: the Mayo Clinic studies used the MoBio PowerSoil Kit, Tsementzi et&#xa0;al. used the DNeasy PowerSoil Kit, and Gressel and Chao et&#xa0;al. used the QIAamp DNA Mini Kit. All studies sequenced the 16S rRNA gene on the Illumina MiSeq platform, targeting various hypervariable regions, with V4 being the common region across all datasets. Study-specific details, including inclusion/exclusion criteria, sampling and storage protocols, DNA extraction, primer design, and controls, are provided in <xref ref-type="supplementary-material" rid="SF7"><bold>Supplementary Section 7</bold></xref>.</p>
<p>Bioinformatic pipelines also varied across studies: Antonio et&#xa0;al. and Walsh et&#xa0;al. employed IM-TORNADO (<xref ref-type="bibr" rid="B37">Jeraldo et&#xa0;al., 2014</xref>), an in-house pipeline that concatenates paired-end reads and processes them using Mothur and USEARCH to generate Operational Taxonomic Units (OTUs) (<xref ref-type="bibr" rid="B66">Schloss et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B25">Edgar, 2010</xref>). Chao et&#xa0;al. used UNOISE to denoise sequences and produce ASVs (<xref ref-type="bibr" rid="B26">Edgar and Flyvbjerg, 2015</xref>). Tsementzi et&#xa0;al. and Gressel et&#xa0;al. used QIIME2 with VSEARCH and USEARCH, respectively, to generate OTUs (<xref ref-type="bibr" rid="B25">Edgar, 2010</xref>; <xref ref-type="bibr" rid="B63">Rognes et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B9">Bolyen et&#xa0;al., 2019</xref>). We reprocessed all the datasets using those four pipelines and added a pipeline based on DADA2 (a total of 25 data/pipeline combinations).</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Microbial diversity in various conditions</title>
<p>All included studies originally reported increased vaginal microbial (alpha) diversity in EC patients compared to controls. We were able to reproduce this trend across datasets; however, results varied depending on the bioinformatics pipeline used (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2A</bold></xref>). The expected trend was not observed in the Tsementzi cohort when processed using the IM-TORNADO pipeline, nor in the Walsh cohort processed with the Tsementzi pipeline. Similarly, alpha diversity was not recapitulated in the Tsementzi and Chao datasets using the Gressel pipeline or in the Antonio dataset using the Chao pipeline. Among all datasets, the Gressel cohort demonstrated the most consistent trends across pipelines and the best quality profile (see <xref ref-type="supplementary-material" rid="SF3"><bold>Supplementary Section 3</bold></xref>). However, their pipeline, which does not filter for chimeric reads, systematically produced inflated alpha diversity values in other datasets.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Reproducibility and replicability of alpha, beta diversity and differentially abundant taxa. <bold>(A)</bold> Each panel provides the Shannon index (y-axis) calculated across the two health conditions on each cohort (x-axis) after processing by the pipeline in the respective panel. The last panel is the DADA2 pipeline, the only pipeline that demonstrated higher Shannon index in EC in comparison to benign across all datasets. <bold>(B)</bold> Each panel represents the marginal proportion of variance explained by participant characteristics (y-axis) available in each dataset when (x-axis) processed by the pipeline in the respective panel. Consistent trends were observed across pipelines. Health conditions explain less than 16% of variance in all datasets, whereas individual characteristics appear to have more influence on the structure and composition of the vaginal microbiome. <bold>(C)</bold> Illustration of differentially abundant taxa (using ANCOM-BC) while accounting for participant characteristics (nodes in the network plot) where available. The thickness of the edges represents the number of studies that were found to have a multivariate association between two nodes. Various species of <italic>Peptoniphilus</italic> were differentially abundant in EC individuals in all studies.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-15-1641413-g002.tif">
<alt-text content-type="machine-generated">Illustration of microbiome diversity metrics: Panel A shows box plots of alpha diversity (Shannon index) across pipelines and health conditions (benign vs. endometrial cancer). Panel B displays a heatmap of beta diversity R-squared values across covariates (ethnicity, age, BMI, health condition) for each pipeline. Panel C presents a network of differentially abundant taxa associated with these covariates, with edge thickness representing the number of studies supporting each association.</alt-text>
</graphic></fig>
<p>In contrast, processing with DADA2 consistently replicated the expected alpha diversity trend across all datasets (with no significant differences observed), suggesting greater robustness to technical variability.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Intra-variability vs inter-variability</title>
<p>Beta diversity appeared to be less sensitive to preprocessing differences (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2B</bold></xref>). Biological trends remained largely consistent across pipelines and metrics. We note that disease status accounted for less than 16% of the variance in all cohorts, regardless of the pipeline or distance metric (see <xref ref-type="supplementary-material" rid="SF8"><bold>Supplementary Section 8</bold></xref>). By contrast, when available, participant-level characteristics, such as ethnicity, age, and BMI, explained a greater proportion of variance.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Vaginal microbiome species associated with health status</title>
<p>Using ANCOM-BC, we adjusted for available covariates such as age, BMI, and ethnicity to identify differentially abundant taxa associated with disease status (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2C</bold></xref>). Across all datasets, various species of <italic>Peptoniphilus</italic> were consistently associated with disease status. Specifically, we observed enrichment of <italic>Peptoniphilus urinimassilliensis</italic> in the Walsh dataset, <italic>Peptoniphilus coxii</italic> in the Antonio and Chao datasets, and <italic>Peptoniphilus</italic> sp000478985 and sp900099555 in the Tsementzi and Gressel datasets, respectively. Notably, <italic>Peptoniphilus coxii</italic> and sp900099555 were also associated with BMI and age, respectively, in the Antonio and Walsh datasets. In addition to <italic>Peptoniphilus</italic>, species from the genera <italic>Prevotella</italic>, <italic>Streptococcus</italic>, and <italic>Blautia</italic> were differentially abundant across four of the five datasets, with the exception of Walsh et&#xa0;al., where these taxa were instead associated with BMI or ethnicity, both known EC risk factors.</p>
<p>Differentially abundant taxa reported in the original studies could not be reproduced using the published bioinformatics pipelines but were consistently recovered using the DADA2 pipeline. The DADA2 pipeline consistently outperformed other methods in identifying consistent ecological trends and was therefore selected as the primary preprocessing pipeline for model development.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Model development and validation</title>
<p>Four studies (Chao, Gressel, Tsementzi, and Walsh) were used for model training and one for performance evaluation (Antonio). Various data integration and modeling strategies were assessed, as outlined in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>. Performance metrics for all ensemble models are detailed in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> and <xref ref-type="supplementary-material" rid="SF9"><bold>Supplementary Section 9</bold></xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Performance metrics for early integration (non-batch corrected and ComBat batch-corrected) and late integration ensembles (microbiome data only, participant characteristics only, and participant characteristics and microbiome data) and associated 95% confidence intervals.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="right">Model</th>
<th valign="middle" align="left">NPV</th>
<th valign="middle" align="left">Sensitivity</th>
<th valign="middle" align="left">PPV</th>
<th valign="middle" align="left">Specificity</th>
<th valign="middle" align="left">AUROC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" colspan="6" align="left"><italic>Early integration:</italic></td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Non-batch corrected data</italic></td>
<td valign="middle" align="left">NaN [0, 1]</td>
<td valign="middle" align="left">1 [0.74, 1]</td>
<td valign="middle" align="left">0.55 [0.32, 0.76]</td>
<td valign="middle" align="left">0 [0, 0.31]</td>
<td valign="middle" align="left">0.49 [0.23, 0.49]</td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Batch-corrected data</italic></td>
<td valign="middle" align="left">NaN [0, 1]</td>
<td valign="middle" align="left">1 [0.74, 1]</td>
<td valign="middle" align="left">0.55 [0.32, 0.76]</td>
<td valign="middle" align="left">0 [0, 0.31]</td>
<td valign="middle" align="left">0.5 [0.5, 0.5]</td>
</tr>
<tr>
<td valign="middle" colspan="6" align="left"><italic>Late integration ensemble:</italic></td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Microbiome data only</italic></td>
<td valign="middle" align="left">0.64 [0.31, 0.89]</td>
<td valign="middle" align="left">0.67 [0.35, 0.90]</td>
<td valign="middle" align="left">0.73 [0.39, 0.94]</td>
<td valign="middle" align="left">0.7 [0.35, 0.93]</td>
<td valign="middle" align="left">0.63 [0.38, 0.63]</td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Participant characteristics only</italic></td>
<td valign="middle" align="left">0.69 [0.39, 0.91]</td>
<td valign="middle" align="left">0.67 [0.35, 0.90]</td>
<td valign="middle" align="left"><bold>0.89 [0.52, 1]</bold></td>
<td valign="middle" align="left"><bold>0.9 [0.56, 1]</bold></td>
<td valign="middle" align="left">0.88 [0.74, 0.88]</td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Participant characteristics and vaginal pH</italic></td>
<td valign="middle" align="left">0.64 [0.35, 0.87]</td>
<td valign="middle" align="left">0.58 [0.28, 0.85]</td>
<td valign="middle" align="left">0.88 [0.47, 1]</td>
<td valign="middle" align="left">0.9 [0.56, 1]</td>
<td valign="middle" align="left">0.87 [0.71, 0.87]</td>
</tr>
<tr>
<td valign="middle" align="right"><italic>Participant characteristics and microbiome data</italic></td>
<td valign="middle" align="left"><bold>1 [0.59, 1]</bold></td>
<td valign="middle" align="left"><bold>1 [0.74, 1]</bold></td>
<td valign="middle" align="left">0.8 [0.52, 0.96]</td>
<td valign="middle" align="left">0.7 [0.35, 0.93]</td>
<td valign="middle" align="left"><bold>0.93 [0.71, 0.93]</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>NaN, Undefined; NPV, Negative Predictive Value; PPV, Positive Predictive Value; AUROC, Area Under the Receiver Operating Characteristic curve.</p></fn>
<fn>
<p>The best-performing model for each metric is indicated in bold.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Early integration models, which pooled data prior to modeling, showed limited ability to correctly identify benign cases. Batch correction using ComBat removed study-specific microbiome clustering (adonis PERMANOVA) while preserving variance due to disease status (see <xref ref-type="supplementary-material" rid="SF10"><bold>Supplementary Section 10</bold></xref>). However, models trained on batch-corrected or uncorrected data performed similarly in identifying EC cases, both achieving perfect sensitivity, but very poor specificity (0; 95% CI; [0, 0.31]), which resulted in undefined NPV.</p>
<p>Late integration models using only microbiome data achieved moderate performance in a held-out test set (sensitivity: 0.67 (95% CI; 0.35-0.90) and NPV: 0.64 (95% CI; 0.31-0.89)). Predicting with participant metadata where available&#x2014;age, BMI, and ethnicity for Tsementzi and Walsh; age only for Chao&#x2014;improved specificity to 0.9 (95% CI; 0.56-1) but reduced sensitivity to 0.67 (95% CI; 0.35-0.90) and NPV to 0.69 (95% CI; 0.39-0.91). Including vaginal pH, a downstream biomarker of microbiome shifts, with other patient characteristics, did not enhance predictive value (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>).</p>
<p>The highest-performing model was an ensemble approach that integrated both microbial and host characteristics where available. Applied to the held-out test set, this model achieved perfect sensitivity of 1.0 (95% CI; 0.74-1) and NPV of 1.0 (95% CI; 0.59-1), with a specificity of 0.7 (95% CI; 0.35-0.93) and AUROC of 0.93 (95% CI; 0.71-0.93). Feature importance analysis across ensemble frameworks (<xref ref-type="supplementary-material" rid="SF11"><bold>Supplementary Section 11</bold></xref>) identified <italic>Lactobacillus</italic>, <italic>Prevotella</italic>, <italic>Peptoniphilus</italic>, <italic>Porphyromonas</italic>, <italic>Peptostreptococcus</italic>, and <italic>Streptococcus</italic> among the top 10 predictors, consistent with ANCOM-BC findings.</p>
<p>The LOSO validation of the ensemble model resulted in a pooled AUROC of 0.7 (95% CI; 0.6-0.7), ranging from 0.7 to 1.0 across individual studies, except for Tsementzi et&#xa0;al., which had the lowest data quality and an AUROC of 0.5 (<xref ref-type="supplementary-material" rid="SF12"><bold>Supplementary Section 12</bold></xref>). Validation on the Chao et&#xa0;al. dataset achieved perfect discrimination (AUROC of 1.0; 95% CI: [1-1]) and an NPV of 1.0 (95% CI: [0.9-1]), while the discrimination on the Walsh et&#xa0;al. and Gressel et&#xa0;al. datasets was moderate, with AUROC values of 0.8 (95% CI: [0.7-0.8]) and 0.7 (95% CI: 0.4-1), and NPVs of 0.8 (95% CI: 0.7-0.9) and 0.6 (95% CI: 0.4-0.8), respectively.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study, we leveraged publicly available 16S rRNA gene sequencing data from five cohorts to evaluate the potential of vaginal microbiome data for a non-invasive screening approach for EC. We assessed the reproducibility and replicability of published findings and developed ensemble machine-learning models integrating microbial and host-specific data to predict EC status.</p>
<p>Our findings align with prior work emphasizing reproducibility challenges in microbiome research (<xref ref-type="bibr" rid="B65">Schloss, 2018</xref>; <xref ref-type="bibr" rid="B40">Kang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B64">Rojas-Velazquez et&#xa0;al., 2024</xref>). These limitations often stem from a lack of standardized protocols, incomplete reporting of normalization techniques, and inconsistencies in the versions and parameter settings of the bioinformatics tools used. Additionally, participant-specific characteristics like age, BMI, and ethnicity, all known to strongly influence the vaginal microbiome (<xref ref-type="bibr" rid="B57">Ravel et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B35">Hickey et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B68">Si et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B43">&#x141;aniewski et&#xa0;al., 2020</xref>), are frequently unreported and unaccounted for in analyses. Reporting guidelines, such as the &#x201c;Strengthening The Organization and Reporting of Microbiome Studies&#x201d; (STORMS) checklist, which outline reporting standards for microbiome studies, remain underutilized despite their potential to improve transparency and reproducibility (<xref ref-type="bibr" rid="B50">Mirzayi et&#xa0;al., 2021</xref>).</p>
<p>While we successfully recapitulated broad biological patterns, such as increased alpha diversity in EC cases, the magnitude and statistical significance of these trends varied across bioinformatics pipelines. Alpha diversity appeared particularly sensitive to preprocessing, reinforcing the importance of rigorous, consistent data filtering (<xref ref-type="bibr" rid="B52">Nearing et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B40">Kang et&#xa0;al., 2021</xref>). In contrast, beta diversity measures were more robust, revealing that host characteristics, rather than disease status, explained a greater proportion of variance across all datasets. These findings highlight the dominant influence of inter-individual variability on disease-driven microbiome shifts and underscore the need to incorporate participant metadata into analytical models.</p>
<p>We identified multiple species of <italic>Peptoniphilus</italic> as enriched in EC cases, consistent with prior findings from <xref ref-type="bibr" rid="B73">Walther-Ant&#xf3;nio et&#xa0;al., 2016</xref> and <xref ref-type="bibr" rid="B70">Tsementzi et&#xa0;al., 2020</xref>. Interestingly, <italic>Peptoniphilus</italic> was also associated with non-cancer traits, such as menopausal status and high vaginal pH, in the <xref ref-type="bibr" rid="B72">Walsh et&#xa0;al., 2019</xref> data. These findings suggest that the observed associations with EC could be a consequence of local inflammation. This genus, a Gram-positive anaerobic coccus commonly found on mucosal surfaces, has previously been implicated in bacterial vaginosis and several gynecologic cancers (<xref ref-type="bibr" rid="B21">Diop et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B74">Wang et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B5">Asangba et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B28">Fong Amaris et&#xa0;al., 2024</xref>), as well as cancers of the mouth and gastrointestinal tract (<xref ref-type="bibr" rid="B51">Murphy and Frick, 2013</xref>).</p>
<p>We additionally observed that various species of <italic>Prevotella</italic>, <italic>Streptococcus</italic>, and <italic>Blautia</italic> were associated with disease status across multiple datasets. Tsementzi et&#xa0;al. reported an association between <italic>Prevotella</italic> and cancer (<xref ref-type="bibr" rid="B70">Tsementzi et&#xa0;al., 2020</xref>). Species within this genus are commonly linked to HPV infections and are known to drive chronic mucosal inflammation, leading to tissue damage and potentially promoting oncogenesis (<xref ref-type="bibr" rid="B44">Larsen, 2017</xref>; <xref ref-type="bibr" rid="B23">Dong et&#xa0;al., 2022</xref>). Similarly, <italic>Streptococcus</italic> species can also act as pathogens by producing pro-inflammatory cytokines and activating carcinogenic pathways (<xref ref-type="bibr" rid="B6">Biarc et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B2">Abdulamir et&#xa0;al., 2009</xref>, <xref ref-type="bibr" rid="B3">Abdulamir et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B42">Kumar et&#xa0;al., 2017</xref>). Lastly, Antonio et&#xa0;al. found that <italic>Blautia</italic> was enriched in benign specimens and was associated with good outcomes, as evidenced by its inverse correlation with obesity and its ability to alleviate metabolic syndrome (<xref ref-type="bibr" rid="B46">Liu et&#xa0;al., 2021</xref>).</p>
<p>Final classifiers were validated and compared in an independent held-out dataset. We observed that batch correction techniques may overcorrect genuine biological signals (such as age, ethnicity, and varying inclusion criteria across studies), when these factors are not explicitly modeled. Early integration with batch correction yielded a sensitivity of 1 and a specificity of 0, underscoring the risk of overcorrection when relevant metadata are inconsistently available across cohorts.</p>
<p>In contrast, our late-integration ensemble model, which incorporates participant characteristics when available, achieved an AUROC of 0.93, NPV of 1, and a sensitivity of 1, demonstrating a strong potential utility for ruling out EC in symptomatic individuals. Although the small test set size (n = 22) yielded wide confidence intervals, these metrics indicate strong potential to rule out EC in individuals classified as negative without undergoing an endometrial biopsy. The model showed moderate specificity (0.7) and PPV (0.8), likely reflecting false positives among participants with undiagnosed gynecologic conditions. The overlap between classifier-derived feature importance and ANCOM-BC results strengthens the reliability of our findings. Using a LOSO validation, our framework yielded a pooled AUROC of 0.7, an NPV of 0.7, a sensitivity of 0.8, and a PPV of 0.6. Pooled LOSO validation was lower than when tested on Antonio et&#xa0;al., likely due to differences in cohort composition, missing demographic data, and primer variability.</p>
<p>Expanded training datasets that include participants with a broader range of benign conditions may improve specificity and PPV. Furthermore, transfer learning, a method that applies generalized patterns from diverse datasets to smaller, task- and cohort-specific tasks (<xref ref-type="bibr" rid="B16">Chong et&#xa0;al., 2022</xref>), could enhance classifier robustness. This approach could greatly benefit microbiome studies that often face data sparsity. For example, a colorectal cancer detection model trained on gut microbiome profiles from 20 different disease states outperformed one trained solely on colorectal cancer data, with an AUROC of 0.97 vs 0.6 (<xref ref-type="bibr" rid="B16">Chong et&#xa0;al., 2022</xref>).</p>
<p>Our study had several strengths. To our knowledge, this is the first EC prediction model using machine learning on 16S rRNA gene amplicon data across multiple cohorts (n = 243). All data were pre-processed using a uniform DADA2 pipeline, and individual-level patient characteristics were integrated where available. The classifier was validated using both a LOSO framework and in a single held-out test cohort, increasing confidence in its generalizability. However, limitations remain. The included cohorts primarily represent White participants, whereas EC disproportionately affects individuals from historically marginalized populations. For example, individuals who are Black, with obesity, single or widowed, have lower educational attainment, or live in rural areas, experience significantly higher incidence and mortality rates (<xref ref-type="bibr" rid="B22">Doll et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B62">Rodriguez et&#xa0;al., 2021</xref>). The lack of racial and socioeconomic diversity in microbiome studies risks perpetuating existing health disparities and should be addressed in future research. Additionally, differences in study protocols, such as the primers used, exclusion of participants on hormonal therapy in some studies (Tsementzi and Gressel) but not in others, may have introduced bias. Hormonal therapy is known to impact the composition of the vaginal microbiome, yet we were unable to adjust for this factor due to missing data.</p>
<p>If implemented as a screening tool, model performance should be evaluated in the same context as future applications &#x2014;for example, in individuals experiencing AUB, the most common indication for an endometrial biopsy. Although specimens in our source studies were physician-collected, previous research suggests self-collected vaginal swabs are both sensitive and acceptable, offering a promising route for non-invasive screening (<xref ref-type="bibr" rid="B19">Costas et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B13">Camara et&#xa0;al., 2021</xref>).</p>
<p>Lastly, our predictive model was developed using 16S rRNA gene amplicon sequence data. Future work should investigate whether alternative sequencing approaches, such as metagenomics or targeting other amplicons, such as the chaperonin gene (cpn60), can improve model performance. For example, shotgun metagenomics has been shown not only to distinguish between benign and malignant conditions but also to predict EC grade and stage (<xref ref-type="bibr" rid="B33">Hakimjavadi et&#xa0;al., 2022</xref>).</p>
<p>In conclusion, accurately identifying individuals who require an endometrial biopsy remains a challenge. While several studies have reported vaginal microbial signatures of EC, this is the first study to integrate microbial and host data across cohorts using a machine-learning framework. Our ensemble model reliably identified EC cases, demonstrating high sensitivity and negative predictive value. Our findings support integrating microbial features and host characteristics to enable robust prediction of EC status and underscore the potential of microbiome-based screening tools. These results could be achieved through non-invasive self-collection methods that may broaden access to early detection and interventions. Future research should focus on validating models across diverse populations and real-world clinical settings.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. These data can be found here: Sequence Read Archive with accession IDs: PRJNA295859, PRJNA481576, PRJNA448161, PRJNA758386, PRJNA843535.</p></sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>Ethical approval was not required for the studies involving humans because this was a re-analysis of previously published, anonymized data. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants' legal guardians/next of kin in accordance with the national legislation and institutional requirements because the primary data publishers must have obtained either consent or waiver of consent prior to publishing their data in a public repository.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>DD: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Validation, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Data curation, Formal Analysis, Software, Visualization. AT: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Validation, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Resources, Supervision.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>We want to thank the Michael Smith Foundation for Health Research and Drs. Janet Hill (Department of Veterinary Microbiology, University of Saskatchewan), Deborah Money (Department of Obstetrics and Gynecology, University of British Columbia), and Sepideh Pakpour (School of Engineering, University of British Columbia) for their insightful feedback on the manuscript.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="SM1" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcimb.2025.1641413/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcimb.2025.1641413/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.docx" id="SF1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 1</label>
<caption>
<p>Search keywords.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 2</label>
<caption>
<p>Data lost due to pipeline specifications.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF3" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 3</label>
<caption>
<p>Read length maintained after quality filtering.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF4" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 4</label>
<caption>
<p>Pipelines implemented.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF5" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 5</label>
<caption>
<p>TRIPOD+AI checklist.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF6" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 6</label>
<caption>
<p>Grids used for hyperparameter optimization.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF7" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 7</label>
<caption>
<p>Detailed summaries of papers included.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF8" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 8</label>
<caption>
<p>Beta diversity estimates using various distance metrics.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF9" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 9</label>
<caption>
<p>Confusion matrices for models implemented.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF10" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 10</label>
<caption>
<p>Batch correction.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF11" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 11</label>
<caption>
<p>Variable importance plots: Variable importance for microbiome models.</p>
</caption></supplementary-material>
<supplementary-material xlink:href="DataSheet1.docx" id="SF12" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"><label>Supplementary Section 12</label>
<caption>
<p>Leave-one-study-out (LOSO) validation results.</p>
</caption></supplementary-material></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abdill</surname> <given-names>R. J.</given-names></name>
<name><surname>Graham</surname> <given-names>S. P.</given-names></name>
<name><surname>Rubinetti</surname> <given-names>V.</given-names></name>
<name><surname>Ahmadian</surname> <given-names>M.</given-names></name>
<name><surname>Hicks</surname> <given-names>P.</given-names></name>
<name><surname>Chetty</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Integration of 168,000 samples reveals global patterns of the human gut microbiome</article-title>. <source>Cell</source> <volume>188</volume>, <fpage>1100</fpage>&#x2013;<lpage>1118.e17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cell.2024.12.017</pub-id>, PMID: <pub-id pub-id-type="pmid">39848248</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abdulamir</surname> <given-names>A. S.</given-names></name>
<name><surname>Hafidh</surname> <given-names>R. R.</given-names></name>
<name><surname>Mahdi</surname> <given-names>L. K.</given-names></name>
<name><surname>Al-jeboori</surname> <given-names>T.</given-names></name>
<name><surname>Abubaker</surname> <given-names>F.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Investigation into the controversial association of <italic>Streptococcus gallolyticus</italic> with colorectal cancer and adenoma</article-title>. <source>BMC Cancer</source> <volume>9</volume>, <elocation-id>403</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2407-9-403</pub-id>, PMID: <pub-id pub-id-type="pmid">19925668</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abdulamir</surname> <given-names>A. S.</given-names></name>
<name><surname>Hafidh</surname> <given-names>R. R.</given-names></name>
<name><surname>Bakar</surname> <given-names>F. A.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Molecular detection, quantification, and isolation of <italic>Streptococcus gallolyticus</italic> bacteria colonizing colorectal tumors: inflammation-driven potential of carcinogenesis via IL-1, COX-2, and IL-8</article-title>. <source>Mol. Cancer</source> <volume>9</volume>, <elocation-id>249</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1476-4598-9-249</pub-id>, PMID: <pub-id pub-id-type="pmid">20846456</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Anderson</surname> <given-names>M. J.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>Permutational multivariate analysis of variance (PERMANOVA)</article-title>,&#x201d; in <source>Wiley statsRef: statistics reference online</source> (<publisher-loc>Hoboken, New Jersey</publisher-loc>: 
<publisher-name>John Wiley &amp; Sons, Ltd</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/9781118445112.stat07841</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Asangba</surname> <given-names>A. E.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Goergen</surname> <given-names>K. M.</given-names></name>
<name><surname>Larson</surname> <given-names>M. C.</given-names></name>
<name><surname>Oberg</surname> <given-names>A. L.</given-names></name>
<name><surname>Casarin</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Diagnostic and prognostic potential of the microbiome in ovarian cancer treatment response</article-title>. <source>Sci. Rep.</source> <volume>13</volume>, <fpage>730</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-27555-x</pub-id>, PMID: <pub-id pub-id-type="pmid">36639731</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Biarc</surname> <given-names>J.</given-names></name>
<name><surname>Nguyen</surname> <given-names>I. S.</given-names></name>
<name><surname>Pini</surname> <given-names>A.</given-names></name>
<name><surname>Gosse</surname> <given-names>F.</given-names></name>
<name><surname>Richert</surname> <given-names>S.</given-names></name>
<name><surname>Thierse</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2004</year>). 
<article-title>Carcinogenic properties of proteins with pro-inflammatory activity from <italic>Streptococcus infantarius</italic> (formerly <italic>S. bovis</italic>)</article-title>. <source>Carcinogenesis</source> <volume>25</volume>, <fpage>1477</fpage>&#x2013;<lpage>1484</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/carcin/bgh091</pub-id>, PMID: <pub-id pub-id-type="pmid">14742316</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bokulich</surname> <given-names>N. A.</given-names></name>
<name><surname>Kaehler</surname> <given-names>B. D.</given-names></name>
<name><surname>Rideout</surname> <given-names>J. R.</given-names></name>
<name><surname>Dillon</surname> <given-names>M.</given-names></name>
<name><surname>Bolyen</surname> <given-names>E.</given-names></name>
<name><surname>Knight</surname> <given-names>R.</given-names></name>
<etal/>
</person-group>. (<year>2018</year>). 
<article-title>Optimizing taxonomic classification of marker-gene amplicon sequences with QIIME 2&#x2019;s q2-feature-classifier plugin</article-title>. <source>Microbiome</source> <volume>6</volume>, <fpage>90</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40168-018-0470-z</pub-id>, PMID: <pub-id pub-id-type="pmid">29773078</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bokulich</surname> <given-names>N. A.</given-names></name>
<name><surname>&#x141;aniewski</surname> <given-names>P.</given-names></name>
<name><surname>Adamov</surname> <given-names>A.</given-names></name>
<name><surname>Chase</surname> <given-names>D. M.</given-names></name>
<name><surname>Caporaso</surname> <given-names>J. G.</given-names></name>
<name><surname>Herbst-Kralovetz</surname> <given-names>M. M.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Multi-omics data integration reveals metabolome as the top predictor of the cervicovaginal microenvironment</article-title>. <source>PloS Comput. Biol.</source> <volume>18</volume>, <fpage>e1009876</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1009876</pub-id>, PMID: <pub-id pub-id-type="pmid">35196323</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bolyen</surname> <given-names>E.</given-names></name>
<name><surname>Rideout</surname> <given-names>J. R.</given-names></name>
<name><surname>Dillon</surname> <given-names>M. R.</given-names></name>
<name><surname>Bokulich</surname> <given-names>N. A.</given-names></name>
<name><surname>Abnet</surname> <given-names>C. C.</given-names></name>
<name><surname>Al-Ghalith</surname> <given-names>G. A.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Reproducible, interactive, scalable and extensible microbiome data science using QIIME 2</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume>, <fpage>852</fpage>&#x2013;<lpage>857</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41587-019-0209-9</pub-id>, PMID: <pub-id pub-id-type="pmid">31341288</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bray</surname> <given-names>J. R.</given-names></name>
<name><surname>Curtis</surname> <given-names>J. T.</given-names></name>
</person-group> (<year>1957</year>). 
<article-title>An ordination of the upland forest communities of southern Wisconsin</article-title>. <source>Ecol. Monogr.</source> <volume>27</volume>, <fpage>325</fpage>&#x2013;<lpage>349</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/1942268</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bray</surname> <given-names>F.</given-names></name>
<name><surname>Laversanne</surname> <given-names>M.</given-names></name>
<name><surname>Sung</surname> <given-names>H.</given-names></name>
<name><surname>Ferlay</surname> <given-names>J.</given-names></name>
<name><surname>Siegel</surname> <given-names>R. L.</given-names></name>
<name><surname>Soerjomataram</surname> <given-names>I.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA: A Cancer J. Clin.</source> <volume>74</volume>, <fpage>229</fpage>&#x2013;<lpage>263</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3322/caac.21834</pub-id>, PMID: <pub-id pub-id-type="pmid">38572751</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Callahan</surname> <given-names>B. J.</given-names></name>
<name><surname>McMurdie</surname> <given-names>P. J.</given-names></name>
<name><surname>Rosen</surname> <given-names>M. J.</given-names></name>
<name><surname>Han</surname> <given-names>A. W.</given-names></name>
<name><surname>Johnson</surname> <given-names>A. J. A.</given-names></name>
<name><surname>Holmes</surname> <given-names>S. P.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>DADA2: High-resolution sample inference from Illumina amplicon data</article-title>. <source>Nat. Methods</source> <volume>13</volume>, <fpage>581</fpage>&#x2013;<lpage>583</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nmeth.3869</pub-id>, PMID: <pub-id pub-id-type="pmid">27214047</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Camara</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Lafferty</surname> <given-names>L.</given-names></name>
<name><surname>Vallely</surname> <given-names>A. J.</given-names></name>
<name><surname>Guy</surname> <given-names>R.</given-names></name>
<name><surname>Kelly-Hanku</surname> <given-names>A.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Self-collection for HPV-based cervical screening: a qualitative evidence meta-synthesis</article-title>. <source>BMC Public Health</source> <volume>21</volume>, <fpage>1503</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12889-021-11554-6</pub-id>, PMID: <pub-id pub-id-type="pmid">34348689</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chambers</surname> <given-names>L. M.</given-names></name>
<name><surname>Bussies</surname> <given-names>P.</given-names></name>
<name><surname>Vargas</surname> <given-names>R.</given-names></name>
<name><surname>Esakov</surname> <given-names>E.</given-names></name>
<name><surname>Tewari</surname> <given-names>S.</given-names></name>
<name><surname>Reizes</surname> <given-names>O.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>The microbiome and gynecologic cancer: current evidence and future opportunities</article-title>. <source>Curr. Oncol. Rep.</source> <volume>23</volume>, <fpage>92</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11912-021-01079-x</pub-id>, PMID: <pub-id pub-id-type="pmid">34125319</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chao</surname> <given-names>A.</given-names></name>
<name><surname>Chao</surname> <given-names>A.-S.</given-names></name>
<name><surname>Lin</surname> <given-names>C.-Y.</given-names></name>
<name><surname>Weng</surname> <given-names>C.-H.</given-names></name>
<name><surname>Wu</surname> <given-names>R.-C.</given-names></name>
<name><surname>Yeh</surname> <given-names>Y.-M.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Analysis of endometrial lavage microbiota reveals an increased relative abundance of the plastic-degrading bacteria <italic>Bacillus pseudofirmus</italic> and <italic>Stenotrophomonas rhizophila</italic> in women with endometrial cancer/endometrial hyperplasia</article-title>. <source>Front. Cell. Infection Microbiol.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcimb.2022.1031967</pub-id>, PMID: <pub-id pub-id-type="pmid">36439209</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chong</surname> <given-names>H.</given-names></name>
<name><surname>Zha</surname> <given-names>Y.</given-names></name>
<name><surname>Yu</surname> <given-names>Q.</given-names></name>
<name><surname>Cheng</surname> <given-names>M.</given-names></name>
<name><surname>Xiong</surname> <given-names>G.</given-names></name>
<name><surname>Wang</surname> <given-names>N.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>EXPERT: transfer learning-enabled context-aware microbial community classification</article-title>. <source>Briefings Bioinf.</source> <volume>23</volume>, <elocation-id>bbac396</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbac396</pub-id>, PMID: <pub-id pub-id-type="pmid">36124759</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Clarke</surname> <given-names>M. A.</given-names></name>
<name><surname>Long</surname> <given-names>B. J.</given-names></name>
<name><surname>Sherman</surname> <given-names>M. E.</given-names></name>
<name><surname>Lemens</surname> <given-names>M. A.</given-names></name>
<name><surname>Podratz</surname> <given-names>K. C.</given-names></name>
<name><surname>Hopkins</surname> <given-names>M. R.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Risk assessment of endometrial cancer and endometrial intraepithelial neoplasia in women with abnormal bleeding and implications for clinical management algorithms</article-title>. <source>Am. J. Obstetrics Gynecology</source> <volume>223</volume>, <fpage>549.e1</fpage>&#x2013;<lpage>549.e13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ajog.2020.03.032</pub-id>, PMID: <pub-id pub-id-type="pmid">32268124</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Collins</surname> <given-names>G. S.</given-names></name>
<name><surname>Moons</surname> <given-names>K. G.</given-names></name>
<name><surname>Dhiman</surname> <given-names>P.</given-names></name>
<name><surname>Riley</surname> <given-names>R. D.</given-names></name>
<name><surname>Beam</surname> <given-names>A. L.</given-names></name>
<name><surname>Van Calster</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title>. <source>BMJ</source> <volume>385</volume>, <fpage>e078378</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id>, PMID: <pub-id pub-id-type="pmid">38626948</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Costas</surname> <given-names>L.</given-names></name>
<name><surname>Frias&#x2010;Gomez</surname> <given-names>J.</given-names></name>
<name><surname>Guardiola</surname> <given-names>M.</given-names></name>
<name><surname>Benavente</surname> <given-names>Y.</given-names></name>
<name><surname>Pineda</surname> <given-names>M.</given-names></name>
<name><surname>Pav&#xf3;n</surname> <given-names>M. &#xc1;.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>New perspectives on screening and early detection of endometrial cancer</article-title>. <source>Int. J. Cancer</source> <volume>145</volume>, <fpage>3194</fpage>&#x2013;<lpage>3206</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ijc.32514</pub-id>, PMID: <pub-id pub-id-type="pmid">31199503</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>DeSantis</surname> <given-names>T. Z.</given-names></name>
<name><surname>Hugenholtz</surname> <given-names>P.</given-names></name>
<name><surname>Larsen</surname> <given-names>N.</given-names></name>
<name><surname>Rojas</surname> <given-names>M.</given-names></name>
<name><surname>Brodie</surname> <given-names>E. L.</given-names></name>
<name><surname>Keller</surname> <given-names>K.</given-names></name>
<etal/>
</person-group>. (<year>2006</year>). 
<article-title>Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB</article-title>. <source>Appl. Environ. Microbiol.</source> <volume>72</volume>, <fpage>5069</fpage>&#x2013;<lpage>5072</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1128/AEM.03006-05</pub-id>, PMID: <pub-id pub-id-type="pmid">16820507</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Diop</surname> <given-names>K.</given-names></name>
<name><surname>Diop</surname> <given-names>A.</given-names></name>
<name><surname>Michelle</surname> <given-names>C.</given-names></name>
<name><surname>Richez</surname> <given-names>M.</given-names></name>
<name><surname>Rathored</surname> <given-names>J.</given-names></name>
<name><surname>Bretelle</surname> <given-names>F.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Description of three new <italic>Peptoniphilus</italic> species cultured in the vaginal fluid of a woman diagnosed with bacterial vaginosis: <italic>Peptoniphilus pacaensis</italic> sp. nov., <italic>Peptoniphilus raoultii</italic> sp. nov., and <italic>Peptoniphilus vaginalis</italic> sp. nov</article-title>. <source>MicrobiologyOpen</source> <volume>8</volume>, <fpage>e00661</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mbo3.661</pub-id>, PMID: <pub-id pub-id-type="pmid">29931836</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Doll</surname> <given-names>K. M.</given-names></name>
<name><surname>Hempstead</surname> <given-names>B.</given-names></name>
<name><surname>Alson</surname> <given-names>J.</given-names></name>
<name><surname>Sage</surname> <given-names>L.</given-names></name>
<name><surname>Lavallee</surname> <given-names>D.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Assessment of prediagnostic experiences of Black women with endometrial cancer in the United States</article-title>. <source>JAMA Network Open</source> <volume>3</volume>, <fpage>e204954</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.4954</pub-id>, PMID: <pub-id pub-id-type="pmid">32412636</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dong</surname> <given-names>B.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Cai</surname> <given-names>H.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zou</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title><italic>Prevotella</italic> as the hub of the cervicovaginal microbiota affects the occurrence of persistent human papillomavirus infection and cervical lesions in women of childbearing age via host NF-&#x3ba;B/C-myc</article-title>. <source>J. Med. Virol.</source> <volume>94</volume>, <fpage>5519</fpage>&#x2013;<lpage>5534</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jmv.28001</pub-id>, PMID: <pub-id pub-id-type="pmid">35835717</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Douglas</surname> <given-names>G. M.</given-names></name>
<name><surname>Langille</surname> <given-names>M. G. I.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>A primer and discussion on DNA-based microbiome data and related bioinformatics analyses</article-title>. <source>Peer Community J.</source> <volume>1</volume>, <fpage>e5</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.24072/pcjournal.2</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Edgar</surname> <given-names>R. C.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Search and clustering orders of magnitude faster than BLAST</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>2460</fpage>&#x2013;<lpage>2461</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btq461</pub-id>, PMID: <pub-id pub-id-type="pmid">20709691</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Edgar</surname> <given-names>R. C.</given-names></name>
<name><surname>Flyvbjerg</surname> <given-names>H.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Error filtering, pair assembly and error correction for next-generation sequencing reads</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>3476</fpage>&#x2013;<lpage>3482</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btv401</pub-id>, PMID: <pub-id pub-id-type="pmid">26139637</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fettweis</surname> <given-names>J. M.</given-names></name>
<name><surname>Serrano</surname> <given-names>M. G.</given-names></name>
<name><surname>Sheth</surname> <given-names>N. U.</given-names></name>
<name><surname>Mayer</surname> <given-names>C. M.</given-names></name>
<name><surname>Glascock</surname> <given-names>A. L.</given-names></name>
<name><surname>Brooks</surname> <given-names>J. P.</given-names></name>
<etal/>
</person-group>. (<year>2012</year>). 
<article-title>Species-level classification of the vaginal microbiome</article-title>. <source>BMC Genomics</source> <volume>13</volume>, <elocation-id>S17</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2164-13-S8-S17</pub-id>, PMID: <pub-id pub-id-type="pmid">23282177</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fong Amaris</surname> <given-names>W. M.</given-names></name>
<name><surname>Assump&#xe7;&#xe3;o</surname> <given-names>P. P. D.</given-names></name>
<name><surname>Valadares</surname> <given-names>L. J.</given-names></name>
<name><surname>Moreira</surname> <given-names>F. C.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Microbiota changes: the unseen players in cervical cancer progression</article-title>. <source>Front. Microbiol.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmicb.2024.1352778</pub-id>, PMID: <pub-id pub-id-type="pmid">38389527</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fox</surname> <given-names>J. D.</given-names></name>
<name><surname>Sims</surname> <given-names>A.</given-names></name>
<name><surname>Ross</surname> <given-names>M.</given-names></name>
<name><surname>Bettag</surname> <given-names>J.</given-names></name>
<name><surname>Wilder</surname> <given-names>A.</given-names></name>
<name><surname>Natrop</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Bioinformatic methodologies in assessing gut microbiota</article-title>. <source>Microbiol. Res.</source> <volume>15</volume>, <fpage>2554</fpage>&#x2013;<lpage>2574</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/microbiolres15040170</pub-id>, PMID: <pub-id pub-id-type="pmid">40881965</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Fuglede</surname> <given-names>B.</given-names></name>
<name><surname>Topsoe</surname> <given-names>F.</given-names></name>
</person-group> (<year>2004</year>). &#x201c;
<article-title>Jensen-Shannon divergence and Hilbert space embedding</article-title>,&#x201d; in <conf-name>International Symposium on Information Theory, 2004. ISIT 2004. Proceedings</conf-name>. (
<publisher-name>IEEE</publisher-name>), <fpage>31</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ISIT.2004.1365067</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fusco</surname> <given-names>W.</given-names></name>
<name><surname>Bricca</surname> <given-names>L.</given-names></name>
<name><surname>Kaitsas</surname> <given-names>F.</given-names></name>
<name><surname>Tartaglia</surname> <given-names>M. F.</given-names></name>
<name><surname>Venturini</surname> <given-names>I.</given-names></name>
<name><surname>Rugge</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Gut microbiota in colorectal cancer: From pathogenesis to clinic</article-title>. <source>Best Pract. Res. Clin. Gastroenterol.</source> <volume>72</volume>, <elocation-id>101941</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bpg.2024.101941</pub-id>, PMID: <pub-id pub-id-type="pmid">39645279</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gressel</surname> <given-names>G. M.</given-names></name>
<name><surname>Usyk</surname> <given-names>M.</given-names></name>
<name><surname>Frimer</surname> <given-names>M.</given-names></name>
<name><surname>Kuo</surname> <given-names>D. Y. S.</given-names></name>
<name><surname>Burk</surname> <given-names>R. D.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Characterization of the endometrial, cervicovaginal and anorectal microbiota in post-menopausal women with endometrioid and serous endometrial cancers</article-title>. <source>PloS One</source> <volume>16</volume>, <fpage>e0259188</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0259188</pub-id>, PMID: <pub-id pub-id-type="pmid">34739493</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hakimjavadi</surname> <given-names>H.</given-names></name>
<name><surname>George</surname> <given-names>S. H.</given-names></name>
<name><surname>Taub</surname> <given-names>M.</given-names></name>
<name><surname>Dodds</surname> <given-names>L. V.</given-names></name>
<name><surname>Sanchez-Covarrubias</surname> <given-names>A. P.</given-names></name>
<name><surname>Huang</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>The vaginal microbiome is associated with endometrial cancer grade and histology</article-title>. <source>Cancer Res. Commun.</source> <volume>2</volume>, <fpage>447</fpage>&#x2013;<lpage>455</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/2767-9764.CRC-22-0075</pub-id>, PMID: <pub-id pub-id-type="pmid">35928983</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hejblum</surname> <given-names>B. P.</given-names></name>
<name><surname>Kunzmann</surname> <given-names>K.</given-names></name>
<name><surname>Lavagnini</surname> <given-names>E.</given-names></name>
<name><surname>Hutchinson</surname> <given-names>A.</given-names></name>
<name><surname>Robertson</surname> <given-names>D. S.</given-names></name>
<name><surname>Jones</surname> <given-names>S. C.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Realistic and robust reproducible research for biostatistics</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.20944/preprints202006.0002.v1</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hickey</surname> <given-names>R. J.</given-names></name>
<name><surname>Zhou</surname> <given-names>X.</given-names></name>
<name><surname>Pierson</surname> <given-names>J. D.</given-names></name>
<name><surname>Ravel</surname> <given-names>J.</given-names></name>
<name><surname>Forney</surname> <given-names>L. J.</given-names></name>
</person-group> (<year>2012</year>). 
<article-title>Understanding vaginal microbiome complexity from an ecological perspective</article-title>. <source>Trans. Research : J. Lab. Clin. Med.</source> <volume>160</volume>, <fpage>267</fpage>&#x2013;<lpage>282</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.trsl.2012.02.008</pub-id>, PMID: <pub-id pub-id-type="pmid">22683415</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Iram</surname> <given-names>S.</given-names></name>
<name><surname>Musonda</surname> <given-names>P.</given-names></name>
<name><surname>Ewies</surname> <given-names>A. A. A.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Premenopausal bleeding: When should the endometrium be investigated?&#x2014;A retrospective non-comparative study of 3006 women</article-title>. <source>Eur. J. Obstetrics Gynecology Reprod. Biol.</source> <volume>148</volume>, <fpage>86</fpage>&#x2013;<lpage>89</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ejogrb.2009.09.023</pub-id>, PMID: <pub-id pub-id-type="pmid">19853362</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jeraldo</surname> <given-names>P.</given-names></name>
<name><surname>Kalari</surname> <given-names>K.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Bhavsar</surname> <given-names>J.</given-names></name>
<name><surname>Mangalam</surname> <given-names>A.</given-names></name>
<name><surname>White</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>IM-TORNADO: a tool for comparison of 16S reads from paired-end libraries</article-title>. <source>PloS One</source> <volume>9</volume>, <fpage>e114804</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0114804</pub-id>, PMID: <pub-id pub-id-type="pmid">25506826</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Johnson</surname> <given-names>W. E.</given-names></name>
<name><surname>Li</surname> <given-names>C.</given-names></name>
<name><surname>Rabinovic</surname> <given-names>A.</given-names></name>
</person-group> (<year>2007</year>). 
<article-title>Adjusting batch effects in microarray expression data using empirical Bayes methods</article-title>. <source>Biostatistics</source> <volume>8</volume>, <fpage>118</fpage>&#x2013;<lpage>127</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/biostatistics/kxj037</pub-id>, PMID: <pub-id pub-id-type="pmid">16632515</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kaehler</surname> <given-names>B. D.</given-names></name>
<name><surname>Bokulich</surname> <given-names>N. A.</given-names></name>
<name><surname>McDonald</surname> <given-names>D.</given-names></name>
<name><surname>Knight</surname> <given-names>R.</given-names></name>
<name><surname>Caporaso</surname> <given-names>J. G.</given-names></name>
<name><surname>Huttley</surname> <given-names>G. A.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Species abundance information improves sequence taxonomy classification accuracy</article-title>. <source>Nat. Commun.</source> <volume>10</volume>, <fpage>4643</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-019-12669-6</pub-id>, PMID: <pub-id pub-id-type="pmid">31604942</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kang</surname> <given-names>X.</given-names></name>
<name><surname>Deng</surname> <given-names>D. M.</given-names></name>
<name><surname>Crielaard</surname> <given-names>W.</given-names></name>
<name><surname>Brandt</surname> <given-names>B. W.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Reprocessing 16S rRNA gene amplicon sequencing studies: (Meta)Data issues, robustness, and reproducibility</article-title>. <source>Front. Cell. Infection Microbiol.</source> <volume>11</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcimb.2021.720637</pub-id>, PMID: <pub-id pub-id-type="pmid">34746021</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Katoh</surname> <given-names>K.</given-names></name>
<name><surname>Standley</surname> <given-names>D. M.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>MAFFT multiple sequence alignment software version 7: improvements in performance and usability</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume>, <fpage>772</fpage>&#x2013;<lpage>780</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/mst010</pub-id>, PMID: <pub-id pub-id-type="pmid">23329690</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kumar</surname> <given-names>R.</given-names></name>
<name><surname>Herold</surname> <given-names>J. L.</given-names></name>
<name><surname>Schady</surname> <given-names>D.</given-names></name>
<name><surname>Davis</surname> <given-names>J.</given-names></name>
<name><surname>Kopetz</surname> <given-names>S.</given-names></name>
<name><surname>Martinez-Moczygemba</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>Streptococcus gallolyticus subsp. gallolyticus promotes colorectal tumor development</article-title>. <source>PloS Pathog.</source> <volume>13</volume>, <fpage>e1006440</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.ppat.1006440</pub-id>, PMID: <pub-id pub-id-type="pmid">28704539</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>&#x141;aniewski</surname> <given-names>P.</given-names></name>
<name><surname>Ilhan</surname> <given-names>Z. E.</given-names></name>
<name><surname>Herbst-Kralovetz</surname> <given-names>M. M.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>The microbiome and gynaecological cancer development, prevention and therapy</article-title>. <source>Nat. Rev. Urol.</source> <volume>17</volume>, <fpage>232</fpage>&#x2013;<lpage>250</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41585-020-0286-z</pub-id>, PMID: <pub-id pub-id-type="pmid">32071434</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Larsen</surname> <given-names>J. M.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>The immune response to Prevotella bacteria in chronic inflammatory disease</article-title>. <source>Immunology</source> <volume>151</volume>, <fpage>363</fpage>&#x2013;<lpage>374</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/imm.12760</pub-id>, PMID: <pub-id pub-id-type="pmid">28542929</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lin</surname> <given-names>H.</given-names></name>
<name><surname>Peddada</surname> <given-names>S. D.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Analysis of compositions of microbiomes with bias correction</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>3514</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-020-17041-7</pub-id>, PMID: <pub-id pub-id-type="pmid">32665548</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Mao</surname> <given-names>B.</given-names></name>
<name><surname>Gu</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>J.</given-names></name>
<name><surname>Cui</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>G.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Blautia&#x2014;a new functional genus with potential probiotic properties?</article-title> <source>Gut Microbes</source> <volume>13</volume>, <elocation-id>1875796</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/19490976.2021.1875796</pub-id>, PMID: <pub-id pub-id-type="pmid">33525961</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lozupone</surname> <given-names>C.</given-names></name>
<name><surname>Lladser</surname> <given-names>M. E.</given-names></name>
<name><surname>Knights</surname> <given-names>D.</given-names></name>
<name><surname>Stombaugh</surname> <given-names>J.</given-names></name>
<name><surname>Knight</surname> <given-names>R.</given-names></name>
</person-group> (<year>2011</year>). 
<article-title>UniFrac: an effective distance metric for microbial community comparison</article-title>. <source>ISME J.</source> <volume>5</volume>, <fpage>169</fpage>&#x2013;<lpage>172</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ismej.2010.133</pub-id>, PMID: <pub-id pub-id-type="pmid">20827291</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Marcus</surname> <given-names>D.</given-names></name>
<name><surname>King</surname> <given-names>A.</given-names></name>
<name><surname>Yazbek</surname> <given-names>J.</given-names></name>
<name><surname>Hughes</surname> <given-names>C.</given-names></name>
<name><surname>Ghaem&#x2010;Maghami</surname> <given-names>S.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Anxiety and stress in women with suspected endometrial cancer: Survey and paired observational study</article-title>. <source>Psycho-Oncology</source> <volume>30</volume>, <fpage>1393</fpage>&#x2013;<lpage>1400</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/pon.5697</pub-id>, PMID: <pub-id pub-id-type="pmid">33855785</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mienye</surname> <given-names>I. D.</given-names></name>
<name><surname>Sun</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>A survey of ensemble learning: concepts, algorithms, applications, and prospects</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>99129</fpage>&#x2013;<lpage>99149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2022.3207287</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mirzayi</surname> <given-names>C.</given-names></name>
<name><surname>Renson</surname> <given-names>A.</given-names></name><collab>Genomic Standards Consortium</collab><collab>Massive Analysis and Quality Control Society</collab>
<name><surname>Furlanello</surname> <given-names>C.</given-names></name>
<name><surname>Sansone</surname> <given-names>S. A.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Reporting guidelines for human microbiome research: the STORMS checklist</article-title>. <source>Nat. Med.</source> <volume>27</volume>, <fpage>1885</fpage>&#x2013;<lpage>1892</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41591-021-01552-x</pub-id>, PMID: <pub-id pub-id-type="pmid">34789871</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Murphy</surname> <given-names>E. C.</given-names></name>
<name><surname>Frick</surname> <given-names>I.-M.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>Gram-positive anaerobic cocci &#x2013; commensals and opportunistic pathogens</article-title>. <source>FEMS Microbiol. Rev.</source> <volume>37</volume>, <fpage>520</fpage>&#x2013;<lpage>553</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1574-6976.12005</pub-id>, PMID: <pub-id pub-id-type="pmid">23030831</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nearing</surname> <given-names>J. T.</given-names></name>
<name><surname>Douglas</surname> <given-names>G. M.</given-names></name>
<name><surname>Comeau</surname> <given-names>A. M.</given-names></name>
<name><surname>Langille</surname> <given-names>M. G.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Denoising the Denoisers: an independent evaluation of microbiome sequence error-correction approaches</article-title>. <source>PeerJ</source> <volume>6</volume>, <fpage>e5364</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.7717/peerj.5364</pub-id>, PMID: <pub-id pub-id-type="pmid">30123705</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Onstad</surname> <given-names>M. A.</given-names></name>
<name><surname>Schmandt</surname> <given-names>R. E.</given-names></name>
<name><surname>Lu</surname> <given-names>K. H.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Addressing the role of obesity in endometrial cancer risk, prevention, and treatment</article-title>. <source>J. Clin. Oncol.</source> <volume>34</volume>, <fpage>4225</fpage>&#x2013;<lpage>4230</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1200/JCO.2016.69.4638</pub-id>, PMID: <pub-id pub-id-type="pmid">27903150</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Picard</surname> <given-names>M.</given-names></name>
<name><surname>Scott-Boyer</surname> <given-names>M. P.</given-names></name>
<name><surname>Bodein</surname> <given-names>A.</given-names></name>
<name><surname>P&#xe9;rin</surname> <given-names>O.</given-names></name>
<name><surname>Droit</surname> <given-names>A.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Integration strategies of multi-omics data for machine learning analysis</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>19</volume>, <fpage>3735</fpage>&#x2013;<lpage>3746</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.csbj.2021.06.030</pub-id>, PMID: <pub-id pub-id-type="pmid">34285775</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Price</surname> <given-names>M. N.</given-names></name>
<name><surname>Dehal</surname> <given-names>P. S.</given-names></name>
<name><surname>Arkin</surname> <given-names>A. P.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>FastTree 2 &#x2013; approximately maximum-likelihood trees for large alignments</article-title>. <source>PloS One</source> <volume>5</volume>, <fpage>e9490</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0009490</pub-id>, PMID: <pub-id pub-id-type="pmid">20224823</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Qureshi</surname> <given-names>F. U.</given-names></name>
<name><surname>Sohail</surname> <given-names>S.</given-names></name>
<name><surname>Qureshi</surname> <given-names>M. T. A. F. U.</given-names></name>
<name><surname>Sohail</surname> <given-names>S.</given-names></name>
<name><surname>Ahmed</surname> <given-names>M. T.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Relationship of patterns of abnormal uterine bleeding with underlying pathology</article-title>. <source>J. Rawalpindi Med. Coll</source>. Available online at: <uri xlink:href="https://www.journalrmc.com/index.php/JRMC/article/view/963">https://www.journalrmc.com/index.php/JRMC/article/view/963</uri> (Accessed <date-in-citation content-type="access-date">March 2, 2025</date-in-citation>).
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ravel</surname> <given-names>J.</given-names></name>
<name><surname>Gajer</surname> <given-names>P.</given-names></name>
<name><surname>Abdo</surname> <given-names>Z.</given-names></name>
<name><surname>Schneider</surname> <given-names>G. M.</given-names></name>
<name><surname>Koenig</surname> <given-names>S. S.</given-names></name>
<name><surname>McCulle</surname> <given-names>S. L.</given-names></name>
<etal/>
</person-group>. (<year>2011</year>). 
<article-title>Vaginal microbiome of reproductive-age women</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>108</volume>, <fpage>4680</fpage>&#x2013;<lpage>4687</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.1002611107</pub-id>, PMID: <pub-id pub-id-type="pmid">20534435</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Real</surname> <given-names>R.</given-names></name>
<name><surname>Vargas</surname> <given-names>J. M.</given-names></name>
</person-group> (<year>1996</year>). 
<article-title>The probabilistic basis of Jaccard&#x2019;s index of similarity</article-title>. <source>Systematic Biol.</source> <volume>45</volume>, <fpage>380</fpage>&#x2013;<lpage>385</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/sysbio/45.3.380</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Reeves</surname> <given-names>G. K.</given-names></name>
<name><surname>Pirie</surname> <given-names>K.</given-names></name>
<name><surname>Beral</surname> <given-names>V.</given-names></name>
<name><surname>Green</surname> <given-names>J.</given-names></name>
<name><surname>Spencer</surname> <given-names>E.</given-names></name>
<name><surname>Bull</surname> <given-names>D.</given-names></name>
</person-group> (<year>2007</year>). 
<article-title>Cancer incidence and mortality in relation to body mass index in the Million Women Study: cohort study</article-title>. <source>BMJ : Br. Med. J.</source> <volume>335</volume>, <fpage>1134</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmj.39367.495995.AE</pub-id>, PMID: <pub-id pub-id-type="pmid">17986716</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rinke</surname> <given-names>C.</given-names></name>
<name><surname>Chuvochina</surname> <given-names>M.</given-names></name>
<name><surname>Mussig</surname> <given-names>A. J.</given-names></name>
<name><surname>Chaumeil</surname> <given-names>P. A.</given-names></name>
<name><surname>Dav&#xed;n</surname> <given-names>A. A.</given-names></name>
<name><surname>Waite</surname> <given-names>D. W.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>A standardized archaeal taxonomy for the Genome Taxonomy Database</article-title>. <source>Nat. Microbiol.</source> <volume>6</volume>, <fpage>946</fpage>&#x2013;<lpage>959</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41564-021-00918-8</pub-id>, PMID: <pub-id pub-id-type="pmid">34155373</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Robeson</surname> <given-names>M. S.</given-names> <suffix>II</suffix></name>
<name><surname>O&#x2019;Rourke</surname> <given-names>D. R.</given-names></name>
<name><surname>Kaehler</surname> <given-names>B. D.</given-names></name>
<name><surname>Ziemski</surname> <given-names>M.</given-names></name>
<name><surname>Dillon</surname> <given-names>M. R.</given-names></name>
<name><surname>Foster</surname> <given-names>J. T.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>RESCRIPt: Reproducible sequence taxonomy reference database management</article-title>. <source>PloS Comput. Biol.</source> <volume>17</volume>, <fpage>e1009581</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1009581</pub-id>, PMID: <pub-id pub-id-type="pmid">34748542</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rodriguez</surname> <given-names>V. E.</given-names></name>
<name><surname>LeBr&#xf3;n</surname> <given-names>A. M.</given-names></name>
<name><surname>Chang</surname> <given-names>J.</given-names></name>
<name><surname>Bristow</surname> <given-names>R. E.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Guideline adherent treatment, sociodemographic disparities, and cause-specific survival for endometrial carcinomas</article-title>. <source>Cancer</source> <volume>127</volume>, <fpage>2423</fpage>&#x2013;<lpage>2431</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cncr.33502</pub-id>, PMID: <pub-id pub-id-type="pmid">33721357</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rognes</surname> <given-names>T.</given-names></name>
<name><surname>Flouri</surname> <given-names>T.</given-names></name>
<name><surname>Nichols</surname> <given-names>B.</given-names></name>
<name><surname>Quince</surname> <given-names>C.</given-names></name>
<name><surname>Mah&#xe9;</surname> <given-names>F.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>VSEARCH: a versatile open source tool for metagenomics</article-title>. <source>PeerJ</source> <volume>4</volume>, <fpage>e2584</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.7717/peerj.2584</pub-id>, PMID: <pub-id pub-id-type="pmid">27781170</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rojas-Velazquez</surname> <given-names>D.</given-names></name>
<name><surname>Kidwai</surname> <given-names>S.</given-names></name>
<name><surname>Kraneveld</surname> <given-names>A. D.</given-names></name>
<name><surname>Tonda</surname> <given-names>A.</given-names></name>
<name><surname>Oberski</surname> <given-names>D.</given-names></name>
<name><surname>Garssen</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Methodology for biomarker discovery with reproducibility in microbiome data using machine learning</article-title>. <source>BMC Bioinf.</source> <volume>25</volume>, <fpage>26</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12859-024-05639-3</pub-id>, PMID: <pub-id pub-id-type="pmid">38225565</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Schloss</surname> <given-names>P. D.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Identifying and overcoming threats to reproducibility, replicability, robustness, and generalizability in microbiome research</article-title>. <source>mBio</source> <volume>9</volume>, <elocation-id>e00525-18</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1128/mBio.00525-18</pub-id>, PMID: <pub-id pub-id-type="pmid">29871915</pub-id>
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Schloss</surname> <given-names>P. D.</given-names></name>
<name><surname>Westcott</surname> <given-names>S. L.</given-names></name>
<name><surname>Ryabin</surname> <given-names>T.</given-names></name>
<name><surname>Hall</surname> <given-names>J. R.</given-names></name>
<name><surname>Hartmann</surname> <given-names>M.</given-names></name>
<name><surname>Hollister</surname> <given-names>E. B.</given-names></name>
<etal/>
</person-group>. (<year>2009</year>). 
<article-title>Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities</article-title>. <source>Appl. Environ. Microbiol.</source> <volume>75</volume>, <fpage>7537</fpage>&#x2013;<lpage>7541</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1128/AEM.01541-09</pub-id>, PMID: <pub-id pub-id-type="pmid">19801464</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sekaran</surname> <given-names>K.</given-names></name>
<name><surname>Varghese</surname> <given-names>R. P.</given-names></name>
<name><surname>Gopikrishnan</surname> <given-names>M.</given-names></name>
<name><surname>Alsamman</surname> <given-names>A. M.</given-names></name>
<name><surname>El Allali</surname> <given-names>A.</given-names></name>
<name><surname>Zayed</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Unraveling the dysbiosis of vaginal microbiome to understand cervical cancer disease etiology&#x2014;An explainable AI approach</article-title>. <source>Genes</source> <volume>14</volume>, <elocation-id>936</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/genes14040936</pub-id>, PMID: <pub-id pub-id-type="pmid">37107694</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Si</surname> <given-names>J.</given-names></name>
<name><surname>You</surname> <given-names>H. J.</given-names></name>
<name><surname>Yu</surname> <given-names>J.</given-names></name>
<name><surname>Sung</surname> <given-names>J.</given-names></name>
<name><surname>Ko</surname> <given-names>G.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Prevotella as a hub for vaginal microbiota under the influence of host genetics and their association with obesity</article-title>. <source>Cell Host Microbe</source> <volume>21</volume>, <fpage>97</fpage>&#x2013;<lpage>105</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.chom.2016.11.010</pub-id>, PMID: <pub-id pub-id-type="pmid">28017660</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Siegel</surname> <given-names>R. L.</given-names></name>
<name><surname>Giaquinto</surname> <given-names>A. N.</given-names></name>
<name><surname>Jemal</surname> <given-names>A.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Cancer statistics, 2024</article-title>. <source>CA: A Cancer J. Clin.</source> <volume>74</volume>, <fpage>12</fpage>&#x2013;<lpage>49</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3322/caac.21820</pub-id>, PMID: <pub-id pub-id-type="pmid">38230766</pub-id>
</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tsementzi</surname> <given-names>D.</given-names></name>
<name><surname>Pena&#x2010;Gonzalez</surname> <given-names>A.</given-names></name>
<name><surname>Bai</surname> <given-names>J.</given-names></name>
<name><surname>Hu</surname> <given-names>Y. J.</given-names></name>
<name><surname>Patel</surname> <given-names>P.</given-names></name>
<name><surname>Shelton</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Comparison of vaginal microbiota in gynecologic cancer patients pre- and post-radiation therapy and healthy women</article-title>. <source>Cancer Med.</source> <volume>9</volume>, <fpage>3714</fpage>&#x2013;<lpage>3724</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cam4.3027</pub-id>, PMID: <pub-id pub-id-type="pmid">32237205</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="web">
<person-group person-group-type="author"><collab>Uterine cancer survival statistics</collab>
</person-group> (<year>2015</year>). <source>Cancer Research UK</source>. Available online at: <uri xlink:href="https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/uterine-cancer/survival">https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/uterine-cancer/survival</uri> (Accessed <date-in-citation content-type="access-date">March 2, 2025</date-in-citation>).
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Walsh</surname> <given-names>D. M.</given-names></name>
<name><surname>Hokenstad</surname> <given-names>A. N.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Sung</surname> <given-names>J.</given-names></name>
<name><surname>Jenkins</surname> <given-names>G. D.</given-names></name>
<name><surname>Chia</surname> <given-names>N.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Postmenopause as a key factor in the composition of the Endometrial Cancer Microbiome (ECbiome)</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>19213</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-55720-8</pub-id>, PMID: <pub-id pub-id-type="pmid">31844128</pub-id>
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Walther-Ant&#xf3;nio</surname> <given-names>M. R.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Multinu</surname> <given-names>F.</given-names></name>
<name><surname>Hokenstad</surname> <given-names>A.</given-names></name>
<name><surname>Distad</surname> <given-names>T. J.</given-names></name>
<name><surname>Cheek</surname> <given-names>E. H.</given-names></name>
<etal/>
</person-group>. (<year>2016</year>). 
<article-title>Potential contribution of the uterine microbiome in the development of endometrial cancer</article-title>. <source>Genome Med.</source> <volume>8</volume>, <fpage>122</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13073-016-0368-y</pub-id>, PMID: <pub-id pub-id-type="pmid">27884207</pub-id>
</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Yang</surname> <given-names>J. L.</given-names></name>
<name><surname>Chen</surname> <given-names>C.</given-names></name>
<name><surname>Zheng</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>M.</given-names></name>
<name><surname>Qi</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Identification of <italic>Peptoniphilus vaginalis</italic>-Like Bacteria, <italic>Peptoniphilus septimus</italic> sp. nov., From Blood Cultures in a Cervical Cancer Patient Receiving Chemotherapy: Case and Implications</article-title>. <source>Front. Cell. Infect. Microbiol.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcimb.2022.954355</pub-id>, PMID: <pub-id pub-id-type="pmid">35880078</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/749802">Jun-Mo Kim</ext-link>, Chung-Ang University, Republic of Korea</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1846304">Byeonghwi Lim</ext-link>, Chung-Ang University, Republic of Korea</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3136442">Kafilat Salvador-Oke</ext-link>, University of Stellenbosch, South Africa</p></fn>
</fn-group>
<fn-group>
<fn id="fn1"><label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://sourceforge.net/projects/bbmap/">https://sourceforge.net/projects/bbmap/</ext-link></p></fn>
<fn id="fn2"><label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/TalhoukLab/VM01_EC_predictive_signature.git">https://github.com/TalhoukLab/VM01_EC_predictive_signature.git</ext-link></p></fn>
</fn-group>
</back>
</article>