<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Res. Metr. Anal.</journal-id>
<journal-title>Frontiers in Research Metrics and Analytics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Res. Metr. Anal.</abbrev-journal-title>
<issn pub-type="epub">2504-0537</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frma.2024.1493944</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Metrics and Analytics</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A bibliometric review of predictive modelling for cervical cancer risk</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ngema</surname> <given-names>Francis</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2839401/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mdhluli</surname> <given-names>Bonginkosi</given-names></name>
<uri xlink:href="http://loop.frontiersin.org/people/2886897/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mmileng</surname> <given-names>Pako</given-names></name>
<uri xlink:href="http://loop.frontiersin.org/people/2824945/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Shungube</surname> <given-names>Precious</given-names></name>
<uri xlink:href="http://loop.frontiersin.org/people/2888854/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Makgaba</surname> <given-names>Mokgoropo</given-names></name>
<uri xlink:href="http://loop.frontiersin.org/people/2824937/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hossana</surname> <given-names>Twinomurinzi</given-names></name>
<uri xlink:href="http://loop.frontiersin.org/people/1898281/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff><institution>Centre of Applied Data Science, University of Johannesburg</institution>, <addr-line>Johannesburg</addr-line>, <country>South Africa</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Dimitrios Katsaros, University of Thessaly, Greece</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Antonis Sidiropoulos, International Hellenic University, Greece</p>
<p>Evangelia Fragkou, University of Thessaly, Greece</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Francis Ngema <email>francisngema&#x00040;gmail.com</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>11</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>9</volume>
<elocation-id>1493944</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Ngema, Mdhluli, Mmileng, Shungube, Makgaba and Hossana.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Ngema, Mdhluli, Mmileng, Shungube, Makgaba and Hossana</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Cervical cancer represents a significant public health challenge, particularly affecting women&#x00027;s health globally. This study aims to advance the understanding of cervical cancer risk prediction research through a bibliometric analysis. The study identified 800 records from Scopus and Web of Science databases, which were reduced to 142 unique records after removing duplicates. Out of 100 abstracts assessed, 42 were excluded based on specific criteria, resulting in 58 studies included in the bibliometric review. Multiple scoping methods such as thematic analysis, citation analysis, bibliographic coupling, natural language processing, Latent Dirichlet Allocation and other visualisation techniques were used to analyse related publications between 2013 and 2024. The key findings revealed the importance of interdisciplinary collaboration in cervical cancer risk prediction, integrating expertise from mathematical disciplines, biomedical health, healthcare practitioners, public health, and policy. This approach significantly enhanced the accuracy and efficiency of cervical cancer detection and predictive modelling by adopting advanced machine learning algorithms, such as random forests and support vector machines. The main challenges were the lack of external validation on independent datasets and the need to address model interpretability to ensure healthcare providers understand and trust the predictive models. The study revealed the importance of interdisciplinary collaboration in cervical cancer risk prediction. It made recommendations for future research to focus on increasing the external validation of models, improving model interpretability, and promoting global research collaborations to enhance the comprehensiveness and applicability of cervical cancer risk prediction models.</p></abstract>
<kwd-group>
<kwd>cervical cancer</kwd>
<kwd>risk prediction</kwd>
<kwd>machine learning</kwd>
<kwd>artificial intelligence</kwd>
<kwd>thematic analysis</kwd>
<kwd>natural language processing</kwd>
<kwd>latent Dirichlet allocation</kwd>
<kwd>predictive modelling</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="32"/>
<page-count count="17"/>
<word-count count="8863"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Research Assessment</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Cervical cancer represents a significant public health challenge, particularly affecting women&#x00027;s health globally (Ding et al., <xref ref-type="bibr" rid="B5">2021</xref>). This challenge arises from various factors, including limited access to screening and early detection services, inadequate healthcare infrastructure, low levels of disease awareness, and persistent socioeconomic disparities. Moreover, the high prevalence of human papillomavirus (HPV) infection, the primary cause of cervical cancer, coupled with insufficient HPV vaccination coverage, contributes to elevated incidence rates (WHO, <xref ref-type="bibr" rid="B27">2024</xref>). To effectively combat this challenge, a comprehensive approach is imperative, involving strengthening healthcare systems, expanding access to affordable screening and vaccination programs, and implementing extensive education and awareness initiatives.</p>
<p>The Sustainable Development Goals (SDGs) adopted by United Nations member states highlight the importance of reducing premature mortality from non-communicable diseases (StatsSA, <xref ref-type="bibr" rid="B23">2023</xref>). Efforts to address this align with the World Health Organisation&#x00027;s (WHO) call to action in 2018 to eliminate cervical cancer as a public health concern (Gultekin et al., <xref ref-type="bibr" rid="B9">2020</xref>). WHO&#x00027;s strategy aims to increase HPV vaccination uptake and screening coverage to achieve a significant reduction in cervical cancer incidence rates.</p>
<p>Cervical cancer screening plays a pivotal role in reducing morbidity and mortality associated with the disease. Traditional methods like the Pap smear have evolved into more advanced techniques such as liquid-based cytology (LBC), high-risk HPV (hrHPV) testing, and artificial intelligence (AI)-powered systems (Swanson and Pantanowitz, <xref ref-type="bibr" rid="B24">2024</xref>). These advancements aim to overcome limitations of sensitivity and accuracy associated with traditional approaches, particularly in resource-limited settings.</p>
<p>While traditional cervical cancer prediction models rely on established risk factors and statistical methods, machine learning (ML) techniques offer innovative solutions by using complex algorithms and large datasets (Liu et al., <xref ref-type="bibr" rid="B17">2023</xref>; Meng et al., <xref ref-type="bibr" rid="B18">2022</xref>; Hu et al., <xref ref-type="bibr" rid="B10">2018</xref>). ML algorithms, including support vector machines, random forests, and deep learning networks, have shown promise in enhancing sensitivity, specificity, and overall accuracy in predicting cervical cancer risk (Rahimi et al., <xref ref-type="bibr" rid="B21">2023</xref>; Zhang et al., <xref ref-type="bibr" rid="B31">2020</xref>; Esteva et al., <xref ref-type="bibr" rid="B7">2019</xref>). Challenges such as model interpretability and data bias remain significant concerns in ML application to cervical cancer prediction (Singh and Goyal, <xref ref-type="bibr" rid="B22">2020</xref>).</p>
<p>To gain a comprehensive understanding of the current state of research and identify potential future directions, a bibliometric analysis is a valuable tool. This approach allows for the systematic exploration of the intellectual landscape by analysing publication patterns, citations, keywords, and collaborations within a specific research field (Vargas-Cardona et al., <xref ref-type="bibr" rid="B26">2023</xref>). Bibliometric analysis, a quantitative methodology, offers a powerful tool to explore the evolving knowledge structure within a specific research field (Jimma, <xref ref-type="bibr" rid="B14">2023</xref>). This method provides researchers with a systematic approach to analysing publication patterns, citations, keywords, and collaborations.</p>
<p>Through investigating these aspects, bibliometric analysis yields measurable, accurate, and detailed information on the field (Donthu et al., <xref ref-type="bibr" rid="B6">2021</xref>). This comprehensive understanding empowers researchers to not only gain a thorough grasp of the subject matter but also to foster a multidisciplinary approach, a key factor in advancing scientific progress (Motamedi et al., <xref ref-type="bibr" rid="B19">2023</xref>).</p>
<p>In the context of cervical cancer risk prediction modelling, a bibliometric analysis can reveal how this field has developed over time, identify prominent researchers and institutions contributing to the field, and uncover emerging research trends. This knowledge will be crucial for guiding future research ventures and improving our ability to predict and prevent cervical cancer. In this review, the authors employed a bibliometric analysis along with other scoping methods to investigate the field of predictive modelling for cervical cancer risk assessment.</p>
<p>The aim of this study was to identify foundational literature and thematic content in cervical cancer risk prediction modelling through citation analysis, and to examine research trends, collaboration patterns, and niche areas to uncover gaps and propose new directions for future research.</p>
<p>The study contributes to cervical cancer research by enhancing understanding of cervical cancer risk prediction through comprehensive thematic analysis, revealing core themes, relationships, and broader trends. It advances methodological approaches by integrating Braun and Clarke&#x00027;s framework with Natural Language Processing (NLP). The study identifies emerging niche themes like &#x0201C;machine learning algorithms&#x0201D; and &#x0201C;predictive models&#x0201D;, and highlights a multidisciplinary approach involving machine learning, deep learning, and clinical validation.</p>
<p>These insights can facilitate interdisciplinary collaborations and improve prevention, diagnosis, and treatment outcomes in cervical cancer research. For example, the identification of emerging themes such as &#x0201C;machine learning algorithms&#x0201D; can lead to collaborations between data scientists, statisticians, computer scientists, pathologists, general practitioners and oncologists, resulting in the development of advanced predictive models that enhance early detection and personalised treatment plans for cervical cancer patients.</p>
<p>The remainder of the study is structured as follows: the next section details the methodology that guided the study. It is followed by the results and discussion section, the limitations, and the conclusion.</p>
</sec>
<sec id="s2">
<title>2 Materials and methods</title>
<p>This review uses bibliometric analysis, a method for studying scientific publications, to examine research on machine learning for cervical cancer risk prediction. It analyses the structure and content of existing research articles to understand how this field is developing. This review aims to provide a clear picture of current research trends and how machine learning is being used to improve cervical cancer risk assessment.</p>
<sec>
<title>2.1 Data source and search strategy</title>
<p>The Population, Intervention, Comparison, and Outcome (PICO) framework was used to structure the search strategy. This is summarised in <xref ref-type="table" rid="T1">Table 1</xref>. This method ensured a comprehensive search strategy to gather relevant publications related to predictive modelling in cervical cancer risk assessment, covering key aspects such as population characteristics, intervention methods, comparison with traditional approaches, and outcome measures.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Search terms.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Concept</bold></th>
<th valign="top" align="left"><bold>Search terms</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Population</td>
<td valign="top" align="left">&#x0201C;Cervical cancer&#x0201D; OR &#x0201C;Cervical cancer risk factors&#x0201D; OR &#x0201C;Uterine Cervical Neoplasms&#x0201D; OR &#x0201C;Cervical Intraepithelial Neoplasm&#x0201D; OR &#x0201C;CIN&#x0201D;</td>
</tr>
<tr>
<td valign="top" align="left">Intervention</td>
<td valign="top" align="left">&#x0201C;Machine learning algorithms&#x0201D; OR &#x0201C;Logistic regression&#x0201D; OR &#x0201C;Random forest&#x0201D; OR &#x0201C;Support Vector Machines (SVM)&#x0201D; OR &#x0201C;XGBoost&#x0201D; OR &#x0201C;Deep Learning&#x0201D; OR &#x0201C;Neural Networks&#x0201D; OR &#x0201C;Decision Tree&#x0201D; OR &#x0201C;Gradient Boosting&#x0201D; OR &#x0201C;KNN&#x0201D; OR &#x0201C;Naive Bayes&#x0201D;</td>
</tr>
<tr>
<td valign="top" align="left">Comparison</td>
<td valign="top" align="left">&#x0201C;Pap smear&#x0201D; OR &#x0201C;HPV testing&#x0201D; OR &#x0201C;VIA (Visual Inspection with Acetic Acid)&#x0201D; OR &#x0201C;histopathological examination (histology)&#x0201D; OR &#x0201C;Cytology&#x0201D; OR &#x0201C;Nomograms&#x0201D; OR &#x0201C;clinical prediction rules&#x0201D; OR &#x0201C;risk assessment scores&#x0201D; OR &#x0201C;Colposcopy&#x0201D;</td>
</tr>
<tr>
<td valign="top" align="left">Outcome</td>
<td valign="top" align="left">&#x0201C;Accuracy&#x0201D; OR &#x0201C;sensitivity&#x0201D; OR &#x0201C;specificity&#x0201D; OR &#x0201C;false positive rate&#x0201D; OR &#x0201C;AUC (Area Under Curve)&#x0201D; OR &#x0201C;predictive values&#x0201D; OR &#x0201C;calibration&#x0201D; OR &#x0201C;clinical utility&#x0201D; OR &#x0201C;clinical support decision&#x0201D;</td>
</tr>
<tr>
<td valign="top" align="left">Refined search strategy for Web of Science</td>
<td valign="top" align="left">(&#x0201C;machine learning&#x0201D; OR &#x0201C;predictive modelling&#x0201D; OR &#x0201C;artificial Learning&#x0201D;) NEAR/10 (&#x0201C;cervical cancer&#x0201D; OR &#x0201C;cervical risk&#x0201D;)</td>
</tr>
<tr>
<td valign="top" align="left">Refined search strategy for Scopus</td>
<td valign="top" align="left">(&#x0201C;machine learning&#x0201D; OR &#x0201C;predictive modelling&#x0201D; OR &#x0201C;artificial Learning&#x0201D;) W/10 (&#x0201C;cervical cancer&#x0201D; OR &#x0201C;cervical risk&#x0201D;)</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>2.2 Data collection, cleaning and organisation</title>
<sec>
<title>2.2.1 Data collection</title>
<p>The entire data collection process is illustrated in a flowchart (see <xref ref-type="fig" rid="F1">Figure 1</xref>). This flowchart visually depicts the number of records identified, screened, excluded, and ultimately included in the final analysis, promoting transparency and replicability of our research methods.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Flow diagram for the bibliometric review.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0001.tif"/>
</fig>
<p>Both Scopus and Web of Science databases were used to retrieve relevant publications. Scopus was chosen for its comprehensive coverage across various disciplines, while Web of Science provided access to high-quality scholarly literature, ensuring a thorough exploration of the research landscape. The search strategy was refined by including proximity operators.</p>
<p>Proximity operators function as specialised search query keywords designed to refine retrieval by dictating the closeness of terms within a document (Goldman et al., <xref ref-type="bibr" rid="B8">1998</xref>). Unlike Boolean operators that focus on presence or absence, proximity operators emphasise the conceptual proximity of search terms. This ensures retrieved documents not only contain the specified keywords but also discuss them in close association, enhancing the relevance of results.</p>
<p>In Web of Science, the NEAR/x operator dictates the maximum number of intervening words between terms (e.g., &#x0201C;cancer NEAR/3 risk&#x0201D;). Conversely, Scopus utilises W/x expressions, requiring terms to appear in the specified order within a defined word range (e.g., &#x0201C;Cervical Cancer&#x0201D; W/2 &#x0201C;Machine learning&#x0201D;). Researchers can significantly improve the focus and relevance of their literature searches within Web of Science and Scopus by strategically employing proximity operators within each platform&#x00027;s designated syntax.</p>
<p>The retrieved data was saved in various file formats, e.g., .csv and .bib. The software EndNote was employed to facilitate the management of articles from both databases. EndNote allows for efficient organisation and storage of citation information, aiding in the subsequent stages of the research process.</p>
</sec>
<sec>
<title>2.2.2 Data cleaning and organisation</title>
<p>The retrieved publications from both databases underwent initial screening to identify and eliminate duplicates, thereby ensuring the integrity of the dataset. Subsequently, citation information and metadata were standardised and formatted uniformly to facilitate seamless analysis and comparison across datasets. The datasets obtained from Scopus and Web of Science were merged, and duplicates were subsequently removed using RStudio. As a result, the final dataset comprised 100 unique articles for further analysis.</p>
<p>The articles were distributed among five authors, with each responsible for reviewing twenty articles. Each publication underwent a thorough assessment for relevance and quality, guided by predefined inclusion and exclusion criteria outlined in <xref ref-type="table" rid="T2">Table 2</xref>. The authors convened to collectively discuss each article and reached a consensus on whether to include or exclude it based on the established criteria. Publications pertaining to treatment or involving patients already diagnosed with cervical cancer were excluded from the final dataset. Consequently, the refined dataset consisted of 58 distinct articles selected for further analysis.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Inclusion/exclusion criteria.</p></caption>
<table frame="box" rules="all">
<tbody>
<tr>
<td valign="top" align="left">Relevance</td>
<td valign="top" align="left">The article specifically addressed the development, validation, or application of predictive models for assessing cervical cancer risk. Studies investigating other aspects of cervical cancer, such as treatment or diagnosis, were excluded. Duplicate publications (preference given to the most comprehensive or recent publication) were excluded.</td>
</tr>
<tr>
<td valign="top" align="left">Study design</td>
<td valign="top" align="left">Peer-reviewed original research articles, systematic reviews, meta-analyses, and clinical trials were included. These studies provided robust evidence for the effectiveness of predictive models. Non-research articles including editorials, commentaries, letters, conference abstracts, and case reports were excluded. Studies with limited methodology or unclear results (e.g., lacking clear descriptions of the model, data analysis, or with inconclusive findings) were excluded.</td>
</tr>
<tr>
<td valign="top" align="left">Population</td>
<td valign="top" align="left">Studies involving human subjects of any age, ethnicity, or geographical location were included. This ensured a diverse representation of the population at risk for cervical cancer. Studies focusing solely on animals or <italic>in vitro</italic> models were excluded, as these findings did not translate directly to human risk assessment.</td>
</tr>
<tr>
<td valign="top" align="left">Predictive models</td>
<td valign="top" align="left">Articles that described the development, validation, or application of predictive models for assessing cervical cancer risk. The type of model (e.g., machine learning algorithm, statistical model) was not a limitation. Articles describing predictive models for diseases other than cervical cancer were excluded to maintain focus.</td>
</tr>
<tr>
<td valign="top" align="left">Outcome measures</td>
<td valign="top" align="left">Studies reporting on the performance metrics of the predictive models for cervical cancer risk assessment were essential. This included measures like sensitivity, specificity, and area under the receiver operating characteristic curve (AUC). Studies lacking performance metrics or validation of the predictive models were excluded.</td>
</tr>
<tr>
<td valign="top" align="left">Language</td>
<td valign="top" align="left">Articles published in English were preferred due to wider accessibility. However, articles in other languages were considered where translated versions were available.</td>
</tr>
<tr>
<td valign="top" align="left">Publication date</td>
<td valign="top" align="left">No restriction was applied to the publication date. This allowed for the inclusion of both recent advancements and established research in the field.</td>
</tr></tbody>
</table>
</table-wrap>
<p>The accuracy of citation details and publication metadata was verified to minimise errors and inconsistencies in the dataset.</p>
</sec>
</sec>
<sec>
<title>2.3 Bibliometric analysis</title>
<p>Science mapping, as defined by Donthu et al. (<xref ref-type="bibr" rid="B6">2021</xref>), examines the relationships between research constituents, focusing on intellectual interactions and structural connections among them. This analysis employed techniques such as citation analysis, thematic analysis, bibliographic coupling, co-word analysis, and co-authorship analysis (Donthu et al., <xref ref-type="bibr" rid="B6">2021</xref>; &#x000D6;zt&#x000FC;rk et al., <xref ref-type="bibr" rid="B20">2024</xref>). When combined with network analysis, these techniques were instrumental in presenting the bibliometric structure and the intellectual structure of the research field.</p>
<sec>
<title>2.3.1 Citation analysis</title>
<p>Citation analysis was utilised to identify highly cited publications and uncover their thematic content (Khare and Jain, <xref ref-type="bibr" rid="B15">2022</xref>). This involved examining the number of citations received by each publication to gauge its impact in the field. Relevant bibliometric software, such as VOSviewer and BiblioShiny, was employed to analyse citation data and visualise citation networks, aiding in the identification of influential publications and their intellectual connections.</p>
</sec>
<sec>
<title>2.3.2 Thematic analysis</title>
<p>This research approach merged the strengths of thematic analysis with the power of Natural Language Processing (NLP) techniques. Braun and Clarke&#x00027;s thematic analysis framework provided a systematic approach for identifying, analysing, and reporting patterns within qualitative data (&#x000D6;zt&#x000FC;rk et al., <xref ref-type="bibr" rid="B20">2024</xref>). Thematic analysis, a qualitative analysis method, involves coding documents to identify themes and capturing qualitative attributes relevant to the research subject (Braun and Clarke, <xref ref-type="bibr" rid="B4">2006</xref>).</p>
<p>In the first approach, the dataset underwent Term Frequency-Inverse Document Frequency (TF-IDF) vectorisation and stop word removal to preprocess the text, followed by the application of Latent Dirichlet Allocation (LDA) for topic modelling to identify five topics within the abstracts. The second approach employed a Generate Similar (Gensim) dictionary to convert the text data into a bag-of-words representation, followed by LDA-based topic modelling configured to identify six topics. LDA is a statistical technique that identifies latent thematic structures within a collection of documents (Jelodar et al., <xref ref-type="bibr" rid="B13">2019</xref>).</p>
<p>The two approaches were combined to examine the abstract text data. In each, LDA was used to identify topics within the abstracts, with the model extracting the most prominent words associated with each topic and thereby revealing prevalent themes related to cervical cancer risk prediction modelling. Subsequently, the topics from both approaches were merged to form four overarching themes.</p>
<p>The thematic analysis process involved familiarisation with the data, followed by documenting and coding, similar to tokenisation. Irrelevant words were removed to focus on pertinent information. Themes were generated by observing patterns in the codes and assessing their correlation, ensuring alignment with the research objectives. The iterative review of themes led to their refinement and finalisation, ultimately contributing to the clarification of prevalent themes within the abstracts pertaining to cervical cancer risk prediction modelling.</p>
<p>In addition, to gain a more comprehensive understanding, three complementary methods were used: thematic mapping, thematic evolution analysis, and trend topic analysis. Thematic mapping involves visually representing the relationships between identified themes, creating a map-like depiction that clarifies how themes interact and influence each other (Agbo et al., <xref ref-type="bibr" rid="B1">2021</xref>). This visual representation enhances our comprehension of the thematic landscape and the interplay of themes within the data. Thematic evolution analysis, performed using BiblioShiny, a software program for analysing research publications, enabled this study to identify how the main themes in cervical cancer research have shifted focus over time.</p>
<p>This revealed interesting trends, such as one topic becoming less popular while another gained more attention (Khare and Jain, <xref ref-type="bibr" rid="B15">2022</xref>). This analysis also revealed the emergence of entirely new themes within the data. Finally, trend topic analysis complemented thematic analysis by exploring broader trends within the data. This involved analysing the frequency of specific terms or concepts to identify areas of growing interest or decline over time (Liang et al., <xref ref-type="bibr" rid="B16">2021</xref>).</p>
</sec>
<sec>
<title>2.3.3 Bibliographic coupling</title>
<p>Bibliographic coupling was employed to identify recent and niche publications that share common references. This technique is based on the assumption that scholarly articles that cite a significant number of the same sources are likely to be similar in their content and research focus (Donthu et al., <xref ref-type="bibr" rid="B6">2021</xref>). The study analysed a dataset of 48 articles and created a visual representation to explore these connections.</p>
<p>Due to VOSviewer&#x00027;s criteria for bibliographic coupling, only 48 out of the 58 articles were included in the visualisation of research trends. This visualisation helped to understand the thematic landscape and how prominent themes connect with specific research fields and journals. Researchers can identify thematic clusters within a body of literature by analysing these citation patterns.</p>
</sec>
<sec>
<title>2.3.4 Data analysis and visualisation</title>
<p>BiblioShiny was used to analyse publication trends and geographic distribution of research output relevant to cervical cancer risk prediction. VOSviewer complemented this by creating visualisations like citation networks and thematic maps. These techniques helped interpret results, identifying key themes, research gaps, emerging trends, and prominent research institutions/authors in cervical cancer risk prediction research.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Results and discussions</title>
<p><xref ref-type="table" rid="T3">Table 3</xref> summarises the dataset, which comprised 58 articles drawn from 49 sources and exhibited a robust annual growth rate of 13.43%, reflecting an upward trend. The average document age of 2.97 years and an average citation count of 14.84 per document further highlighted the recent nature and influential reach of these publications.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Overview of the dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Description</bold></th>
<th valign="top" align="center"><bold>Results</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="2"><bold>Main information about data</bold></td>
</tr>
<tr>
<td valign="top" align="left">Timespan</td>
<td valign="top" align="center">2013:2024</td>
</tr>
<tr>
<td valign="top" align="left">Sources (journals, books, etc.)</td>
<td valign="top" align="center">49</td>
</tr>
<tr>
<td valign="top" align="left">Documents</td>
<td valign="top" align="center">58</td>
</tr>
<tr>
<td valign="top" align="left">Annual growth rate %</td>
<td valign="top" align="center">13.43</td>
</tr>
<tr>
<td valign="top" align="left">Document average age</td>
<td valign="top" align="center">2.97</td>
</tr>
<tr>
<td valign="top" align="left">Average citations per doc</td>
<td valign="top" align="center">14.84</td>
</tr>
<tr>
<td valign="top" align="left">References</td>
<td valign="top" align="center">86</td>
</tr>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="2"><bold>Document contents</bold></td>
</tr>
<tr>
<td valign="top" align="left">Keywords plus (ID)</td>
<td valign="top" align="center">665</td>
</tr>
<tr>
<td valign="top" align="left">Author&#x00027;s keywords (DE)</td>
<td valign="top" align="center">180</td>
</tr>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="2"><bold>Authors</bold></td>
</tr>
<tr>
<td valign="top" align="left">Authors</td>
<td valign="top" align="center">288</td>
</tr>
<tr>
<td valign="top" align="left">Authors of single-authored docs</td>
<td valign="top" align="center">1</td>
</tr>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="2"><bold>Authors collaboration</bold></td>
</tr>
<tr>
<td valign="top" align="left">Single-authored docs</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Co-authors per doc</td>
<td valign="top" align="center">5.28</td>
</tr>
<tr>
<td valign="top" align="left">International co-authorships %</td>
<td valign="top" align="center">0</td>
</tr>
<tr style="background-color:#dee1e1">
<td valign="top" align="left" colspan="2"><bold>Document types</bold></td>
</tr>
<tr>
<td valign="top" align="left">Article</td>
<td valign="top" align="center">58</td>
</tr></tbody>
</table>
</table-wrap>
<p>Collaboration is a defining characteristic of authorship on the prediction of cervical cancer, with an average of 5.28 co-authors per document. The study observed a significant trend of interdisciplinary collaboration in the field of cervical cancer risk predictive modelling. This collaboration was characterised by the integration of expertise from diverse academic domains, including computer science, mathematics, statistics, biomedical engineering, epidemiology, oncology, pathology, obstetrics, gynaecology, and public health. International co-authorships were absent, suggesting potential avenues for expanding research collaborations and fostering global knowledge exchange in cervical cancer research.</p>
<p>Notably, only one document had a single author, further emphasising the collaborative nature of research in this domain. As all documents are classified as articles, the data underscores the emphasis on scholarly rigour and methodological depth within cervical cancer research. These findings highlight the importance of interdisciplinary collaborations and innovative discoveries in the fight against cervical cancer. For example, one highly cited work documented a successful collaboration between computer scientists, biomedical engineers, pathologists, and oncologists. This collaboration resulted in a computer-aided diagnosis (CAD) system for cervical cancer screening. The system achieved high accuracy in detecting and classifying cervical tissue abnormalities, potentially leading to improved patient outcomes and increased survival rates for women (Alquran et al., <xref ref-type="bibr" rid="B3">2022</xref>).</p>
<sec>
<title>3.1 Citation analysis</title>
<p>The top 20 highly cited research articles presented in <xref ref-type="table" rid="T4">Table 4</xref> explored various methods for improving cervical cancer screening, diagnosis, and risk prediction. Machine learning algorithms were a common theme, with studies applying them to analyse clinical data, Pap smear images, and self-collected samples. These approaches have achieved high accuracy in some cases, exceeding the performance of traditional methods.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Top 20 highly cited publications.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Title</bold></th>
<th valign="top" align="left"><bold>Year</bold></th>
<th valign="top" align="left"><bold>Journal</bold></th>
<th valign="top" align="left"><bold>Study objectives</bold></th>
<th valign="top" align="left"><bold>Methodologies</bold></th>
<th valign="top" align="left"><bold>Key findings</bold></th>
<th valign="top" align="left"><bold>Citation count</bold></th>
<th valign="top" align="left"><bold>Focus area</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Data-driven cervical cancer prediction model with outlier detection and oversampling methods</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">Sensors</td>
<td valign="top" align="left">Propose a cervical cancer prediction model using risk factors as inputs.</td>
<td valign="top" align="left">Uses DBSCAN &#x00026; IForest for outlier detection, SMOTE &#x00026; SMOTETomek for balancing, Random Forest for classification.</td>
<td valign="top" align="left">IForest with SMOTE and IForest with SMOTETomek outperform other methods; RF is best classifier. CCPM shows better accuracy than previous methods.</td>
<td valign="top" align="left">145</td>
<td valign="top" align="left">Cervical cancer prediction</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Hybrid model for detection of cervical cancer using causal analysis and machine learning techniques</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">Comp Math Methods Med</td>
<td valign="top" align="left">Efficient feature selection and prediction model for cervical cancer datasets.</td>
<td valign="top" align="left">Boruta analysis for feature selection and SVM for classification.</td>
<td valign="top" align="left">Boruta with SVM outperforms existing methods.</td>
<td valign="top" align="left">70</td>
<td valign="top" align="left">Cervical cancer classification</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">A comprehensive study on multiclass cervical cancer diagnostic prediction on Pap smear images</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">Tissue Cell</td>
<td valign="top" align="left">Multi-class diagnosis of cervical lesions using deep learning.</td>
<td valign="top" align="left">Evaluates AlexNet, VGGNet (VGG-16 &#x00026; VGG-19), ResNet (ResNet-50 &#x00026; ResNet-101) and GoogleNet architectures.</td>
<td valign="top" align="left">Ensemble classifier with three best deep learning models achieves high accuracy multi-class classification.</td>
<td valign="top" align="left">60</td>
<td valign="top" align="left">Cervical cancer diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Cervical cancer diagnosis based on random forest</td>
<td valign="top" align="left">2017</td>
<td valign="top" align="left">Int J Perform Eng</td>
<td valign="top" align="left">Framework for cervical cancer diagnosis using Random Forest and feature selection.</td>
<td valign="top" align="left">Uses pre-processing, segmentation, relief for feature selection, and RF for classification.</td>
<td valign="top" align="left">RF with top 13 features outperforms other classifiers with 94.44% accuracy.</td>
<td valign="top" align="left">56</td>
<td valign="top" align="left">Cervical cancer diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Exemplar pyramid deep feature extraction based cervical cancer image classification model using pap-smear images</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">Biomed Signal Process Control</td>
<td valign="top" align="left">Detects cervical cancer using exemplar pyramid deep feature extraction method.</td>
<td valign="top" align="left">Employs transfer learning with Darknet19 or Darknet53 in an exemplar pyramid structure and Neighborhood Component Analysis (NCA) for feature selection.</td>
<td valign="top" align="left">Achieves 98.26 and 99.47% accuracy on Sipakmed and Mendeley LBC datasets, respectively.</td>
<td valign="top" align="left">53</td>
<td valign="top" align="left">Cervical cancer detection</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Machine learning-based statistical analysis for early stage detection of cervical cancer</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">Comput Biol Med</td>
<td valign="top" align="left">Find efficient models to detect early-stage cervical cancer using clinical data.</td>
<td valign="top" align="left">Apply random forest (RF), and instance-based K-nearest neighbor (IBK) to four types of clinical data.</td>
<td valign="top" align="left">RF performs best for biopsy and cytology, RF and IBK perform best for Hinselmann (99.16%) and Schiller (98.58%) data, respectively.</td>
<td valign="top" align="left">39</td>
<td valign="top" align="left">Cervical cancer detection</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Cervical cancer diagnostics using hybrid object detection adversarial networks</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">IEEE J Biomedical Health Inform</td>
<td valign="top" align="left">FSOD-GAN for screening and diagnosing cervical cancer using colposcopy images.</td>
<td valign="top" align="left">Faster R-CNN for spot detection and hierarchical classification of cancer stages.</td>
<td valign="top" align="left">Achieves 99% accuracy in diagnosing cervical cancer stages.</td>
<td valign="top" align="left">34</td>
<td valign="top" align="left">Cervical cancer screening and diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">DCAVN cervical cancer prediction using deep convolutional and variational autoencoder</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">Multimedia Tools Appl</td>
<td valign="top" align="left">Automate cancer diagnosis and classification with deep learning techniques.</td>
<td valign="top" align="left">Uses CNN with variational autoencoder for data classification.</td>
<td valign="top" align="left">Outperforms traditional methods with 99.2 and 99.4% accuracy.</td>
<td valign="top" align="left">30</td>
<td valign="top" align="left">Cervical cancer diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Performance analysis of machine learning algorithms for cervical cancer detection</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">Int J Healthcare Information Systems and Informatics</td>
<td valign="top" align="left">Apply ML algorithms for cervical cancer detection.</td>
<td valign="top" align="left">Uses segmentation, Extra Tree for feature selection, and logistic regression with L1 regularization.</td>
<td valign="top" align="left">Achieves up to 100% accuracy on some datasets.</td>
<td valign="top" align="left">27</td>
<td valign="top" align="left">Cervical cancer detection</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">Cervical cancer classification using combined ML and deep learning approach</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">Comput Mater Continua</td>
<td valign="top" align="left">Develop a computer-aided diagnosis system to classify Pap-smear images.</td>
<td valign="top" align="left">ResNet101 for feature extraction and SVM for classification.</td>
<td valign="top" align="left">Achieved 100% accuracy for distinguishing normal/abnormal cases.</td>
<td valign="top" align="left">27</td>
<td valign="top" align="left">Cervical cancer screening (deep learning)</td>
</tr>
<tr>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Genome-wide miRNA analysis of HPV-positive self-samples for early detection of cervical cancer</td>
<td valign="top" align="left">2019</td>
<td valign="top" align="left">Int J Cancer</td>
<td valign="top" align="left">Identify deregulated miRNAs as triage markers for cervical cancer in self-samples.</td>
<td valign="top" align="left">Uses small RNA sequencing and qPCR for validation.</td>
<td valign="top" align="left">Identifies a 9-miRNA marker panel with AUC of 0.89 for CIN3 detection.</td>
<td valign="top" align="left">26</td>
<td valign="top" align="left">Cervical cancer screening (self-sampling)</td>
</tr>
<tr>
<td valign="top" align="left">12</td>
<td valign="top" align="left">Predicting cervical cancer using machine learning methods</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">Int J Adv Comput Sci Appl</td>
<td valign="top" align="left">Develop ML model for accurate cervical cancer diagnosis.</td>
<td valign="top" align="left">Uses voting classifier, SMOTE, and PCA on cervical cancer risk factor dataset.</td>
<td valign="top" align="left">Achieves higher accuracy and sensitivity than cytology alone.</td>
<td valign="top" align="left">25</td>
<td valign="top" align="left">Cervical cancer diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">13</td>
<td valign="top" align="left">Real-world effectiveness of primary screening with high-risk human papillomavirus testing in the cervical cancer screening program in China: a nationwide, population-based study</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">BMC Med</td>
<td valign="top" align="left">Evaluate HPV testing effectiveness in cervical cancer screening.</td>
<td valign="top" align="left">Population-based study comparing HPV testing and cytology.</td>
<td valign="top" align="left">HPV testing has higher detection rate and predictive value for CIN2&#x0002B;.</td>
<td valign="top" align="left">24</td>
<td valign="top" align="left">Cervical cancer screening (HPV testing)</td>
</tr>
<tr>
<td valign="top" align="left">14</td>
<td valign="top" align="left">Development of a cervical cancer progress prediction tool for human papillomavirus positive Koreans</td>
<td valign="top" align="left">2015</td>
<td valign="top" align="left">J Int Med Res</td>
<td valign="top" align="left">Develop web-based tool to predict high-risk cervical lesions in HPV positive women.</td>
<td valign="top" align="left">Use Support Vector Machine (SVM) model to identify patient features using PAP smear and HPV genotype data.</td>
<td valign="top" align="left">Achieves 74.41% accuracy using four selected features (PAP, HPV16, HPV52, and HPV35).</td>
<td valign="top" align="left">18</td>
<td valign="top" align="left">Cervical cancer risk prediction</td>
</tr>
<tr>
<td valign="top" align="left">15</td>
<td valign="top" align="left">A cervical abnormality risk prediction model: can we use clinical information to predict which patients with ASCUS/LSIL Pap tests will develop CIN 2/3 or AIS?</td>
<td valign="top" align="left">2013</td>
<td valign="top" align="left">J Lower Genital Tract Dis</td>
<td valign="top" align="left">Model for predicting precancerous lesions in women with mild PAP abnormalities.</td>
<td valign="top" align="left">Multivariate logistic regression on clinical and demographic data.</td>
<td valign="top" align="left">Poor individual predictive ability due to data limitations.</td>
<td valign="top" align="left">17</td>
<td valign="top" align="left">Cervical cancer risk prediction</td>
</tr>
<tr>
<td valign="top" align="left">16</td>
<td valign="top" align="left">Ordinal losses for classification of cervical cancer risk</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">PeerJ Comput Sci</td>
<td valign="top" align="left">Improved cervical cancer risk prediction using ordinal loss in neural networks.</td>
<td valign="top" align="left">Non-parametric ordinal loss function for deep neural networks.</td>
<td valign="top" align="left">Achieves 75.6% accuracy for seven classes, 81.3% for four classes.</td>
<td valign="top" align="left">16</td>
<td valign="top" align="left">Cervical cancer screening (deep learning)</td>
</tr>
<tr>
<td valign="top" align="left">17</td>
<td valign="top" align="left">Hematological markers as predictors for cervical cancer</td>
<td valign="top" align="left">2019</td>
<td valign="top" align="left">J Oncol</td>
<td valign="top" align="left">Investigate hematological markers for cervical cancer diagnosis.</td>
<td valign="top" align="left">Analyzes Neutrophil-to-Lymphocyte Ratio (NLR) and Platelet-to-Lymphocyte Ratio (PLR).</td>
<td valign="top" align="left">NLR and PLR significantly elevated in cervical cancer patients.</td>
<td valign="top" align="left">15</td>
<td valign="top" align="left">Cervical cancer diagnosis (inflammatory markers)</td>
</tr>
<tr>
<td valign="top" align="left">18</td>
<td valign="top" align="left">A machine learning-based framework for the prediction of cervical cancer risk in women</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">Sustainability</td>
<td valign="top" align="left">Develop a deep learning model for cervical cancer risk prediction using HPV test data.</td>
<td valign="top" align="left">Proposes a deep learning model; not fully implemented.</td>
<td valign="top" align="left">Promising future development for risk prediction.</td>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Cervical cancer risk prediction (deep learning)</td>
</tr>
<tr>
<td valign="top" align="left">19</td>
<td valign="top" align="left">Hybridization of deep learning pretrained models with machine learning classifiers and fuzzy min-max neural network for cervical cancer diagnosis</td>
<td valign="top" align="left">2023</td>
<td valign="top" align="left">Diagnostics</td>
<td valign="top" align="left">Improve Pap-smear image classification accuracy using deep learning and fuzzy neural networks.</td>
<td valign="top" align="left">Combine deep learning architectures with fuzzy min-max neural networks to classify Pap-smear images.</td>
<td valign="top" align="left">Achieved 95.33% classification accuracy on Pap-smear image dataset.</td>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Cervical cancer diagnosis (deep learning)</td>
</tr>
<tr>
<td valign="top" align="left">20</td>
<td valign="top" align="left">The role of high-risk human papilloma virus testing in the surveillance of cervical cancer after treatment</td>
<td valign="top" align="left">2023</td>
<td valign="top" align="left">Arch Pathol Lab Med</td>
<td valign="top" align="left">Investigate the role of HR-HPV testing in predicting cervical cancer recurrence.</td>
<td valign="top" align="left">Retrospective study on patients who underwent HR-HPV testing during cervical cancer surveillance.</td>
<td valign="top" align="left">Persistent HR-HPV infection is a risk factor for cervical cancer recurrence.</td>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Cervical cancer progression prediction</td>
</tr></tbody>
</table>
</table-wrap>
<p>Recent advancements in cervical cancer research are revolutionising screening, diagnosis, and risk prediction methods. Machine learning algorithms are at the forefront, with studies such as Ijaz et al. (<xref ref-type="bibr" rid="B12">2020</xref>) utilising them to analyse clinical data for early cancer detection. Deep learning, a specialised machine learning technique, is also proving valuable. Hussain et al. (<xref ref-type="bibr" rid="B11">2020</xref>) demonstrate its effectiveness in Pap smear image analysis and classification using convolutional neural networks.</p>
<p>Beyond machine learning, the table highlights other promising techniques. Yaman and Tuncer (<xref ref-type="bibr" rid="B28">2022</xref>) introduced a novel deep feature extraction method for accurate cancer detection, while Ali et al. (<xref ref-type="bibr" rid="B2">2021</xref>) showcased the efficacy of machine learning models in analysing various clinical datasets to identify early-stage cervical cancer. Additionally, research on testing for HPV, a major risk factor, is ongoing, with studies exploring its potential to improve screening programs (Zhao et al., <xref ref-type="bibr" rid="B32">2021</xref>). Furthermore, <xref ref-type="fig" rid="F2">Figure 2</xref> shows the most globally cited articles. Overall, these advancements offer significant hope for improved cervical cancer outcomes and enhanced patient care.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Top 20 most globally cited articles.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0002.tif"/>
</fig>
<p>The top 20 highly cited publications in cervical cancer research revealed a multi-pronged approach using recent advancements. Machine learning and deep learning techniques were prominent, demonstrating effectiveness in analysing various data sources&#x02014;clinical data, Pap smear images, and HPV test results. Algorithms such as Random Forest and convolutional neural networks achieved high accuracy in classification and detection tasks.</p>
<p>Furthermore, researchers developed methods for efficient feature selection and model optimisation (e.g., Boruta analysis) to refine these tools and enhance model performance. Early detection remains a critical focus, with novel models such as the Cervical Cancer Prediction Model utilising risk factors and clinical data for early identification. This focus aimed to enhance patient outcomes through timely intervention.</p>
<p>Enhanced screening strategies are another key theme. The integration of advanced technologies such as digital colposcopy and image analysis was explored to improve accuracy and efficiency. In addition, the role of HPV testing in identifying high-risk individuals was investigated for improved screening protocols. Finally, the importance of clinical validation and translation of research findings into practical applications was emphasised. Extensive validation studies ensure the generalisability of proposed methodologies, thus bridging the gap between research and improved patient care.</p>
<p>Thematic content analysis revealed a multidisciplinary approach in cervical cancer research. Machine learning, deep learning, clinical validation, and translation contributed significantly to innovation in cervical cancer detection, diagnosis, and management.</p>
<p>The reviewed studies on machine learning in cervical cancer research reveal several gaps. Many studies lack external validation on independent datasets, limiting the robustness and generalisability of their findings. Most studies do not address model interpretability, crucial for understanding prediction mechanisms. Socioeconomic factors, such as income and access to healthcare, are largely ignored despite their significant impact on cervical cancer risk and outcomes. Furthermore, information on dataset size and diversity is often missing, hindering the assessment of model performance and applicability across different populations. Addressing these gaps is essential for advancing the practical application of machine learning in cervical cancer care.</p>
</sec>
<sec>
<title>3.2 Co-citation analysis</title>
<p>The visual representation in <xref ref-type="fig" rid="F3">Figure 3</xref> depicts a co-citation network with authors as nodes connected by lines indicating their co-citations, with the central node labelled &#x0201C;Zhang&#x0201D; prominently displayed. Surrounding this central node are numerous red lines connecting to other author nodes, suggesting that Zhang is frequently co-cited with these authors. Other author nodes, labelled with various names or abbreviations, are scattered around the central node, with connections represented by red lines of varying opacities.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Co-citation network.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0003.tif"/>
</fig>
<p>The denser lines indicate stronger co-citation relationships, resembling a starburst pattern emanating from the central &#x0201C;Zhang&#x0201D; node. This network structure implies that &#x0201C;Zhang&#x0201D; holds significant influence in the field, with the connections revealing collaborative relationships and influential connections among authors. Co-citation networks offer valuable insights into scholarly collaboration, research trends, and influential figures, aiding researchers in understanding the dynamics of scientific collaboration and impact.</p>
</sec>
<sec>
<title>3.3 Thematic analysis</title>
<p>The analysis of abstracts identified emerging themes within cervical cancer risk prediction modelling, represented by codes extracted from the text data. <xref ref-type="table" rid="T5">Table 5</xref> summarises the frequency counts of these codes, showcasing the prominence of keywords such as &#x0201C;cervical,&#x0201D; &#x0201C;cancer,&#x0201D; &#x0201C;model,&#x0201D; &#x0201C;patient,&#x0201D; and &#x0201C;accuracy.&#x0201D; Additionally, codes such as &#x0201C;pap,&#x0201D; &#x0201C;study,&#x0201D; and &#x0201C;machine&#x0201D; were also prevalent, indicating themes related to diagnostic methods, research studies, and machine learning techniques, respectively. These codes provided valuable insights into the key concepts and areas of focus within the abstracts, facilitating a deeper understanding of the research landscape in cervical cancer risk prediction.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Codes for emerging themes from the abstracts.</p></caption>
<table frame="box" rules="all">
<tbody>
<tr>
<td valign="top" align="left">Word</td>
<td valign="top" align="center">cervical</td>
<td valign="top" align="center">cancer</td>
<td valign="top" align="center">model</td>
<td valign="top" align="center">patient</td>
<td valign="top" align="center">accuracy</td>
<td valign="top" align="center">learning</td>
<td valign="top" align="center">feature</td>
<td valign="top" align="center">image</td>
<td valign="top" align="center">result</td>
<td valign="top" align="center">method</td>
</tr>
<tr>
<td valign="top" align="left">Count</td>
<td valign="top" align="center">266</td>
<td valign="top" align="center">257</td>
<td valign="top" align="center">123</td>
<td valign="top" align="center">92</td>
<td valign="top" align="center">90</td>
<td valign="top" align="center">84</td>
<td valign="top" align="center">81</td>
<td valign="top" align="center">79</td>
<td valign="top" align="center">78</td>
<td valign="top" align="center">76</td>
</tr>
<tr>
<td valign="top" align="left">Word</td>
<td valign="top" align="center">pap</td>
<td valign="top" align="center">study</td>
<td valign="top" align="center">machine</td>
<td valign="top" align="center">smear</td>
<td valign="top" align="center">classification</td>
<td valign="top" align="center">woman</td>
<td valign="top" align="center">algorithm</td>
<td valign="top" align="center">risk</td>
<td valign="top" align="center">hpv</td>
<td valign="top" align="center">diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">Count</td>
<td valign="top" align="center">70</td>
<td valign="top" align="center">62</td>
<td valign="top" align="center">60</td>
<td valign="top" align="center">58</td>
<td valign="top" align="center">56</td>
<td valign="top" align="center">56</td>
<td valign="top" align="center">55</td>
<td valign="top" align="center">55</td>
<td valign="top" align="center">54</td>
<td valign="top" align="center">49</td>
</tr></tbody>
</table>
</table-wrap>
<p>TF-IDF vectorisation and LDA produced topics characterised by prominent keywords, delineating prevalent themes within the abstracts. These topics encompassed a spectrum of concepts, with Topic 0 focusing on cancer and cervical, Topic 1 emphasising patient-related terms, Topic 2 highlighting medical images, Topic 3 focusing on patient-centred models, and Topic 4 showcasing features pertinent to predictive modelling. The weightings in <xref ref-type="table" rid="T6">Tables 6</xref>, <xref ref-type="table" rid="T7">7</xref> represent the probability of a word belonging to a specific topic, with higher weights indicating a stronger association.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>TF-IDF LDA results.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Topic</bold></th>
<th valign="top" align="left"><bold>Word 1</bold></th>
<th valign="top" align="left"><bold>Word 2</bold></th>
<th valign="top" align="left"><bold>Word 3</bold></th>
<th valign="top" align="left"><bold>Word 4</bold></th>
<th valign="top" align="left"><bold>Word 5</bold></th>
<th valign="top" align="left"><bold>Word 6</bold></th>
<th valign="top" align="left"><bold>Word 7</bold></th>
<th valign="top" align="left"><bold>Word 8</bold></th>
<th valign="top" align="left"><bold>Word 9</bold></th>
<th valign="top" align="left"><bold>Word 10</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Topic 0</td>
<td valign="top" align="left">Cancer (0.029)</td>
<td valign="top" align="left">Cervical (0.028)</td>
<td valign="top" align="left">Model (0.013)</td>
<td valign="top" align="left">Learning (0.011)</td>
<td valign="top" align="left">Accuracy (0.011)</td>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">Topic 1</td>
<td valign="top" align="left">Feature</td>
<td valign="top" align="left">Model</td>
<td valign="top" align="left">Image</td>
<td valign="top" align="left">Propose</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Pap</td>
<td valign="top" align="left">Patient</td>
<td valign="top" align="left">Classification</td>
<td valign="top" align="left">Smear</td>
<td valign="top" align="left">Dataset</td>
</tr>
<tr>
<td valign="top" align="left">Topic 2</td>
<td valign="top" align="left">HPV</td>
<td valign="top" align="left">Testing</td>
<td valign="top" align="left">Self</td>
<td valign="top" align="left">HR</td>
<td valign="top" align="left">Sample</td>
<td valign="top" align="left">Positive</td>
<td valign="top" align="left">Patient</td>
<td valign="top" align="left">Risk</td>
<td valign="top" align="left">Recurrence</td>
<td valign="top" align="left">Vs</td>
</tr>
<tr>
<td valign="top" align="left">Topic 3</td>
<td valign="top" align="left">Machine</td>
<td valign="top" align="left">Algorithm</td>
<td valign="top" align="left">Model</td>
<td valign="top" align="left">ML</td>
<td valign="top" align="left">Colposcopy</td>
<td valign="top" align="left">Algorithms</td>
<td valign="top" align="left">Result</td>
<td valign="top" align="left">Classifier</td>
<td valign="top" align="left">Colposcopic</td>
<td valign="top" align="left">Neural</td>
</tr>
<tr>
<td valign="top" align="left">Topic 4</td>
<td valign="top" align="left">CI</td>
<td valign="top" align="left">RT</td>
<td valign="top" align="left">Patient</td>
<td valign="top" align="left">AI</td>
<td valign="top" align="left">Nomogram</td>
<td valign="top" align="left">Lesion</td>
<td valign="top" align="left">Gynecologic</td>
<td valign="top" align="left">Grade</td>
<td valign="top" align="left">Associate</td>
<td valign="top" align="left">Prognosis</td>
</tr>
<tr>
<td valign="top" align="left">Topic 5</td>
<td valign="top" align="left">Recurrence</td>
<td valign="top" align="left">Adenocarcinoma</td>
<td valign="top" align="left">Invasion</td>
<td valign="top" align="left">Usual</td>
<td valign="top" align="left">Risk</td>
<td valign="top" align="left">Model</td>
<td valign="top" align="left">Tumour</td>
<td valign="top" align="left">Smote</td>
<td valign="top" align="left">Analyze</td>
<td valign="top" align="left">Month</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Gensim LDA results.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Topic</bold></th>
<th valign="top" align="left"><bold>Word 1</bold></th>
<th valign="top" align="left"><bold>Word 2</bold></th>
<th valign="top" align="left"><bold>Word 3</bold></th>
<th valign="top" align="left"><bold>Word 4</bold></th>
<th valign="top" align="left"><bold>Word 5</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Topic 0</td>
<td valign="top" align="left">Cancer (0.029)</td>
<td valign="top" align="left">Cervical (0.028)</td>
<td valign="top" align="left">Model (0.013)</td>
<td valign="top" align="left">Learning (0.011)</td>
<td valign="top" align="left">Accuracy (0.011)</td>
</tr>
<tr>
<td valign="top" align="left">Topic 1</td>
<td valign="top" align="left">Cervical (0.029)</td>
<td valign="top" align="left">Cancer (0.018)</td>
<td valign="top" align="left">Model (0.010)</td>
<td valign="top" align="left">Patient (0.010)</td>
<td valign="top" align="left">Accuracy (0.010)</td>
</tr>
<tr>
<td valign="top" align="left">Topic 2</td>
<td valign="top" align="left">Cancer (0.026)</td>
<td valign="top" align="left">Cervical (0.025)</td>
<td valign="top" align="left">Model (0.013)</td>
<td valign="top" align="left">Patient (0.008)</td>
<td valign="top" align="left">Image (0.008)</td>
</tr>
<tr>
<td valign="top" align="left">Topic 3</td>
<td valign="top" align="left">Cancer (0.022)</td>
<td valign="top" align="left">Cervical (0.020)</td>
<td valign="top" align="left">Model (0.010)</td>
<td valign="top" align="left">Patient (0.010)</td>
<td valign="top" align="left">Result (0.008)</td>
</tr>
<tr>
<td valign="top" align="left">Topic 4</td>
<td valign="top" align="left">Cancer (0.026)</td>
<td valign="top" align="left">Cervical (0.025)</td>
<td valign="top" align="left">Model (0.013)</td>
<td valign="top" align="left">Feature (0.013)</td>
<td valign="top" align="left">Patient (0.011)</td>
</tr></tbody>
</table>
</table-wrap>
<p>Additionally, Gensim and LDA topic modelling revealed five key themes within cervical cancer risk prediction research. Topics 0 and 1 featured core concepts such as &#x0201C;cancer,&#x0201D; &#x0201C;cervical,&#x0201D; &#x0201C;model,&#x0201D; and &#x0201C;accuracy,&#x0201D; emphasising model development and machine learning. Topic 2 suggested the use of medical images for detection, while Topic 3 focused on patient-centred models with interpretable results. Topic 4 pointed to research on feature engineering for improved model performance, highlighting a strong focus on model development and translating research into practical clinical applications for improved patient outcomes.</p>
<p>This convergence of findings from both methodologies emphasises the multifaceted nature of cervical cancer risk prediction research. The clusters derived from both approaches included:</p>
<list list-type="order">
<list-item><p><bold>Cervical cancer diagnosis and risk assessment with machine learning:</bold> this cluster merged topics emphasising machine learning models for diagnosis and risk assessment in cervical cancer, incorporating both image-based and non-image-based approaches.</p></list-item>
<list-item><p><bold>HPV testing and risk factors for cervical cancer</bold>: this cluster focused on the role of HPV testing, self-sampling methods, and their association with cervical cancer risk, potentially including comparisons with other risk factors.</p></list-item>
<list-item><p><bold>Prognosis and risk stratification in cervical cancer</bold>: this cluster highlighted the importance of clinicopathological features, AI, and nomograms in determining a patient&#x00027;s risk grade and prognosis in cervical cancer.</p></list-item>
<list-item><p><bold>Cervical cancer recurrence analysis:</bold> this cluster explored factors associated with cervical cancer recurrence, including tumour characteristics, risk models, and survival analysis methods, addressing data imbalance with SMOTE.</p></list-item>
</list>
<p>The integration of findings from both the Braun and Clarke thematic analysis and the NLP approach underscored the multifaceted nature of cervical cancer risk prediction research. The thematic and NLP analyses provided specific, actionable insights into cervical cancer risk prediction research. Thematic analysis revealed emerging themes such as diagnostic methods, machine learning techniques, and patient-focused models, underscored by the frequent occurrence of keywords like &#x0201C;cervical,&#x0201D; &#x0201C;cancer,&#x0201D; &#x0201C;model,&#x0201D; and &#x0201C;accuracy.&#x0201D;</p>
<p>NLP techniques like TF-IDF vectorisation and LDA highlighted key topics, including the development of predictive models, the use of medical images, and patient-centred approaches, as seen in the prevalence of terms across topics. This integrated methodology found research clusters such as machine learning for diagnosis, HPV testing, prognosis and risk stratification, and recurrence analysis. These findings emphasise the multidisciplinary nature of cervical cancer risk prediction, showing the combination of qualitative and quantitative insights to enhance understanding and guide future research directions.</p>
<p>The provided <xref ref-type="fig" rid="F4">Figure 4</xref> illustrates the thematic understanding of cervical cancer research and machine learning, aligning with the identified themes using BiblioShiny. A quadrant plot visualises the thematic landscape of cervical cancer research and machine learning. Two axes, developmental degree (vertical) and relevance degree (horizontal), divide the plot into quadrants. Niche themes, like &#x0201C;pap smear images&#x0201D; and &#x0201C;machine learning algorithms,&#x0201D; occupy a specific space, reflecting their focused nature. Conversely, central themes like &#x0201C;cervical cancer prediction&#x0201D; reside in the core area due to their high relevance.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Thematic map.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0004.tif"/>
</fig>
<p>Emerging or declining themes, such as &#x0201C;challenging process due to detecting cervical cancer,&#x0201D; are positioned accordingly. Established themes like &#x0201C;confidence interval (CI)&#x0201D; form the foundational &#x0201C;basic themes&#x0201D; quadrant. Specific terms like &#x0201C;support vector machine,&#x0201D; &#x0201C;human papillomavirus (HPV),&#x0201D; or &#x0201C;squamous intraepithelial lesions&#x0201D; further illustrate the thematic breakdown within each quadrant. The study gained insights into the relative importance and developmental stages of various research areas, ultimately aiding in identifying promising avenues and knowledge gaps within the field of cervical cancer research and machine learning by analysing this thematic distribution.</p>
<p>The study aligns with current trends, as commonly used models in cervical cancer risk prediction include support vector machine (SVM), random forest (RF), and multivariable logistic regression. This is further supported by research comparing machine learning algorithms for disease prediction, which identified SVM and RF as preferred algorithms (Uddin et al., <xref ref-type="bibr" rid="B25">2019</xref>; Zhang et al., <xref ref-type="bibr" rid="B30">2023</xref>; Yang et al., <xref ref-type="bibr" rid="B29">2019</xref>).</p>
<p>The niche themes &#x0201C;detect cervical cancer&#x0201D; and &#x0201C;machine learning algorithms&#x0201D; signify specialised areas of focus within the broader research field. This means that there is a concentrated effort on using advanced computational techniques to improve the accuracy and efficiency of cervical cancer detection.</p>
<p>Researchers are increasingly integrating machine learning algorithms to develop innovative diagnostic tools, which can potentially enhance early detection, optimise treatment plans, and improve patient outcomes. This targeted research highlights the importance of interdisciplinary collaboration in advancing medical technologies and addressing specific challenges within cervical cancer care.</p>
<p>The visualisation in <xref ref-type="fig" rid="F5">Figure 5</xref> revealed that between 2013 and 2019, research activities primarily focused on refining pap smear imaging techniques, a fundamental aspect of cervical cancer screening. This sustained emphasis on pap smear imaging highlights its pivotal role in identifying precancerous lesions and early-stage malignancies. Concurrently, investigations into cervical cancer risk spanned several years (2013&#x02013;2021), reflecting ongoing efforts to elucidate risk factors, epidemiology, and preventive measures. Studies explored genetic predispositions, viral associations (such as human papillomavirus), and lifestyle factors associated with cervical cancer risk, contributing to a comprehensive understanding of the disease&#x00027;s aetiology and prevention strategies.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Thematic evolution.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0005.tif"/>
</fig>
<p>The years 2020&#x02013;2021 witnessed a significant shift with the emergence of machine learning algorithms in cervical cancer research. Researchers adopted artificial intelligence to enhance diagnostic accuracy and predictive models, analysing complex datasets comprising imaging results and patient histories for personalised risk assessment. Subsequently (2021&#x02013;2022), predictive modelling gained prominence, aiming to forecast individualised outcomes such as disease progression, treatment response, and recurrence. Integrating clinical data with machine learning algorithms facilitated more precise risk stratification, marking a notable advancement in cervical cancer risk prediction and management strategies.</p>
<p>The provided <xref ref-type="fig" rid="F6">Figure 6</xref> offers insight into the evolving landscape of topics related to cervical cancer and machine learning from 2016 to 2021. Each topic is depicted by blue dots, with their sizes indicating the term frequency, denoted by a legend on the right. Notable topics include cervical cancer detection, machine learning models, Pap smear images, cervical cancer screening, machine learning algorithms, support vector machine, cervical cancer risk, cervical cancer patients, and human papillomavirus (HPV). The graph highlights the progressive nature of research and development in cervical cancer detection, machine learning, and associated domains, emphasising the ongoing efforts to advance diagnostic and predictive capabilities for improved healthcare outcomes.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Trend topics.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0006.tif"/>
</fig>
<p>The significance of the word cloud (see <xref ref-type="fig" rid="F7">Figure 7</xref>) analysis lies in its ability to highlight key themes and trends within cervical cancer screening research. It reveals the central focus on cervical cancer screening, indicating the ongoing efforts to improve detection and diagnosis methods. The prominence of terms like &#x0201C;Pap Smear Images,&#x0201D; &#x0201C;Human Papillomavirus (HPV),&#x0201D; &#x0201C;Low-Grade Squamous Intraepithelial,&#x0201D; &#x0201C;High-Grade Squamous Intraepithelial,&#x0201D; and &#x0201C;Machine Learning Algorithms&#x0201D; suggests a shift towards incorporating advanced technology for more accurate and efficient screening processes.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Word cloud.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0007.tif"/>
</fig>
<p>This implies a potential transformation in clinical practice, with the adoption of innovative approaches to enhance early detection rates and improve patient outcomes. Terms like &#x0201C;Cervical Cancer Detection,&#x0201D; &#x0201C;Random Forest (RF),&#x0201D; and &#x0201C;Support Vector Machine&#x0201D; show the importance of early diagnosis and the specific techniques employed to achieve this goal. Overall, the word cloud analysis shows the evolving field of cervical cancer screening research, highlighting the integration of technology and the ongoing pursuit of improved screening methods for better patient care.</p>
</sec>
<sec>
<title>3.4 Bibliographic coupling</title>
<p>A key finding was the recurring theme of &#x0201C;diagnostics&#x0201D; (see <xref ref-type="fig" rid="F8">Figure 8</xref>). This theme appeared as a central element in the visualisation, highlighting its widespread relevance across the analysed articles. The prominence of diagnostics suggested its importance as a major area of research focus.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Bibliographic coupling.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0008.tif"/>
</fig>
<p>The visualisation also revealed a diverse range of journals and research fields connected to diagnostics. Each surrounding node represented a specific journal or field of study, such as the Asian Journal of Information Technology, Healthcare Technology Letters, International Journal of Molecular Sciences, Abdominal Radiology, Multimedia Tools and Applications, Computational Mathematics International Journal, and Indian Journal of Science &#x00026; Technology. This variety of nodes showcased the broad scope of research areas associated with diagnostics.</p>
<p>Red lines connect the central &#x0201C;diagnostics&#x0201D; theme to other nodes, indicating bibliographic coupling. This means there were close relationships between diagnostics research and various research domains. These connections highlighted the interdisciplinary nature of diagnostics research, suggesting its interconnectedness with multiple fields. The visualisation provided evidence for the role diagnostics plays in integrating different areas of research.</p>
<p>This approach went beyond traditional citation analysis by uncovering interconnected relationships between research topics and journals, offering valuable insights into the multidisciplinary nature of diagnostics research. Unlike traditional citation analysis, which primarily focuses on citation counts and direct references, <xref ref-type="fig" rid="F8">Figure 8</xref> provided a more comprehensive view of the complex interplay between different areas of scholarly inquiry.</p>
<p>The findings demonstrated the collaborative nature of interdisciplinary research efforts in driving advancements in cervical cancer diagnosis by highlighting the interconnectedness between diagnostics research and various research domains. This deeper understanding emphasises the importance of interdisciplinary collaboration in addressing complex healthcare challenges and highlights the pivotal role of diagnostics research in advancing diagnostic capabilities for cervical cancer.</p>
</sec>
<sec>
<title>3.5 Scientific production</title>
<p>The map in <xref ref-type="fig" rid="F9">Figure 9</xref> illustrates the distribution of scientific output across different countries regarding predictive modelling for cervical cancer risk. Countries are shaded in varying shades of blue to denote the volume of publications, with darker shades indicating higher production. Notably, India, the United States, China, and Australia emerge as significant contributors to this field. Conversely, Africa appears mostly unshaded, indicating minimal research output, underscoring the disparity in scientific contributions between developed nations and African countries.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Map on country scientific production.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0009.tif"/>
</fig>
<p>This observation emphasises the necessity for heightened investment and focus on scientific research concerning cervical cancer risk prediction in African nations. Closing this research gap not only aids in addressing the burden of cervical cancer within Africa but also holds promise for enhancing screening, prevention, and treatment strategies on a global scale. Redirecting resources and support towards scientific activities in Africa can pave the way for achieving greater equity in healthcare access and mitigating the global burden of cervical cancer.</p>
<p>The provided <xref ref-type="fig" rid="F10">Figure 10</xref> illustrates a succinct overview of the number of articles produced each year in the field of predictive modelling for cervical cancer risk. Spanning from &#x0007E;2013 to 2023, the <italic>x</italic>-axis denotes the years, while the <italic>y</italic>-axis indicates the number of articles published. Notably, until around 2018, there was a steady production of fewer than five articles per year. However, post-2018, there is a notable surge in article production, reaching a peak in 2021 with over 15 articles. Subsequently, there is a sharp decline in 2022. This spike in 2021 indicates a surge in research activity, while the decline in 2022 may suggest a shift in research focus or external factors influencing publication trends.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Annual scientific production.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frma-09-1493944-g0010.tif"/>
</fig>
<p>This analysis of publication trends highlights the growing importance of predictive modelling in cervical cancer risk assessment, signifying its potential to improve preventative healthcare strategies. While the decline in 2023 publications requires further exploration, it highlights the dynamic nature of this research field. Continued monitoring of publication trends alongside a deeper understanding of the underlying reasons for these shifts can provide valuable insights for researchers and stakeholders invested in advancing this critical field.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Limitations</title>
<p>Limitations of this study include the exclusive reliance on the Scopus and Web of Science databases, potentially omitting relevant studies from other sources. Our search encompassed all literature on predictive modelling for cervical cancer risk. However, scholarly attention to this topic became prominent only from 2013 onwards. No literature predating 2013 addressed predictive modelling for cervical cancer risk, leading to the retrieval of relevant articles exclusively from 2013 onwards.</p>
</sec>
<sec id="s5">
<title>5 Conclusion</title>
<p>This study aimed to identify literature and thematic content in cervical cancer risk prediction modelling through citation analysis and to explore research trends, collaboration patterns, and niche areas. The study significantly enhanced the understanding of cervical cancer risk prediction by integrating Braun and Clarke&#x00027;s framework with NLP and LDA. It provided insights into core themes, relationships, and broader trends, offering a solid foundation for future research and improvements in cervical cancer prevention, diagnosis, and treatment.</p>
<p>Integrating diverse expertise from fields such as mathematical disciplines, biomedical health, healthcare practitioners, public health and policy is essential for a comprehensive approach to cervical cancer risk prediction. This interdisciplinary collaboration leads to more robust and holistic solutions. The adoption of advanced machine learning algorithms, transitioning from simpler models like logistic regression to more complex algorithms such as random forest and support vector machines, significantly enhances the accuracy and efficiency of cervical cancer detection and predictive modelling.</p>
<p>These advancements are crucial for early detection and improved patient outcomes, which are vital for effective public health interventions. However, many studies lack external validation on independent datasets, limiting the robustness and generalisability of their findings. Furthermore, addressing model interpretability is crucial for understanding prediction mechanisms and ensuring that healthcare providers can trust and effectively use these models in clinical settings.</p>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>FN: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. BM: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. PM: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. PS: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. MM: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. TH: Supervision, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Agbo</surname> <given-names>F. J.</given-names></name> <name><surname>Oyelere</surname> <given-names>S. S.</given-names></name> <name><surname>Suhonen</surname> <given-names>J.</given-names></name> <name><surname>Tukiainen</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Scientific production and thematic breakthroughs in smart learning environments: a bibliometric analysis</article-title>. <source>Smart Learn. Environ</source>. <volume>8</volume>:<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1186/s40561-020-00145-4</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname> <given-names>M. M.</given-names></name> <name><surname>Ahmed</surname> <given-names>K.</given-names></name> <name><surname>Bui</surname> <given-names>F. M.</given-names></name> <name><surname>Paul</surname> <given-names>B. K.</given-names></name> <name><surname>Ibrahim</surname> <given-names>S. M.</given-names></name> <name><surname>Quinn</surname> <given-names>J. M. W.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Machine learning-based statistical analysis for early stage detection of cervical cancer</article-title>. <source>Comput. Biol. Med</source>. <volume>139</volume>:<fpage>104985</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.104985</pub-id><pub-id pub-id-type="pmid">34735942</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alquran</surname> <given-names>H.</given-names></name> <name><surname>Azani Mustafa</surname> <given-names>W.</given-names></name> <name><surname>Abu Qasmieh</surname> <given-names>I.</given-names></name> <name><surname>Mohd Yacob</surname> <given-names>Y.</given-names></name> <name><surname>Alsalatie</surname> <given-names>M.</given-names></name> <name><surname>Al-Issa</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Cervical cancer classification using combined machine learning and deep learning approach</article-title>. <source>Comp. Mater. Continua</source> <volume>72</volume>, <fpage>5117</fpage>&#x02013;<lpage>5134</lpage>. <pub-id pub-id-type="doi">10.32604/cmc.2022.025692</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Braun</surname> <given-names>V.</given-names></name> <name><surname>Clarke</surname> <given-names>V.</given-names></name></person-group> (<year>2006</year>). <article-title>Using thematic analysis in psychology</article-title>. <source>Qual. Res. Psychol</source>. <volume>3</volume>, <fpage>77</fpage>&#x02013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.1191/1478088706qp063oa</pub-id><pub-id pub-id-type="pmid">32100154</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>D.</given-names></name> <name><surname>Lang</surname> <given-names>T.</given-names></name> <name><surname>Zou</surname> <given-names>D.</given-names></name> <name><surname>Tan</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Zhou</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Machine learning-based prediction of survival prognosis in cervical cancer</article-title>. <source>BMC Bioinform</source>. <volume>22</volume>:<fpage>331</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-021-04261-x</pub-id><pub-id pub-id-type="pmid">34134623</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Donthu</surname> <given-names>N.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Mukherjee</surname> <given-names>D.</given-names></name> <name><surname>Pandey</surname> <given-names>N.</given-names></name> <name><surname>Lim</surname> <given-names>W. M.</given-names></name></person-group> (<year>2021</year>). <article-title>How to conduct a bibliometric analysis: an overview and guidelines</article-title>. <source>J. Bus. Res</source>. <volume>133</volume>, <fpage>285</fpage>&#x02013;<lpage>296</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbusres.2021.04.070</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Esteva</surname> <given-names>A.</given-names></name> <name><surname>Robicquet</surname> <given-names>A.</given-names></name> <name><surname>Ramsundar</surname> <given-names>B.</given-names></name> <name><surname>Kuleshov</surname> <given-names>V.</given-names></name> <name><surname>DePristo</surname> <given-names>M.</given-names></name> <name><surname>Chou</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>A guide to deep learning in healthcare</article-title>. <source>Nat. Med</source>. <volume>25</volume>, <fpage>24</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-018-0316-z</pub-id><pub-id pub-id-type="pmid">30617335</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Goldman</surname> <given-names>R.</given-names></name> <name><surname>Shivakumar</surname> <given-names>N.</given-names></name> <name><surname>Venkatasubramanian</surname> <given-names>S.</given-names></name> <name><surname>Garcia-Molina</surname> <given-names>H.</given-names></name></person-group> (<year>1998</year>). <article-title>&#x0201C;Proximity search in databases,&#x0201D;</article-title> in <source>24th International Conference on Very Large Data Bases (VLDB 1998), August 24&#x02013;27, 1998</source> (<publisher-loc>New York, NY</publisher-loc>).</citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gultekin</surname> <given-names>M.</given-names></name> <name><surname>Ramirez</surname> <given-names>P. T.</given-names></name> <name><surname>Broutet</surname> <given-names>N.</given-names></name> <name><surname>Hutubessy</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>World Health Organization call for action to eliminate cervical cancer globally</article-title>. <source>Int. J. Gynecol. Cancer</source> <volume>30</volume>, <fpage>426</fpage>&#x02013;<lpage>427</lpage>. <pub-id pub-id-type="doi">10.1136/ijgc-2020-001285</pub-id><pub-id pub-id-type="pmid">32122950</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>B.</given-names></name> <name><surname>Dixon</surname> <given-names>P.</given-names></name> <name><surname>Jacobs</surname> <given-names>J.</given-names></name> <name><surname>Dennerlein</surname> <given-names>J.</given-names></name> <name><surname>Schiffman</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Machine learning algorithms based on signals from a single wearable inertial sensor can detect surface- and age-related differences in walking</article-title>. <source>J. Biomech</source>. <volume>71</volume>, <fpage>37</fpage>&#x02013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbiomech.2018.01.005</pub-id><pub-id pub-id-type="pmid">29452755</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hussain</surname> <given-names>E.</given-names></name> <name><surname>Mahanta</surname> <given-names>L. B.</given-names></name> <name><surname>Das</surname> <given-names>C. R.</given-names></name> <name><surname>Talukdar</surname> <given-names>R. K.</given-names></name></person-group> (<year>2020</year>). <article-title>A comprehensive study on the multi-class cervical cancer diagnostic prediction on pap smear images using a fusion-based decision from ensemble deep convolutional neural network</article-title>. <source>Tissue Cell</source> <volume>65</volume>:<fpage>101347</fpage>. <pub-id pub-id-type="doi">10.1016/j.tice.2020.101347</pub-id><pub-id pub-id-type="pmid">32746984</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ijaz</surname> <given-names>M. F.</given-names></name> <name><surname>Attique</surname> <given-names>M.</given-names></name> <name><surname>Son</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>Data-driven cervical cancer prediction model with outlier detection and over-sampling methods</article-title>. <source>Sensors</source> <volume>20</volume>:<fpage>2809</fpage>. <pub-id pub-id-type="doi">10.3390/s20102809</pub-id><pub-id pub-id-type="pmid">32429090</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jelodar</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Yuan</surname> <given-names>C.</given-names></name> <name><surname>Feng</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Latent Dirichlet allocation (LDA) and topic modeling: models, applications, a survey</article-title>. <source>Multimed. Tools Appl</source>. <volume>78</volume>, <fpage>15169</fpage>&#x02013;<lpage>15211</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-018-6894-4</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jimma</surname> <given-names>B. L.</given-names></name></person-group> (<year>2023</year>). <article-title>Artificial intelligence in healthcare: a bibliometric analysis</article-title>. <source>Telemat. Inf. Rep</source>. <volume>9</volume>:<fpage>100041</fpage>. <pub-id pub-id-type="doi">10.1016/j.teler.2023.100041</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khare</surname> <given-names>A.</given-names></name> <name><surname>Jain</surname> <given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>Mapping the conceptual and intellectual structure of the consumer vulnerability field: a bibliometric analysis</article-title>. <source>J. Bus. Res</source>. <volume>150</volume>, <fpage>567</fpage>&#x02013;<lpage>584</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbusres.2022.06.039</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>C.</given-names></name> <name><surname>Qiao</surname> <given-names>S.</given-names></name> <name><surname>Olatosi</surname> <given-names>B.</given-names></name> <name><surname>Lyu</surname> <given-names>T.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name></person-group> (<year>2021</year>). <article-title>Emergence and evolution of big data science in HIV research: bibliometric analysis of federally sponsored studies 2000&#x02013;2019</article-title>. <source>Int. J. Med. Inform</source>. <volume>154</volume>:<fpage>104558</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2021.104558</pub-id><pub-id pub-id-type="pmid">34481301</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Shen</surname> <given-names>J.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Prediction of lymph node status in patients with early-stage cervical cancer based on radiomic features of magnetic resonance imaging (MRI) images</article-title>. <source>BMC Med. Imaging</source> <volume>23</volume>:<fpage>101</fpage>. <pub-id pub-id-type="doi">10.1186/s12880-023-01059-6</pub-id><pub-id pub-id-type="pmid">37528338</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>B.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Zeng</surname> <given-names>Z.</given-names></name> <name><surname>Zheng</surname> <given-names>B.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Establishment of early diagnosis models for cervical precancerous lesions using large-scale cervical cancer screening datasets</article-title>. <source>Virol. J</source>. <volume>19</volume>:<fpage>177</fpage>. <pub-id pub-id-type="doi">10.1186/s12985-022-01908-w</pub-id><pub-id pub-id-type="pmid">36335385</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Motamedi</surname> <given-names>N.</given-names></name> <name><surname>Sheikhshoaei</surname> <given-names>F.</given-names></name> <name><surname>Ghazimirsaeid</surname> <given-names>J.</given-names></name> <name><surname>Mansourzadeh</surname> <given-names>M. J.</given-names></name> <name><surname>Dehdarirad</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>Bibliometric analysis and topic modeling of information systems in maternal health publications</article-title>. <source>Int. J. Inf. Sci. Manag</source>. <volume>21</volume>, <fpage>85</fpage>&#x02013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.22034/ijism.2023.1977814.0</pub-id><pub-id pub-id-type="pmid">36190387</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x000D6;zt&#x000FC;rk</surname> <given-names>O.</given-names></name> <name><surname>Kocaman</surname> <given-names>R.</given-names></name> <name><surname>Kanbach</surname> <given-names>D. K.</given-names></name></person-group> (<year>2024</year>). <article-title>How to design bibliometric research: an overview and a framework proposal</article-title>. <source>Rev. Manag. Sci.</source> <volume>18</volume>, <fpage>3333</fpage>&#x02013;<lpage>3361</lpage>. <pub-id pub-id-type="doi">10.1007/s11846-024-00738-0</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rahimi</surname> <given-names>M.</given-names></name> <name><surname>Akbari</surname> <given-names>A.</given-names></name> <name><surname>Asadi</surname> <given-names>F.</given-names></name> <name><surname>Emami</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>Cervical cancer survival prediction by machine learning algorithms: a systematic review</article-title>. <source>BMC Cancer</source> <volume>23</volume>:<fpage>341</fpage>. <pub-id pub-id-type="doi">10.1186/s12885-023-10808-3</pub-id><pub-id pub-id-type="pmid">37055741</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>S. K.</given-names></name> <name><surname>Goyal</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Performance analysis of machine learning algorithms for cervical cancer detection</article-title>. <source>Int. J. Healthc. Inf. Syst. Inf</source>. <volume>15</volume>, <fpage>1</fpage>&#x02013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.4018/IJHISI.2020040101</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="book"><person-group person-group-type="author"><collab>StatsSA</collab></person-group> (<year>2023</year>). <source>Cancer in South Africa/Statistics South Africa</source>. <publisher-loc>Pretoria</publisher-loc>: <publisher-name>Statistics South Africa</publisher-name>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Swanson</surname> <given-names>A. A.</given-names></name> <name><surname>Pantanowitz</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>The evolution of cervical cancer screening</article-title>. <source>J. Am. Soc. Cytopathol</source>. <volume>13</volume>, <fpage>10</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1016/j.jasc.2023.09.007</pub-id><pub-id pub-id-type="pmid">37865567</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uddin</surname> <given-names>S.</given-names></name> <name><surname>Khan</surname> <given-names>A.</given-names></name> <name><surname>Hossain</surname> <given-names>M. E.</given-names></name> <name><surname>Moni</surname> <given-names>M. A.</given-names></name></person-group> (<year>2019</year>). <article-title>Comparing different supervised machine learning algorithms for disease prediction</article-title>. <source>BMC Med. Inform. Decis. Mak</source>. <volume>19</volume>:<fpage>281</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-019-1004-8</pub-id><pub-id pub-id-type="pmid">31864346</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vargas-Cardona</surname> <given-names>H. D.</given-names></name> <name><surname>Rodriguez-Lopez</surname> <given-names>M.</given-names></name> <name><surname>Arrivillaga</surname> <given-names>M.</given-names></name> <name><surname>Vergara-Sanchez</surname> <given-names>C.</given-names></name> <name><surname>Garc&#x000ED;a-Cifuentes</surname> <given-names>J. P.</given-names></name> <name><surname>Berm&#x000FA;dez</surname> <given-names>P. C.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Artificial intelligence for cervical cancer screening: scoping review, 2009&#x02013;2022</article-title>. <source>Int. J. Gynecol. Obstetr.</source> <volume>165</volume>, <fpage>566</fpage>&#x02013;<lpage>578</lpage>. <pub-id pub-id-type="doi">10.1002/ijgo.15179</pub-id><pub-id pub-id-type="pmid">37811597</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="book"><person-group person-group-type="author"><collab>WHO</collab></person-group> (<year>2024</year>). <source>Human Papillomavirus and Cancer</source>. <publisher-loc>Geneva</publisher-loc>: <publisher-name>World Health Organization</publisher-name>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yaman</surname> <given-names>O.</given-names></name> <name><surname>Tuncer</surname> <given-names>T.</given-names></name></person-group> (<year>2022</year>). <article-title>Exemplar pyramid deep feature extraction based cervical cancer image classification model using pap-smear images</article-title>. <source>Biomed. Signal Process. Control</source> <volume>73</volume>:<fpage>103428</fpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2021.103428</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>W.</given-names></name> <name><surname>Gou</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>T.</given-names></name> <name><surname>Yi</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Cervical cancer risk prediction model and analysis of risk factors based on machine learning,&#x0201D;</article-title> in <source>Proceedings of the 2019 11th International Conference on Bioinformatics and Biomedical Technology</source> (<publisher-loc>Stockholm</publisher-loc>), <fpage>50</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1145/3340074.3340078</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name> <name><surname>Tang</surname> <given-names>R.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Yu</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Novel research and future prospects of artificial intelligence in cancer diagnosis and treatment</article-title>. <source>J. Hematol. Oncol</source>. <volume>16</volume>:<fpage>114</fpage>. <pub-id pub-id-type="doi">10.1186/s13045-023-01514-5</pub-id><pub-id pub-id-type="pmid">38012673</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Prosperi</surname> <given-names>M.</given-names></name> <name><surname>Bian</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>An ontology-based documentation of data discovery and integration process in cancer outcomes research</article-title>. <source>BMC Med. Inform. Decis. Mak</source>. <volume>20</volume>:<fpage>292</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-020-01270-3</pub-id><pub-id pub-id-type="pmid">33317497</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Bao</surname> <given-names>H.</given-names></name> <name><surname>Ma</surname> <given-names>L.</given-names></name> <name><surname>Song</surname> <given-names>B.</given-names></name> <name><surname>Di</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Real-world effectiveness of primary screening with high-risk human papillomavirus testing in the cervical cancer screening programme in China: a nationwide, population-based study</article-title>. <source>BMC Med</source>. <volume>19</volume>:<fpage>164</fpage>. <pub-id pub-id-type="doi">10.1186/s12916-021-02026-0</pub-id><pub-id pub-id-type="pmid">34261463</pub-id></citation></ref>
</ref-list>
</back>
</article>