<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2025.1734076</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Ensemble learning for predicting microsatellite instability in colorectal cancer using pretreatment colonoscopy images and clinical data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>You</surname><given-names>Jia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x2021;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3136760/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Shenghan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x2021;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Jianjie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname><given-names>Yaru</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Mengmeng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhou</surname><given-names>Chungen</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Jiang</surname><given-names>Bin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/558783/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Nanjing Hospital of Chinese Medicine Affiliated to Nanjing University of Chinese Medicine</institution>, <city>Nanjing</city>, <state>Jiangsu</state>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Biomedical Informatics, Harvard Medical School</institution>, <city>Boston</city>, <state>MA</state>,&#xa0;<country country="US">United States</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Bin Jiang, <email xlink:href="mailto:jbfirsth@aliyun.com">jbfirsth@aliyun.com</email></corresp>
<fn fn-type="present-address" id="fn003">
<label>&#x2020;</label>
<p>Present address: Shenghan Zhang, Department of Technology, PharMolix Inc., Shanghai, China</p></fn>
<fn fn-type="equal" id="fn004">
<label>&#x2021;</label>
<p>These authors contributed equally to this work</p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-02">
<day>02</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>15</volume>
<elocation-id>1734076</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>29</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 You, Zhang, Zhang, Chen, Zhang, Zhou and Jiang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>You, Zhang, Zhang, Chen, Zhang, Zhou and Jiang</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Microsatellite instability (MSI) is an important molecular biomarker in colorectal cancer (CRC), associated with favorable prognosis and response to immune checkpoint inhibitors. Conventional MSI testing, including immunohistochemistry (IHC) and polymerase chain reaction (PCR), is invasive, time-consuming, and resource-dependent, underscoring the need for non-invasive and automated alternatives. This study aimed to develop and evaluate an ensemble learning framework integrating pretreatment colonoscopy images and routine clinical data for non-invasive MSI prediction in CRC.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this retrospective study, patients with pathologically confirmed CRC and IHC-determined MSI status were included. Pretreatment colonoscopy images and routine clinical variables were collected. Five deep learning architectures (ResNet-50, EfficientNet, DenseNet, VGG-16, and Vision Transformer) were trained on image data, while four machine learning algorithms (Logistic Regression, Random Forest, Support Vector Machine, and Gradient Boosting) were trained on clinical data. The best-performing models from each modality were combined using a majority-voting ensemble. Model performance was assessed using accuracy, precision, recall, and area under the receiver operating characteristic curve (AUROC). Interpretability was evaluated using Gradient-weighted Class Activation Mapping (Grad-CAM) for image models and SHapley Additive exPlanations (SHAP) for clinical models.</p>
</sec>
<sec>
<title>Results</title>
<p>Among 1,855 patients, VGG-16 achieved the best image-based performance (AUROC = 0.896, accuracy = 0.832, recall = 0.708). Logistic Regression outperformed other clinical models (AUROC = 0.898, accuracy = 0.825, recall = 0.828). The ensemble model integrating both modalities achieved AUROC = 0.886, precision = 0.920, and recall = 0.845, outperforming single-modality approaches.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The proposed ensemble learning framework provides a non-invasive, interpretable, and accurate method for MSI prediction, offering potential to improve preoperative precision diagnostics and clinical decision-making in colorectal cancer.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>colonoscopy</kwd>
<kwd>colorectal cancer</kwd>
<kwd>deep learning</kwd>
<kwd>diagnostic model</kwd>
<kwd>ensemble learning</kwd>
<kwd>machine learning</kwd>
<kwd>microsatellite instability (MSI)</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for work and/or its publication. 1. General Program of Basic Research of Jiangsu Provincial Science and Technology Department (Grant No. BK20241749); 2. Nanjing Municipal Science and Technology Bureau (Grant No. YKK21218); 3. 2023 Basic Research Project of Nanjing Hospital of Chinese Medicine Affiliated to Nanjing University of Chinese Medicine (Grant No. YJJC202301); 4. Postgraduate Research &amp; Practice Innovation Program of Jiangsu Province (Grant No. SJCX25_0889).</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="43"/>
<page-count count="15"/>
<word-count count="7169"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Gastrointestinal Cancers: Colorectal Cancer</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Colorectal cancer (CRC) is the third most commonly diagnosed cancer and the second leading cause of cancer-related mortality worldwide, accounting for more than 900,000 deaths annually (<xref ref-type="bibr" rid="B1">1</xref>). Microsatellite instability (MSI), resulting from deficiency of the mismatch repair (MMR) system, is a key molecular subtype of CRC with critical clinical implications (<xref ref-type="bibr" rid="B2">2</xref>). MSI is associated with a more favorable prognosis in early-stage disease, particularly in stage II CRC (<xref ref-type="bibr" rid="B3">3</xref>). In addition, MSI tumors display marked responsiveness to immune checkpoint inhibitors, largely attributable to their high mutational burden and immunogenic microenvironment, making MSI status a crucial biomarker for guiding immunotherapy (<xref ref-type="bibr" rid="B4">4</xref>). Moreover, MSI serves as the molecular hallmark of Lynch syndrome, the most common hereditary colorectal cancer syndrome, and its detection is essential for identifying affected patients as well as at-risk family members (<xref ref-type="bibr" rid="B5">5</xref>). Consequently, MSI testing has become indispensable for guiding therapeutic decisions, predicting prognosis, and Lynch syndrome screening.</p>
<p>Current MSI testing primarily relies on immunohistochemistry (IHC) for MMR proteins and polymerase chain reaction (PCR)-based assays. Both approaches require tissue samples obtained through colonoscopy biopsy or surgical resection, which are inherently invasive and may lead to complications such as infection or bleeding. Tumor heterogeneity may also result in sampling bias, with MSI status underestimated or overestimated depending on the biopsy site (<xref ref-type="bibr" rid="B6">6</xref>). Furthermore, conventional testing methods often require several days to generate results and depend on specialized laboratory infrastructure, trained pathologists, and quality-controlled reagents, which are not universally available, particularly in resource-limited settings (<xref ref-type="bibr" rid="B7">7</xref>). These limitations underscore the urgent need for non-invasive, real-time, and cost-effective alternatives to support precision oncology.</p>
<p>Artificial intelligence (AI) has achieved rapid progress in medical image analysis (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B9">9</xref>), thereby providing promising opportunities for MSI prediction. Pathology-based models applying deep learning to hematoxylin and eosin (H&amp;E) slides have achieved high predictive accuracy by identifying morphological patterns associated with MSI (<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B11">11</xref>). Likewise, radiology-based approaches using CT and MRI have shown promise, either through end-to-end deep learning models applied directly to imaging data (<xref ref-type="bibr" rid="B12">12</xref>) or through radiomics workflows in which high-dimensional quantitative features are extracted and subsequently modeled using machine learning algorithms (<xref ref-type="bibr" rid="B13">13</xref>&#x2013;<xref ref-type="bibr" rid="B15">15</xref>). However, pathology-based methods remain invasive, and radiology-based approaches often require labor-intensive manual tumor segmentation and rely on imaging features that may not fully capture biologically relevant tissue characteristics.</p>
<p>Colonoscopy offers a compelling alternative. It is routinely performed for CRC screening, localization, and treatment (<xref ref-type="bibr" rid="B16">16</xref>,&#xa0;<xref ref-type="bibr" rid="B17">17</xref>), enabling the acquisition of high-quality, pretreatment images that reflect mucosal morphology, vascular patterns, and surface texture in real time. Compared with radiology, colonoscopy provides richer visual information at lower cost and without radiation exposure. Recent studies have demonstrated the feasibility of using colonoscopy images for MSI prediction (<xref ref-type="bibr" rid="B18">18</xref>,&#xa0;<xref ref-type="bibr" rid="B19">19</xref>). For instance, Lo et&#xa0;al. (<xref ref-type="bibr" rid="B19">19</xref>) developed a Vision Transformer (ViT) model that achieved an AUC of 0.86 in MSI detection, while Cai et&#xa0;al. (<xref ref-type="bibr" rid="B18">18</xref>) trained a convolutional model achieving AUROCs of 0.948 (internal) and 0.807 (external). Despite these advances, most image-based approaches remain unimodal, relying solely on visual information, which may limit their robustness and interpretability.</p>
<p>Recent evidence suggests that integrating multiple data modalities can significantly enhance model performance. Multimodal AI frameworks have shown an average AUC improvement of approximately six percentage points over unimodal models across medical domains (<xref ref-type="bibr" rid="B20">20</xref>). For MSI prediction, combining clinical features with pathology (<xref ref-type="bibr" rid="B21">21</xref>) or radiology data (<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B22">22</xref>) has been shown to improve accuracy. Within colonoscopy, Lo et&#xa0;al. (<xref ref-type="bibr" rid="B23">23</xref>) proposed a multimodal ViT model that concatenated colonoscopy image features with clinical data to predict colorectal cancer prognosis, achieving an AUC of 0.93 compared with 0.77 for colonoscopy images alone and 0.59 for clinical features alone. However, direct feature concatenation between heterogeneous data types may not optimally capture cross-modal relationships, emphasizing the need for more effective integration strategies.</p>
<p>Ensemble learning offers a practical and generalizable solution by combining outputs from multiple models to improve predictive stability and generalization. For instance, Cui et&#xa0;al. (<xref ref-type="bibr" rid="B24">24</xref>) applied a multimodal AI framework to the diagnosis of solid pancreatic lesions by integrating an endoscopic ultrasound imaging model with a clinical data model, achieving superior diagnostic accuracy compared with unimodal approaches. Likewise, recent work in bone tumor classification demonstrated that combining radiological imaging with clinical data outperformed image-only strategies (<xref ref-type="bibr" rid="B25">25</xref>). Building on these findings, integrating colonoscopy images with clinical data via ensemble learning may provide a non-invasive, interpretable, and clinically scalable approach for MSI prediction in CRC.</p>
<p>In this work, we developed and evaluated five deep learning architectures for colonoscopy image analysis and four machine learning classifiers for clinical data. Based on performance and balance across evaluation metrics, we selected representative models from each modality and integrated them using a majority-voting strategy to construct a multimodal ensemble framework. To enhance interpretability and facilitate clinical translation, we further applied Grad-CAM to visualize model attention in image-based predictions and SHAP to identify key feature contributions in the clinical models.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Study design</title>
<p>We retrospectively identified patients with CRC treated at Nanjing Hospital of Chinese Medicine between 2019 and 2024. Eligible patients met the following inclusion criteria: (1) Pathologically confirmed CRC; (2) Available MSI status determined by IHC. Patients were excluded if they had (1) received radiotherapy, chemotherapy, immunotherapy, or surgical resection prior to MSI testing or (2) synchronous colorectal tumors. MSI status was defined as loss of expression of at least one MMR protein, while preserved expression of all four proteins was classified as MSS.</p>
<p>Colonoscopy images were collected from the hospital&#x2019;s picture archiving and communication system (PACS). These images were obtained from patients undergoing colonoscopy for cancer screening and tumor localization. Images were obtained using multiple endoscopy platforms, including Olympus Medical Systems (CF-H260AI, CF-H290I, CF-Q260AL, CF-H170I), Fujifilm Medical Systems (EC-530WI, EC-L590ZW, EC-530WM, EC-600WM, EC-760R-V/M, EC-760ZP-V/M), and Pentax Medical Systems (EC-34-i10F, EC-38i10F, EC-3890Fi, EC-3890FK, EC-3870FK). All images were exported in their original resolution (ranging from 764 &#xd7; 504 to 2220 &#xd7; 1230 pixels) in JPG or BMP format for subsequent analysis.</p>
<p>Routine clinical data were extracted from electronic medical records. Based on prior literature, clinical expertise, and practical considerations, 50 routine variables were initially selected (see <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S1</bold></xref>). Variables with more than 20% missing data, including D-dimer, FOBT, CRP, GFR, and HbA1c, were excluded (see <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure S1</bold></xref>). Consequently, 45 clinical variables were retained for model development.</p>
<p>In total, 1,855 patients met the inclusion criteria, including 116 MSI and 1,739 MSS cases. Pretreatment colonoscopy images were available for 1,224 patients (10,411 MSS images and 1,096 MSI images), who were included in the image-based analyses. The workflow is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> and the overall process of patient screening and cohort inclusion is summarized in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Overview of the study workflow, including data preprocessing, model development, and ensemble integration.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g001.tif">
<alt-text content-type="machine-generated">Flowchart detailing a medical data processing pipeline. It includes three main sections: data preprocessing, data splitting, and model development. Preprocessing involves clinical and image data desensitization, annotation, and augmentation. Data is split into training (1,485 samples) and test sets (370 samples) using five-fold cross-validation. The model development section combines machine learning with clinical data factors like gender, age, and tumor location, highlighting the use of feature extraction and classification to predict outcomes. The final output is a comprehensive prediction using ensemble methods.</alt-text>
</graphic></fig>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Flowchart of patient inclusion and exclusion.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g002.tif">
<alt-text content-type="machine-generated">Flowchart depicting patient selection for a colorectal cancer study. Out of 2,822 patients, 967 were excluded due to absence of MSI status (687), prior anti-cancer therapy (270), or synchronous tumors (10). 1,855 patients remained eligible, providing 11,507 colonoscopy images.</alt-text>
</graphic></fig>
<p>The study protocol was approved by the Institutional Review Board (IRB) of the Nanjing Hospital of Chinese Medicine (Approval No. KY2024090), and the requirement for written informed consent was waived due to the retrospective nature of the study. All patient data were anonymized prior to analysis.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data preprocessing</title>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Colonoscopy images</title>
<p>Colonoscopy images went through an initial quality control step to exclude frames that were blurred, overexposed, narrow-band imaging (NBI)&#x2013;based, or showed inadequate bowel preparation. To eliminate non-informative content, we automatically removed black borders and on-screen text using a pixel-intensity&#x2013;based cropping algorithm. Each image was first converted to grayscale, and every row and column was scanned to identify the first and last positions where at least 20% of the pixels fell within a valid intensity range (5&#x2013;250). This threshold reliably distinguished the circular endoscopic field from the surrounding dark margins. The bounding box defined by these boundaries was then applied to the original RGB image to produce a clean, content-focused crop. The resulting images were then resized to 224 &#xd7; 224 pixels to match the deep learning model&#x2019;s input requirements.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Clinical data</title>
<p>Outliers in continuous variables were identified using fences in the interquartile range (IQR) <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>Q</mml:mi><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mn>1.5</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mi>I</mml:mi><mml:mi>Q</mml:mi><mml:mi>R</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x2004;</mml:mtext><mml:mi>Q</mml:mi><mml:mn>3</mml:mn><mml:mo>+</mml:mo><mml:mn>1.5</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mi>I</mml:mi><mml:mi>Q</mml:mi><mml:mi>R</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> and compared to the nearest bound prior to imputation. Missing values were imputed using the Multivariate Imputation by Chained Equations (MICE) method. After imputation, categorical variables were transformed using one-hot encoding, and all variables were then standardized using z-score normalization.</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Data splitting and augmentation</title>
<p>The dataset was split at the patient level into training (80%) and test (20%) sets with stratified sampling to preserve MSI/MSS ratios. The prevalence of MSI in our dataset was 6.29%, reflecting its relatively low frequency in colorectal cancer. Such class imbalance poses a significant challenge for modelling, as it may bias predictions toward the majority class (MSS) while underrepresenting the minority class (MSI). This imbalance can result in reduced sensitivity for MSI detection, impaired generalization, and misleading performance metrics. Addressing this issue is therefore critical to ensure that models achieve balanced performance across both classes.</p>
<p>To mitigate the impact of class imbalance and improve model robustness, data augmentation strategies were applied selectively to the minority class. For colonoscopy images, augmentation was performed exclusively on MSI samples during training to avoid further widening the gap between class sizes. The augmentation pipeline included random resized cropping, horizontal and vertical flipping, small rotations (&#xb1; 15&#xb0;), color jittering, perspective distortion, Gaussian blur, and random erasing, thereby enhancing resilience to variability introduced by different imaging devices and acquisition conditions. For the clinical data, the Synthetic Minority Over-sampling Technique (SMOTE) was applied to the training set to generate synthetic MSI cases by interpolating between existing minority-class samples. This approach increased representation of MSI without duplicating data and preserved the underlying feature distribution, thus improving the model&#x2019;s ability to recognize minority-class patterns.</p>
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Model development</title>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Image-based model</title>
<p>For image-based MSI status prediction, we implemented a flexible deep learning framework that supports a range of backbone architectures, including ResNet, EfficientNet, ViT, DenseNet, and VGG. All models were initialized with ImageNet-pretrained weights, with the final classification layer modified to match the binary output space. Images were resized to 224&#xd7;224 pixels and normalized by the ImageNet-specific preprocessing (<xref ref-type="bibr" rid="B26">26</xref>). Optimization was conducted using Adam, AdamW, or stochastic gradient descent (SGD) with momentum, with the default learning rate set to <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p>
<p>To address the substantial class imbalance between MSI and MSS images, we initially evaluated several imbalance-aware loss functions, including focal loss (<xref ref-type="bibr" rid="B27">27</xref>), Tversky loss (<xref ref-type="bibr" rid="B28">28</xref>), focal Tversky loss (<xref ref-type="bibr" rid="B29">29</xref>), soft <italic>F<sub>&#x3b2;</sub></italic> loss (<xref ref-type="bibr" rid="B30">30</xref>), and soft precision&#x2013;recall penalties (<xref ref-type="bibr" rid="B31">31</xref>). These losses enhance minority-class learning by down-weighting easy majority-class samples, penalizing false negatives more strongly, or directly optimizing recall-oriented objectives. Although these approaches resulted in moderate performance gains, particularly in recall, the most substantial improvements were observed after applying targeted data augmentation to increase the diversity of MSI samples. After augmentation, standard binary cross-entropy provided the most stable and robust performance; therefore, all final models in this study were trained using binary cross-entropy.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Clinical data&#x2013;based machine learning model</title>
<p>For the tabular clinical features, we implemented a modular machine learning classification framework that supports four widely used algorithms: logistic regression (LR), support vector machine (SVM), random forest (RF), and gradient boosting classifier (GBC). All models were designed to output probabilistic predictions, thereby enabling downstream ensemble learning and calibration for clinical interpretability.</p>
<p>To address the class imbalance between MSI and MSS samples, we initially experimented with a range of class-weight configurations across all algorithms (for example, assigning higher penalties to the minority MSI class). However, empirical evaluation showed that class weighting did not improve model performance and, in some cases, slightly reduced it. We therefore adopted a data-level strategy and applied SMOTE to generate synthetic MSI samples during training. After SMOTE, unweighted versions of all classifiers yielded the most stable and robust performance; accordingly, all final clinical models in this study were trained using SMOTE-augmented data with unweighted class settings.</p>
</sec>
<sec id="s2_3_3">
<label>2.3.3</label>
<title>Training details</title>
<p>Model training was conducted in two stages. First, stratified five-fold cross-validation was employed to identify the optimal key hyperparameters, including learning rate, number of training epochs, optimizer choice, and convergence settings, enhancing the balance between predictive performance and generalizability. All image-based and clinical models were trained independently during this stage, without any parameter sharing or joint optimization.</p>
<p>For image-based models, the final configuration adopted for training employed the SGD optimizer (learning rate <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> batch size = 128), binary cross-entropy loss, and early stopping with a patience of three epochs based on validation loss. All image models were implemented in PyTorch (v2.7.1) and trained on an NVIDIA GeForce RTX 4070 Ti Super GPU.</p>
<p>For clinical tabular features, LR and SVM models were optimized with a stringent convergence tolerance (<inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>), and the maximum number of iterations for LR was set to <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mn>6</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> to ensure convergence stability. RF and GBC classifiers were trained with 200 estimators to achieve a balance between computational efficiency and predictive accuracy.</p>
<p>The optimal hyperparameters identified through cross-validation were used to retrain each model on the full training set. All models were trained independently, with no joint optimization or shared fine-tuning, and the resulting classifiers were then integrated using an ensemble learning strategy to generate the final multimodal predictions.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Multimodal ensemble integration</title>
<p>For ensemble learning, model selection was guided by multiple performance metrics, including accuracy, AUC, precision, and recall (<xref ref-type="bibr" rid="B32">32</xref>). For image-based prediction, VGG16, ViT, EfficientNet, and ResNet50 were chosen, as each demonstrated strengths across different evaluation criteria (see Section 3.2 for detailed results). For clinical tabular features, LR, GBC, and RF emerged as the top-performing models (see Section 3.3 for detailed results).</p>
<p>The ensemble was constructed in a <italic>post-hoc</italic> manner: once all selected models were fully trained, their predicted probabilities were aggregated using a probability-based majority voting strategy. Unlike hard voting, this approach integrates the calibrated probability outputs of individual models, allowing more nuanced decision-making and reducing the risk of dominance by any single classifier. In addition to soft probability-based majority voting, we also implemented a stacking ensemble using a multi-layer perceptron (MLP) meta-classifier. Stacking results were evaluated but not adopted, as it consistently demonstrated substantially lower recall for the minority MSI class. Full stacking performance is reported in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S2</bold></xref>.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Study cohort characteristics</title>
<p>Among the 1,844 patients (MSI = 113; MSS = 1,731), MSI cases were significantly younger (median 60 vs. 66 years; <italic>p</italic> = 0.004) and more frequently located in the right colon (51.3% vs. 14.8%; <italic>p &lt;</italic> 0.001). MSI patients had a lower prevalence of hypertension (32.7% vs. 44.6%; <italic>p</italic> = 0.014) and exhibited lower CEA (<italic>p &lt;</italic> 0.001). Hematologic indices revealed lower HGB, MCV, pLYM, and cLYM, with higher RDW and pNEUT in MSI cases (all <italic>p &lt;</italic> 0.001). In addition, MSI tumors were associated with lower bilirubin and lipid levels (e.g., LDL; <italic>p &lt;</italic> 0.05). These findings are consistent with the distinct clinical and biological profile of MSI colorectal cancer (<xref ref-type="bibr" rid="B33">33</xref>). Baseline characteristics of the cohort are summarized in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Baseline characteristics of the study cohort.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Characteristic</th>
<th valign="middle" align="left">Overall (N = 1,844)</th>
<th valign="middle" align="left">MSI (N = 113)</th>
<th valign="middle" align="left">MSS (N = 1,731)</th>
<th valign="middle" align="left">P-value</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="middle" align="left">Gender</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center">0.098</th>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Female</td>
<td valign="middle" align="center">729 (39.5%)</td>
<td valign="middle" align="center">53 (46.9%)</td>
<td valign="middle" align="center">676 (39.1%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Male</td>
<td valign="middle" align="center">1,115 (60.5%)</td>
<td valign="middle" align="center">60 (53.1%)</td>
<td valign="middle" align="center">1,055 (60.9%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Age</td>
<td valign="middle" align="center">66 (58, 73)</td>
<td valign="middle" align="center">60 (50, 72)</td>
<td valign="middle" align="center">66 (58, 73)</td>
<td valign="middle" align="center">0.004</td>
</tr>
<tr>
<td valign="middle" align="left">Other primary tumor</td>
<td valign="middle" align="center">76 (4.1%)</td>
<td valign="middle" align="center">9 (8.0%)</td>
<td valign="middle" align="center">67 (3.9%)</td>
<td valign="middle" align="center">0.047</td>
</tr>
<tr>
<td valign="middle" align="left">Hypertension</td>
<td valign="middle" align="center">799 (43.8%)</td>
<td valign="middle" align="center">37 (32.7%)</td>
<td valign="middle" align="center">762 (44.6%)</td>
<td valign="middle" align="center">0.014</td>
</tr>
<tr>
<td valign="middle" align="left">Diabetes</td>
<td valign="middle" align="center">303 (16.6%)</td>
<td valign="middle" align="center">12 (10.6%)</td>
<td valign="middle" align="center">291 (17.0%)</td>
<td valign="middle" align="center">0.076</td>
</tr>
<tr>
<td valign="middle" align="left">Family history of tumor</td>
<td valign="middle" align="center">29 (1.6%)</td>
<td valign="middle" align="center">3 (2.7%)</td>
<td valign="middle" align="center">26 (1.5%)</td>
<td valign="middle" align="center">0.419</td>
</tr>
<tr>
<td valign="middle" align="left">Smoking</td>
<td valign="middle" align="center">497 (27.0%)</td>
<td valign="middle" align="center">27 (23.9%)</td>
<td valign="middle" align="center">470 (27.2%)</td>
<td valign="middle" align="center">0.441</td>
</tr>
<tr>
<td valign="middle" align="left">Drinking</td>
<td valign="middle" align="center">378 (20.5%)</td>
<td valign="middle" align="center">16 (14.2%)</td>
<td valign="middle" align="center">362 (21.0%)</td>
<td valign="middle" align="center">0.083</td>
</tr>
<tr>
<td valign="middle" align="left">Height</td>
<td valign="middle" align="center">165 (160,170)</td>
<td valign="middle" align="center">165 (160,170)</td>
<td valign="middle" align="center">165 (160,170)</td>
<td valign="middle" align="center">0.874</td>
</tr>
<tr>
<td valign="middle" align="left">Weight</td>
<td valign="middle" align="center">65 (57,72)</td>
<td valign="middle" align="center">62 (55,70)</td>
<td valign="middle" align="center">65 (57.5,72)</td>
<td valign="middle" align="center">0.056</td>
</tr>
<tr>
<td valign="middle" align="left">BMI</td>
<td valign="middle" align="center">23.6 (21.5,25.7)</td>
<td valign="middle" align="center">23.1 (20.9,25.2)</td>
<td valign="middle" align="center">23.6 (21.6,25.8)</td>
<td valign="middle" align="center">0.033</td>
</tr>
<tr>
<th valign="middle" align="left">Tumor location</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"><italic>&lt;</italic> 0.001</th>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Left colon</td>
<td valign="middle" align="center">560 (30.4%)</td>
<td valign="middle" align="center">30 (26.6%)</td>
<td valign="middle" align="center">530 (30.6%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Rectum</td>
<td valign="middle" align="center">969 (52.6%)</td>
<td valign="middle" align="center">25 (22.1%)</td>
<td valign="middle" align="center">944 (54.6%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Right colon</td>
<td valign="middle" align="center">314 (17.0%)</td>
<td valign="middle" align="center">58 (51.3%)</td>
<td valign="middle" align="center">256 (14.8%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">AFP</td>
<td valign="middle" align="center">2.58 (1.90,3.57)</td>
<td valign="middle" align="center">2.36 (1.74,3.15)</td>
<td valign="middle" align="center">2.60 (1.92,3.59)</td>
<td valign="middle" align="center">0.036</td>
</tr>
<tr>
<td valign="middle" align="left">CEA</td>
<td valign="middle" align="center">3.80 (2.16,8.60)</td>
<td valign="middle" align="center">2.50 (1.51,5.15)</td>
<td valign="middle" align="center">3.86 (2.24,8.78)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">CA125</td>
<td valign="middle" align="center">9.75 (6.92,13.97)</td>
<td valign="middle" align="center">10.63 (8.03,14.95)</td>
<td valign="middle" align="center">9.60 (6.88,13.96)</td>
<td valign="middle" align="center">0.018</td>
</tr>
<tr>
<td valign="middle" align="left">CA199</td>
<td valign="middle" align="center">12.06 (7.29,21.68)</td>
<td valign="middle" align="center">10.96 (6.58,17.37)</td>
<td valign="middle" align="center">12.15 (7.30,21.88)</td>
<td valign="middle" align="center">0.174</td>
</tr>
<tr>
<td valign="middle" align="left">CRP</td>
<td valign="middle" align="center">4.00 (2.00,12.00)</td>
<td valign="middle" align="center">9.35 (3.00,27.80)</td>
<td valign="middle" align="center">3.50 (2.00,10.00)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">WBC</td>
<td valign="middle" align="center">6.10 (5.00,7.40)</td>
<td valign="middle" align="center">5.90 (5.00,7.70)</td>
<td valign="middle" align="center">6.10 (5.02,7.32)</td>
<td valign="middle" align="center">0.626</td>
</tr>
<tr>
<td valign="middle" align="left">pNEUT</td>
<td valign="middle" align="center">61.8 (55.9,68.5)</td>
<td valign="middle" align="center">64.4 (58.8,71.6)</td>
<td valign="middle" align="center">61.4 (55.6,68.3)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">pLYM</td>
<td valign="middle" align="center">27.4 (21.3,33.0)</td>
<td valign="middle" align="center">24.5 (19.8,30.3)</td>
<td valign="middle" align="center">27.5 (21.5,33.3)</td>
<td valign="middle" align="center">0.001</td>
</tr>
<tr>
<td valign="middle" align="left">pMONO</td>
<td valign="middle" align="center">6.40 (5.40,7.50)</td>
<td valign="middle" align="center">6.50 (5.60,7.40)</td>
<td valign="middle" align="center">6.40 (5.40,7.50)</td>
<td valign="middle" align="center">0.429</td>
</tr>
<tr>
<td valign="middle" align="left">cNEUT</td>
<td valign="middle" align="center">3.70 (2.90,4.77)</td>
<td valign="middle" align="center">3.79 (3.01,5.04)</td>
<td valign="middle" align="center">3.70 (2.90,4.74)</td>
<td valign="middle" align="center">0.376</td>
</tr>
<tr>
<td valign="middle" align="left">cLYM</td>
<td valign="middle" align="center">1.60 (1.26,2.00)</td>
<td valign="middle" align="center">1.43 (1.13,1.83)</td>
<td valign="middle" align="center">1.61 (1.27,2.02)</td>
<td valign="middle" align="center">0.003</td>
</tr>
<tr>
<td valign="middle" align="left">cMONO</td>
<td valign="middle" align="center">0.39 (0.31,0.49)</td>
<td valign="middle" align="center">0.39 (0.30,0.53)</td>
<td valign="middle" align="center">0.39 (0.31,0.49)</td>
<td valign="middle" align="center">0.504</td>
</tr>
<tr>
<td valign="middle" align="left">RBC</td>
<td valign="middle" align="center">4.22 (3.87,4.57)</td>
<td valign="middle" align="center">4.10 (3.68,4.34)</td>
<td valign="middle" align="center">4.23 (3.88,4.59)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">HGB</td>
<td valign="middle" align="center">126 (111,137)</td>
<td valign="middle" align="center">114 (91,127)</td>
<td valign="middle" align="center">126 (112,138)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">MCV</td>
<td valign="middle" align="center">90.1 (85.7,93.5)</td>
<td valign="middle" align="center">87.4 (78.8,91.1)</td>
<td valign="middle" align="center">90.3 (85.9,93.6)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">RDW</td>
<td valign="middle" align="center">13.3 (12.8,14.1)</td>
<td valign="middle" align="center">13.6 (13.0,15.3)</td>
<td valign="middle" align="center">13.3 (12.8,14.1)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">PLT</td>
<td valign="middle" align="center">212 (166,266)</td>
<td valign="middle" align="center">221 (152,281)</td>
<td valign="middle" align="center">211 (166,264)</td>
<td valign="middle" align="center">0.645</td>
</tr>
<tr>
<td valign="middle" align="left">ALT</td>
<td valign="middle" align="center">15 (10,21)</td>
<td valign="middle" align="center">12 (9,18)</td>
<td valign="middle" align="center">15 (10,21)</td>
<td valign="middle" align="center">0.001</td>
</tr>
<tr>
<td valign="middle" align="left">AST</td>
<td valign="middle" align="center">16 (13,21)</td>
<td valign="middle" align="center">15 (11.5,19)</td>
<td valign="middle" align="center">16 (13,21)</td>
<td valign="middle" align="center">0.004</td>
</tr>
<tr>
<td valign="middle" align="left">TP</td>
<td valign="middle" align="center">64.9 (61.5,69.0)</td>
<td valign="middle" align="center">64.9 (60.9,69.1)</td>
<td valign="middle" align="center">64.9 (61.6,68.9)</td>
<td valign="middle" align="center">0.727</td>
</tr>
<tr>
<td valign="middle" align="left">ALB</td>
<td valign="middle" align="center">38.2 (35.7,40.8)</td>
<td valign="middle" align="center">37.5 (34.8,39.8)</td>
<td valign="middle" align="center">38.2 (35.7,40.9)</td>
<td valign="middle" align="center">0.027</td>
</tr>
<tr>
<td valign="middle" align="left">GLB</td>
<td valign="middle" align="center">27.1 (24.3,30.1)</td>
<td valign="middle" align="center">27.8 (24.4,31.1)</td>
<td valign="middle" align="center">27.0 (24.3,30.0)</td>
<td valign="middle" align="center">0.096</td>
</tr>
<tr>
<td valign="middle" align="left">A/G ratio</td>
<td valign="middle" align="center">1.38 (1.10,1.55)</td>
<td valign="middle" align="center">1.32 (1.00,1.49)</td>
<td valign="middle" align="center">1.39 (1.11,1.56)</td>
<td valign="middle" align="center">0.020</td>
</tr>
<tr>
<td valign="middle" align="left">TBil</td>
<td valign="middle" align="center">11.3 (8.4,14.9)</td>
<td valign="middle" align="center">9.7 (6.7,12.5)</td>
<td valign="middle" align="center">11.3 (8.5,15.0)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">DBil</td>
<td valign="middle" align="center">3.00 (2.20,4.00)</td>
<td valign="middle" align="center">2.50 (2.00,3.50)</td>
<td valign="middle" align="center">3.00 (2.20,4.00)</td>
<td valign="middle" align="center">0.001</td>
</tr>
<tr>
<td valign="middle" align="left">IBil</td>
<td valign="middle" align="center">8.10 (5.90,11.00)</td>
<td valign="middle" align="center">7.20 (4.90,9.10)</td>
<td valign="middle" align="center">8.20 (6.05,11.15)</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">Glu</td>
<td valign="middle" align="center">5.19 (4.68,5.97)</td>
<td valign="middle" align="center">5.09 (4.75,5.92)</td>
<td valign="middle" align="center">5.19 (4.67,5.97)</td>
<td valign="middle" align="center">0.722</td>
</tr>
<tr>
<td valign="middle" align="left">TC</td>
<td valign="middle" align="center">4.59 (3.98,5.23)</td>
<td valign="middle" align="center">4.29 (3.80,5.04)</td>
<td valign="middle" align="center">4.60 (4.00,5.25)</td>
<td valign="middle" align="center">0.006</td>
</tr>
<tr>
<td valign="middle" align="left">TG</td>
<td valign="middle" align="center">1.18 (0.88,1.62)</td>
<td valign="middle" align="center">1.09 (0.82,1.50)</td>
<td valign="middle" align="center">1.19 (0.89,1.63)</td>
<td valign="middle" align="center">0.064</td>
</tr>
<tr>
<td valign="middle" align="left">HDL</td>
<td valign="middle" align="center">1.16 (1.00,1.36)</td>
<td valign="middle" align="center">1.08 (0.93,1.33)</td>
<td valign="middle" align="center">1.16 (1.00,1.36)</td>
<td valign="middle" align="center">0.011</td>
</tr>
<tr>
<td valign="middle" align="left">LDL</td>
<td valign="middle" align="center">2.63 (2.17,3.13)</td>
<td valign="middle" align="center">2.41 (2.05,2.93)</td>
<td valign="middle" align="center">2.64 (2.19,3.14)</td>
<td valign="middle" align="center">0.027</td>
</tr>
<tr>
<td valign="middle" align="left">Urea</td>
<td valign="middle" align="center">5.50 (4.55,6.70)</td>
<td valign="middle" align="center">5.22 (3.99,6.19)</td>
<td valign="middle" align="center">5.52 (4.58,6.73)</td>
<td valign="middle" align="center">0.005</td>
</tr>
<tr>
<td valign="middle" align="left">Crea</td>
<td valign="middle" align="center">66.0 (56.0,78.0)</td>
<td valign="middle" align="center">64.0 (50.9,75.0)</td>
<td valign="middle" align="center">67.0 (56.0,78.0)</td>
<td valign="middle" align="center">0.054</td>
</tr>
<tr>
<td valign="middle" align="left">UA</td>
<td valign="middle" align="center">302 (245,368)</td>
<td valign="middle" align="center">292 (225,363)</td>
<td valign="middle" align="center">303 (246,368)</td>
<td valign="middle" align="center">0.159</td>
</tr>
<tr>
<td valign="middle" align="left">GFR</td>
<td valign="middle" align="center">98 (89,104)</td>
<td valign="middle" align="center">99 (90,107)</td>
<td valign="middle" align="center">98 (89,104)</td>
<td valign="middle" align="center">0.396</td>
</tr>
<tr>
<th valign="middle" align="left">FOBT</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center">0.801</th>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Negative</td>
<td valign="middle" align="center">89 (8.2%)</td>
<td valign="middle" align="center">5 (7.5%)</td>
<td valign="middle" align="center">84 (8.3%)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Positive</td>
<td valign="middle" align="center">985 (91.8%)</td>
<td valign="middle" align="center">62 (92.5%)</td>
<td valign="middle" align="center">923 (91.7%)</td>
<td valign="middle" align="center"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p><italic>Statistical tests:</italic> Pearson&#x2019;s Chi-squared test, Wilcoxon rank-sum test, Fisher&#x2019;s exact test.</p></fn>
<fn>
<p>Continuous variables are expressed as median (IQR). MSI, microsatellite instability; MSS, microsatellite stable.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Image-based deep learning model evaluation</title>
<p>We trained and evaluated five deep learning architectures on colonoscopy images for MSI prediction: ResNet50, EfficientNet, DenseNet, VGG16, and ViT. As summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, all models demonstrated good discriminative ability, with AUROC values ranging from 0.873 to 0.896. VGG16 achieved the best overall balance, with the highest accuracy (0.832), precision of 0.943, recall of 0.708, and an AUROC of 0.894. ViT showed comparable performance, achieving the highest recall (0.721) and the best AUROC (0.896), although with slightly lower precision (0.911). ResNet50 and EfficientNet reached very high precision (0.955 and 0.963, respectively) but lower recall (&#x223c; 0.68), indicating that their positive predictions were highly reliable but more conservative, potentially missing MSI cases. DenseNet also performed well (accuracy 0.818, AUROC 0.891, precision 0.941, recall 0.678), though it did not outperform VGG16 or ViT.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Performance of image-based deep learning classifiers for MSI prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Classifier</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">AUROC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">ResNet-50</td>
<td valign="middle" align="center">0.825</td>
<td valign="middle" align="center">0.955</td>
<td valign="middle" align="center">0.681</td>
<td valign="middle" align="center">0.873</td>
</tr>
<tr>
<td valign="middle" align="left">EfficientNet</td>
<td valign="middle" align="center">0.825</td>
<td valign="middle" align="center"><bold>0.963</bold></td>
<td valign="middle" align="center">0.675</td>
<td valign="middle" align="center">0.877</td>
</tr>
<tr>
<td valign="middle" align="left">DenseNet</td>
<td valign="middle" align="center">0.818</td>
<td valign="middle" align="center">0.941</td>
<td valign="middle" align="center">0.678</td>
<td valign="middle" align="center">0.891</td>
</tr>
<tr>
<td valign="middle" align="left">VGG-16</td>
<td valign="middle" align="center"><bold>0.832</bold></td>
<td valign="middle" align="center">0.943</td>
<td valign="middle" align="center">0.708</td>
<td valign="middle" align="center">0.894</td>
</tr>
<tr>
<td valign="middle" align="left">Vision Transformer (ViT)</td>
<td valign="middle" align="center">0.825</td>
<td valign="middle" align="center">0.911</td>
<td valign="middle" align="center"><bold>0.721</bold></td>
<td valign="middle" align="center"><bold>0.896</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Results are reported on the independent test set. Best results for each metric are highlighted in bold.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Receiver operating characteristic (ROC) and precision&#x2013;recall (PR) curves for these models are provided in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>, which further illustrate these trade-offs. Specifically, ResNet50 and EfficientNet emphasize conservative, high-precision predictions, while VGG16 and ViT demonstrate a more favorable balance between sensitivity and specificity. Collectively, these findings confirm that image-based deep learning models can effectively discriminate MSI from MSS, with VGG16 and ViT offering the most clinically relevant performance, and ResNet50 and EfficientNet contributing complementary high-precision predictors.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Receiver operating characteristic (ROC) and precision&#x2013;recall (PR) curves for the five image-based deep learning models. The ROC (left) and PR (right) curves illustrate the discriminative performance of five deep learning architectures (ResNet-50, EfficientNet, DenseNet, VGG-16, and Vision Transformer).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g003.tif">
<alt-text content-type="machine-generated">Two side-by-side graphs compare model performance. The left graph is an ROC curve showing the true positive rate versus false positive rate for ResNet, EfficientNet, DenseNet, VGG, and ViT. VGG performs best with an AUC of 0.8938. The right graph is a Precision-Recall curve showing precision versus recall for the same models, with ViT performing best with an AP of 0.9218. The gray dashed line on the ROC curve represents random performance.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Clinical data&#x2013;based machine learning model evaluation</title>
<p>We compared the performance of four machine learning classifiers trained on routine clinical variables: LR, SVM, RF, and GBC. As summarized in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>, LR achieved the most balanced performance with an accuracy of 0.825 and an AUROC of 0.898. Tree-based models (RF and GBC) and SVM demonstrated higher precision (&#x2265; 0.93) but substantially lower recall (0.55&#x2013;0.60), indicating that they identified fewer true MSI cases despite fewer false positives. In contrast, LR maintained a favorable trade-off between precision (0.823) and recall (0.828), suggesting superior sensitivity for MSI detection.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Performance of clinical data&#x2013;based machine learning classifiers for MSI prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Classifier</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">AUROC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Logistic Regression (LR)</td>
<td valign="middle" align="center"><bold>0.825</bold></td>
<td valign="middle" align="center">0.823</td>
<td valign="middle" align="center"><bold>0.828</bold></td>
<td valign="middle" align="center">0.898</td>
</tr>
<tr>
<td valign="middle" align="left">Support Vector Machine (SVM)</td>
<td valign="middle" align="center">0.768</td>
<td valign="middle" align="center">0.932</td>
<td valign="middle" align="center">0.579</td>
<td valign="middle" align="center"><bold>0.943</bold></td>
</tr>
<tr>
<td valign="middle" align="left">Random Forest (RF)</td>
<td valign="middle" align="center">0.757</td>
<td valign="middle" align="center">0.938</td>
<td valign="middle" align="center">0.551</td>
<td valign="middle" align="center">0.940</td>
</tr>
<tr>
<td valign="middle" align="left">Gradient Boosting Classifier (GBC)</td>
<td valign="middle" align="center">0.781</td>
<td valign="middle" align="center"><bold>0.946</bold></td>
<td valign="middle" align="center">0.596</td>
<td valign="middle" align="center">0.939</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Model performance was evaluated on the independent test set. Best results for each metric are in bold.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>ROC and PR curves for these classifiers are provided in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>. These visualizations further illustrate the trade-offs between model sensitivity and specificity, showing that while RF, GBC, and SVM achieved strong discriminative capability, LR provided the most robust and clinically practical performance by balancing precision and recall across the decision threshold.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Receiver operating characteristic (ROC) and precision&#x2013;recall (PR) curves for the four clinical data&#x2013;based machine learning models. The ROC (left) and PR (right) curves compare four classifiers trained on clinical variables: Logistic Regression (LR), Support Vector Machine (SVM), Random Forest (RF), and Gradient Boosting Classifier (GBC).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g004.tif">
<alt-text content-type="machine-generated">Side-by-side comparison of ROC and Precision-Recall curves for different models. The ROC curve plot includes models lr (AUC = 0.8982), svm (AUC = 0.9432), rf (AUC = 0.9449), gbc (AUC = 0.9392), with a dashed line for luck. The Precision-Recall curve plot shows models lr (AP = 0.8773), svm (AP = 0.9231), rf (AP = 0.9342), gbc (AP = 0.9355). Each model is represented with distinct colors.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Comparative performance across modalities</title>
<p>To better understand the complementary contributions of colonoscopy image&#x2013;based deep learning models and clinical data&#x2013;based machine learning models, we conducted a comparative analysis across the two modalities.</p>
<p>The image-based models demonstrated relatively higher recall for MSI prediction. For example, VGG-16 and ViT achieved recalls of 0.708 and 0.721, respectively, reflecting the ability of deep networks to capture fine-grained morphological patterns such as mucosal irregularity, glandular disruption, and abnormal vascularity. These cues appear particularly informative for detecting MSI tumors. However, the AUROC values of image models (0.873&#x2013;0.896) were modestly lower than those of the best clinical models, indicating greater variability in overall discrimination.</p>
<p>In contrast, the clinical machine learning models achieved higher AUROC but lower recall. The SVM obtained the highest AUROC (0.943), while Logistic Regression achieved the highest accuracy (0.825) with an AUROC of 0.898, highlighting the structured and biologically informative nature of clinical variables such as biochemical markers, hematologic indices, and patient demographics. However, the SVM and tree-based models were more conservative in predicting the MSI class, resulting in lower sensitivity. This pattern suggests that clinical variables better support global discrimination but are less effective at identifying minority-class MSI cases.</p>
<p>Taken together, these results demonstrate that each modality captures distinct and complementary aspects of MSI biology. Image-based models excel in sensitivity by detecting subtle morphological alterations, whereas clinical models provide stronger overall discrimination but miss more MSI cases. By integrating high-recall image predictors with high-AUROC clinical predictors, the ensemble achieves improved robustness and sensitivity compared with either modality alone.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Ensemble model evaluation</title>
<p>Using a majority voting strategy, we evaluated multiple combinations of clinical (LR, GBC, RF) and image-based (VGG16, ViT, ResNet50, EfficientNet, DenseNet) models (<xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>). Overall, ensembles consistently outperformed most single models in terms of balanced accuracy and recall. The best-performing ensembles included both clinical and image models, particularly LR + RF + ResNet + ViT + VGG + EfficientNet, which achieved the highest accuracy (0.886), recall (0.845), and AUROC (0.886) while maintaining high precision (0.920). Similar performance was observed when DenseNet was additionally included, suggesting that adding further redundant models did not provide incremental benefit. In contrast, smaller ensembles (e.g., LR + GBC + ViT + VGG) achieved lower recall (0.734), underscoring the importance of including diverse architectures.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Performance of ensemble models integrating clinical and image-based predictors for MSI prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Ensemble model</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">AUROC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Ensemble-1</td>
<td valign="middle" align="center">0.840</td>
<td valign="middle" align="center">0.932</td>
<td valign="middle" align="center">0.734</td>
<td valign="middle" align="center">0.840</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-2</td>
<td valign="middle" align="center">0.883</td>
<td valign="middle" align="center">0.920</td>
<td valign="middle" align="center">0.839</td>
<td valign="middle" align="center">0.883</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-3</td>
<td valign="middle" align="center">0.883</td>
<td valign="middle" align="center">0.920</td>
<td valign="middle" align="center">0.839</td>
<td valign="middle" align="center">0.883</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-4</td>
<td valign="middle" align="center">0.843</td>
<td valign="middle" align="center">0.932</td>
<td valign="middle" align="center">0.740</td>
<td valign="middle" align="center">0.843</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-5</td>
<td valign="middle" align="center">0.840</td>
<td valign="middle" align="center"><bold>0.948</bold></td>
<td valign="middle" align="center">0.720</td>
<td valign="middle" align="center">0.840</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-6</td>
<td valign="middle" align="center"><bold>0.886</bold></td>
<td valign="middle" align="center">0.920</td>
<td valign="middle" align="center"><bold>0.845</bold></td>
<td valign="middle" align="center"><bold>0.886</bold></td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble-7</td>
<td valign="middle" align="center">0.877</td>
<td valign="middle" align="center">0.913</td>
<td valign="middle" align="center">0.833</td>
<td valign="middle" align="center">0.877</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>All results are reported on the independent test set. Best results for each metric are highlighted in bold.</p></fn>
<fn>
<p>Ensemble configurations combine clinical and image-based models as follows: Ensemble-1 = LR + GBC + ViT + VGG; Ensemble-2 = LR + RF + ViT + VGG; Ensemble-3 = LR + RF + ResNet + ViT + VGG; Ensemble-4 = LR + GBC + ResNet + ViT + VGG; Ensemble-5 = LR + GBC + RF + ResNet + ViT + VGG; Ensemble-6 = LR + RF + ResNet + ViT + VGG + EfficientNet; Ensemble-7 = LR + RF + ResNet + ViT + VGG + EfficientNet + DenseNet.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Collectively, these findings indicate that integrating both clinical and image-based models provides more robust and reliable predictions of MSI status than individual models, with majority voting effectively balancing sensitivity and specificity. For completeness, we also evaluated stacking ensembles using the same model combinations. However, stacking yielded lower performance, especially in recall (0.54&#x2013;0.60), and therefore was not selected as the primary integration strategy. Detailed stacking results are provided in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S2</bold></xref>.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Interpretability analysis</title>
<p>To enhance transparency and clinical relevance, interpretability analyses were conducted for both image and clinical models. For deep learning models, Grad-CAM was applied to visualize salient image regions most influential in MSI predictions, allowing qualitative assessment of whether the model attended to relevant mucosal and vascular patterns. For clinical machine learning models, SHAP (SHapley Additive exPlanations) values were used to quantify the contribution of each variable to predictions. SHAP values provided both global feature importance rankings and local instance-level explanations.</p>
<p>For the image models, Grad-CAM visualization revealed that the networks primarily focused on tumor regions and surrounding mucosal structures when generating predictions. In MSS cases, highlighted areas tended to align with the tumor bulk and adjacent mucosa (<xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>), whereas in MSI cases, attention maps often emphasized irregular lesion borders and heterogeneous mucosal patterns (<xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>). These findings suggest that the models leveraged clinically plausible visual cues consistent with endoscopic examination, thereby supporting the biological interpretability of the predictions.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Representative Grad-CAM visualizations for MSS colonoscopy images. Each row shows the original image (left), the Grad-CAM activation map (middle), and the merged overlay (right).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g005.tif">
<alt-text content-type="machine-generated">Three sets of images show colonoscopy results, gradcam heatmaps, and merged images predicting class 1 abnormalities. The left column displays original colon images with lesions, the middle shows corresponding gradcam maps highlighting areas in red and yellow, and the right column merges these, indicating predicted abnormal areas.</alt-text>
</graphic></fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Representative Grad-CAM visualizations for MSI colonoscopy images. Each row shows the original image (left), the Grad-CAM activation map (middle), and the merged overlay (right).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g006.tif">
<alt-text content-type="machine-generated">Three rows of medical images. The left column shows original colonoscopy images of the colon. The middle column displays corresponding Grad-CAM heatmaps, highlighting areas of interest in blue, yellow, and red. The right column merges the original images with Grad-CAM overlays to indicate predicted classifications.</alt-text>
</graphic></fig>
<p>For the clinical models, SHAP analysis identified both demographic and clinical factors as key predictors of MSI. Height, gender, GLB, A/G ratio, weight, and tumor location emerged as the most influential features, with additional contributions from BMI, hypertension, and peripheral blood indices such as pNEUT and cLYM (<xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7a</bold></xref>). The beeswarm plots highlighted patient-level heterogeneity, with certain variables (e.g., tumor location and anthropometric measures) exerting consistent directional effects, whereas others showed more variable impacts (<xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7b</bold></xref>). At the individual patient level, waterfall plots demonstrated how combinations of features synergistically increased or decreased the likelihood of MSI prediction, providing a transparent rationale for model outputs (<xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7c</bold></xref>).</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>SHAP-based interpretability analysis of the clinical model for MSI prediction. <bold>(A)</bold> Global feature importance ranked by mean absolute SHAP value. <bold>(B)</bold> SHAP beeswarm plot showing the distribution and direction of feature effects across all patients; red represents higher feature values and blue lower values. <bold>(C)</bold> Example of an individual patient&#x2019;s SHAP explanation (waterfall plot), illustrating how specific feature values increased (red) or decreased (blue) the predicted probability of MSI.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1734076-g007.tif">
<alt-text content-type="machine-generated">Three SHAP plots are displayed: Panel A is a bar chart showing the average impact of features on model output, with &#x201c;Height&#x201d; and &#x201c;GenderMale&#x201d; as top features. Panel B is a beeswarm plot displaying feature impact distribution, highlighting &#x201c;Height&#x201d; and &#x201c;GenderMale&#x201d; with varying feature values. Panel C is a SHAP explanation plot for one sample, illustrating how features like &#x201c;Tumor_Location.Rectum&#x201d; and &#x201c;Weight&#x201d; influence model prediction, with positive values in red and negative in blue.</alt-text>
</graphic></fig>
<p>Together, the interpretability analyses confirm that both clinical and image-based models captured meaningful features, enhancing trust in the ensemble framework by linking predictive signals to clinically relevant patterns.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study, we developed and evaluated a multimodal ensemble model integrating colonoscopy images and clinical data for MSI prediction in colorectal cancer. Both image-based and clinical-based models showed strong discriminative performance, with VGG16 excelling among image models, and LR performing best among clinical models. Importantly, a majority-voting ensemble combining image and clinical data achieved better performance than single models.</p>
<p>Prior work has shown that MSI can be predicted from pathology and radiology images, including H&amp;E whole-slide models such as WiseMSI (<xref ref-type="bibr" rid="B34">34</xref>) and CT/MRI radiomics approaches (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>). While these imaging modalities have demonstrated strong performance, they typically require tissue sampling, specialized scanners, or labor-intensive tumor segmentation, which may limit scalability in routine clinical practice. In contrast, colonoscopy is widely available, non-invasive, and performed before treatment in nearly all patients, making it a practical platform for real-time MSI risk stratification.</p>
<p>Colonoscopy has been explored as a promising modality for MSI prediction in colorectal cancer. Lo et&#xa0;al. (<xref ref-type="bibr" rid="B19">19</xref>) applied a Vision Transformer to data from 441 patients (34 MSI, 407 MSS), achieving an AUC of 0.86, with a sensitivity of 0.47 and a specificity of 0.94. Cai et&#xa0;al. (<xref ref-type="bibr" rid="B18">18</xref>) subsequently developed MMR-Scopy, a ResNet50-based model trained on 5,226 colonoscopy images, which achieved an AUROC of 0.948 in the internal test set and 0.807 in external validation, with a sensitivity of 0.796 and a specificity of 0.670. In contrast to these unimodal approaches, our study leveraged a substantially larger cohort of 1,844 patients (113 MSI, 1,731 MSS) and 11,507 colonoscopy images, providing greater statistical power and model robustness. Furthermore, we extended the framework beyond image-only prediction to multimodal integration. By combining image-based deep learning with clinical data&#x2013;based machine learning, our ensemble model outperformed individual modalities, achieving higher precision (0.920) and recall (0.845) and thereby improving the identification of MSI cases.</p>
<p>Although some single models (such as ViT and LR) achieved slightly higher AUROC scores than the ensemble, this difference is partly attributable to class imbalance. AUROC is relatively insensitive to minority-class performance (<xref ref-type="bibr" rid="B37">37</xref>), and conservative models such as LR or models that capture strong morphological cues such as ViT may appear to perform better under AUROC even though they miss a greater number of MSI cases. In contrast, the ensemble consistently achieved substantially higher recall, reflecting its ability to integrate complementary strengths from both image-based and clinical models. This trade-off is expected because the ensemble focuses on balanced performance rather than maximizing a single discrimination metric. Recall is especially important in MSI screening, because missed MSI cases can delay immunotherapy eligibility or reduce the likelihood of identifying patients with Lynch syndrome. Therefore, the ensemble&#x2019;s improved sensitivity is more aligned with real-world clinical priorities.</p>
<p>We also evaluated more complex integration strategies, including stacking with an MLP meta-classifier. However, stacking consistently resulted in substantially lower recall. This likely occurs because the meta-model inherits the class imbalance present in the training data, which biases predictions toward the majority MSS class. In contrast, probability-based majority voting introduces no additional trainable parameters and therefore avoids amplifying imbalance. Its transparent combination of model probabilities further enhances interpretability in clinical settings. For completeness, the stacking results are reported in the <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S2</bold></xref>.</p>
<p>The strong performance of VGG-16 among image-based models and LR among clinical models can be explained by the interplay between model architecture and data characteristics. VGG16 performs well on colonoscopy images because the visual patterns relevant to MSI, including coarse mucosal textures, vascular irregularities, and tumor-surface morphology, are effectively captured by stacked 3&#xd7;3 convolutions without the need for deep residual blocks. This architectural simplicity helps reduce overfitting and supports stable training on a dataset of moderate size. In contrast, the structured clinical variables exhibit largely linear or monotonic relationships with MSI status (<xref ref-type="bibr" rid="B38">38</xref>), which makes LR particularly appropriate. Tree-based models and SVM tended to overfit the minority MSI class or required more extensive hyperparameter tuning. For these reasons, the observed performance reflects the compatibility between each model and the underlying data rather than differences in model complexity.</p>
<p>A critical barrier to clinical adoption of AI models is their interpretability. We therefore conducted model explainability analyses using Grad-CAM for the image-based models and SHAP for the clinical models. The Grad-CAM visualizations of the image-based models showed that the networks attended to clinically relevant tumor regions and mucosal abnormalities in colonoscopy images, rather than being driven by irrelevant background structures. For MSI tumors, Grad-CAM maps highlighted irregular vascular patterns and mucosal disruptions. In MSS tumors, attention was more diffusely distributed but still focused on tumor mass regions. Together, these interpretability analyses not only enhance clinician trust in model predictions but also provide insight into potential endoscopic correlates of MSI biology. Histopathologically, MSI tumors are characterized by heterogeneous glandular, mucinous, and solid components, along with increased microvascular density (<xref ref-type="bibr" rid="B39">39</xref>). These features likely translate endoscopically into tumors with more prominent mucosal secretions and irregular, enlarged vascular patterns, which were consistently emphasized by the Grad-CAM outputs.</p>
<p>Our SHAP analysis revealed key determinants underlying MSI prediction. Tumor location was the most influential factor, with rectal tumors contributing negatively, consistent with the predominance of MSI in right-sided colon cancers (<xref ref-type="bibr" rid="B40">40</xref>). Anthropometric features such as height, weight, and BMI were also important, aligning with evidence that obesity is more strongly linked to MSS colorectal cancer (<xref ref-type="bibr" rid="B41">41</xref>). Gender showed a moderate effect, supporting findings that MSI-H tumors are more frequent in men, while estrogen may exert a protective role in women (<xref ref-type="bibr" rid="B42">42</xref>). Immune and hematologic indices (pNEUT, cLYM) reflected the immune-rich and inflammatory microenvironment of MSI tumors (<xref ref-type="bibr" rid="B43">43</xref>), whereas liver function markers (ALB, GLB, A/G ratio) suggested potential metabolic associations. Although hypertension also contributed, its biological relevance remains uncertain. Together, these findings confirm that the model captured clinically plausible and biologically meaningful predictors, reinforcing its interpretability and translational potential.</p>
<p>Our results have several important clinical implications. MSI is a critical biomarker in CRC, with relevance for both prognosis and therapy selection, particularly response to immune checkpoint inhibitors. Conventional MSI testing relies on IHC or PCR, which are invasive, time-consuming, and costly. By leveraging colonoscopy images and routine clinical data, our approach offers a non-invasive, rapid, and cost-effective alternative for MSI pre-screening. In practice, such a system could be deployed at the time of colonoscopy, providing immediate stratification and guiding subsequent confirmatory testing. For example, patients predicted as MSS with high confidence could bypass unnecessary molecular testing, reducing diagnostic burden and cost, while MSI-positive predictions could be prioritized for confirmatory IHC or PCR. This workflow has the potential to accelerate treatment decision-making, improve resource allocation, and reduce the workload of pathologists and laboratory personnel.</p>
<p>Several limitations should be acknowledged. First, this retrospective single-center design may introduce selection bias and limit the generalizability of our findings. External validation using independent multi-center datasets is essential to assess generalizability and clinical applicability. Second, although the low MSI prevalence reflects real-world epidemiology, it may reduce sensitivity for minority-class detection despite augmentation and ensemble strategies. This may be because some patients in our hospital did not undergo MSI testing due to cost or other factors. Nonetheless, this further strengthens the potential of our model to pre-screen MSI status and guide decisions regarding the necessity of IHC or PCR testing. Finally, although the ensemble improves predictive balance, it introduces additional computational overhead that may affect real-time deployment during colonoscopy. Future work will explore lightweight architectures, model distillation, or on-device optimization to improve efficiency. Notably, our soft probability&#x2013;based voting module itself requires minimal computation, which helps mitigate deployment challenges.</p>
<p>In conclusion, we demonstrate that combining colonoscopy image&#x2013;based deep learning and clinical machine learning models through ensemble learning enables accurate, interpretable, and non-invasive MSI prediction. Grad-CAM and SHAP analyses enhance transparency and clinical trust by linking predictions to biologically meaningful patterns. This ensemble framework holds promise as a practical adjunct to molecular assays, with potential to streamline diagnostics, reduce testing burden, and support personalized treatment strategies in colorectal cancer.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets generated during and/or analyzed during the current study are available from the corresponding author on reasonable request.</p></sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Ethics Committee of Nanjing Hospital of Chinese Medicine. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>JY: Data curation, Writing &#x2013; original draft, Methodology, Funding acquisition, Investigation, Writing &#x2013; review &amp; editing, Formal analysis, Conceptualization. SZ: Visualization, Data curation, Methodology, Formal analysis, Writing &#x2013; review &amp; editing, Software, Writing &#x2013; original draft. JZ: Investigation, Writing &#x2013; original draft. YC: Writing &#x2013; original draft, Investigation. MZ: Investigation, Writing &#x2013; original draft. CZ: Conceptualization, Writing &#x2013; review &amp; editing, Funding acquisition. BJ: Funding acquisition, Supervision, Conceptualization, Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fonc.2025.1734076/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fonc.2025.1734076/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bray</surname> <given-names>F</given-names></name>
<name><surname>Laversanne</surname> <given-names>M</given-names></name>
<name><surname>Sung</surname> <given-names>H</given-names></name>
<name><surname>Ferlay</surname> <given-names>J</given-names></name>
<name><surname>Siegel</surname> <given-names>RL</given-names></name>
<name><surname>Soerjomataram</surname> <given-names>I</given-names></name>
<etal/>
</person-group>. 
<article-title>Global cancer statistics 2022: Globocan estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA: Cancer J Clin</source>. (<year>2024</year>) <volume>74</volume>:<page-range>229&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3322/caac.21834</pub-id>, PMID: <pub-id pub-id-type="pmid">38572751</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Amato</surname> <given-names>M</given-names></name>
<name><surname>Franco</surname> <given-names>R</given-names></name>
<name><surname>Facchini</surname> <given-names>G</given-names></name>
<name><surname>Addeo</surname> <given-names>R</given-names></name>
<name><surname>Ciardiello</surname> <given-names>F</given-names></name>
<name><surname>Berretta</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>Microsatellite instability: from the implementation of the detection to a prognostic and predictive role in cancers</article-title>. <source>Int J Mol Sci</source>. (<year>2022</year>) <volume>23</volume>:<fpage>8726</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms23158726</pub-id>, PMID: <pub-id pub-id-type="pmid">35955855</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Petrelli</surname> <given-names>F</given-names></name>
<name><surname>Ghidini</surname> <given-names>M</given-names></name>
<name><surname>Cabiddu</surname> <given-names>M</given-names></name>
<name><surname>Pezzica</surname> <given-names>E</given-names></name>
<name><surname>Corti</surname> <given-names>D</given-names></name>
<name><surname>Turati</surname> <given-names>L</given-names></name>
<etal/>
</person-group>. 
<article-title>Microsatellite instability and survival in stage ii colorectal cancer: a systematic review and meta-analysis</article-title>. <source>Anticancer Res</source>. (<year>2019</year>) <volume>39</volume>:<page-range>6431&#x2013;41</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21873/anticanres.13857</pub-id>, PMID: <pub-id pub-id-type="pmid">31810907</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lenz</surname> <given-names>HJ</given-names></name>
<name><surname>Van Cutsem</surname> <given-names>E</given-names></name>
<name><surname>Luisa Limon</surname> <given-names>M</given-names></name>
<name><surname>Wong</surname> <given-names>KYM</given-names></name>
<name><surname>Hendlisz</surname> <given-names>A</given-names></name>
<name><surname>Aglietta</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>First-line nivolumab plus low-dose ipilimumab for microsatellite instability-high/mismatch repair-deficient metastatic colorectal cancer: the phase ii checkmate 142 study</article-title>. <source>J Clin Oncol</source>. (<year>2022</year>) <volume>40</volume>:<page-range>161&#x2013;70</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1200/JCO.21.01015</pub-id>, PMID: <pub-id pub-id-type="pmid">34637336</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dabir</surname> <given-names>PD</given-names></name>
<name><surname>Bruggeling</surname> <given-names>CE</given-names></name>
<name><surname>van der Post</surname> <given-names>RS</given-names></name>
<name><surname>Dutilh</surname> <given-names>BE</given-names></name>
<name><surname>Hoogerbrugge</surname> <given-names>N</given-names></name>
<name><surname>Ligtenberg</surname> <given-names>MJ</given-names></name>
<etal/>
</person-group>. 
<article-title>Microsatellite instability screening in colorectal adenomas to detect lynch syndrome patients? a systematic review and meta-analysis</article-title>. <source>Eur J Hum Genet</source>. (<year>2020</year>) <volume>28</volume>:<page-range>277&#x2013;86</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41431-019-0538-7</pub-id>, PMID: <pub-id pub-id-type="pmid">31695176</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lebedeva</surname> <given-names>A</given-names></name>
<name><surname>Taraskina</surname> <given-names>A</given-names></name>
<name><surname>Grigoreva</surname> <given-names>T</given-names></name>
<name><surname>Belova</surname> <given-names>E</given-names></name>
<name><surname>Kuznetsova</surname> <given-names>O</given-names></name>
<name><surname>Ivanilova</surname> <given-names>D</given-names></name>
<etal/>
</person-group>. 
<article-title>The role of msi testing methodology and its heterogeneity in predicting colorectal cancer immunotherapy response</article-title>. <source>Int J Mol Sci</source>. (<year>2025</year>) <volume>26</volume>:<fpage>3420</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms26073420</pub-id>, PMID: <pub-id pub-id-type="pmid">40244273</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yakushina</surname> <given-names>V</given-names></name>
<name><surname>Kavun</surname> <given-names>A</given-names></name>
<name><surname>Veselovsky</surname> <given-names>E</given-names></name>
<name><surname>Grigoreva</surname> <given-names>T</given-names></name>
<name><surname>Belova</surname> <given-names>E</given-names></name>
<name><surname>Lebedeva</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>Microsatellite instability detection: the current standards, limitations, and misinterpretations</article-title>. <source>JCO Precis Oncol</source>. (<year>2023</year>) <volume>7</volume>:<elocation-id>e2300010</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1200/PO.23.00010</pub-id>, PMID: <pub-id pub-id-type="pmid">37315263</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yin</surname> <given-names>Y</given-names></name>
<name><surname>Zhang</surname> <given-names>R</given-names></name>
<name><surname>Liu</surname> <given-names>P</given-names></name>
<name><surname>Deng</surname> <given-names>W</given-names></name>
<name><surname>Hu</surname> <given-names>D</given-names></name>
<name><surname>He</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Artificial neural networks for finger vein recognition: a survey</article-title>. <source>Eng Appl Artif Intell</source>. (<year>2025</year>) <volume>150</volume>:<fpage>110586</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2025.110586</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>H</given-names></name>
<name><surname>Yin</surname> <given-names>Y</given-names></name>
<name><surname>Zhang</surname> <given-names>J</given-names></name>
<name><surname>Deng</surname> <given-names>W</given-names></name>
<name><surname>Li</surname> <given-names>C</given-names></name>
</person-group>. 
<article-title>Deep learning for liver cancer histopathology image analysis: A comprehensive survey</article-title>. <source>Eng Appl Artif Intell</source>. (<year>2024</year>) <volume>133</volume>:<fpage>108436</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2024.108436</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gerwert</surname> <given-names>K</given-names></name>
<name><surname>Sch&#xf6;rner</surname> <given-names>S</given-names></name>
<name><surname>Gro&#xdf;erueschkamp</surname> <given-names>F</given-names></name>
<name><surname>Kraeft</surname> <given-names>AL</given-names></name>
<name><surname>Schuhmacher</surname> <given-names>D</given-names></name>
<name><surname>Sternemann</surname> <given-names>C</given-names></name>
<etal/>
</person-group>. 
<article-title>Fast and label-free automated detection of microsatellite status in early colon cancer using artificial intelligence integrated infrared imaging</article-title>. <source>Eur J Cancer</source>. (<year>2023</year>) <volume>182</volume>:<page-range>122&#x2013;31</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ejca.2022.12.026</pub-id>, PMID: <pub-id pub-id-type="pmid">36773401</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wagner</surname> <given-names>SJ</given-names></name>
<name><surname>Reisenb&#xfc;chler</surname> <given-names>D</given-names></name>
<name><surname>West</surname> <given-names>NP</given-names></name>
<name><surname>Niehues</surname> <given-names>JM</given-names></name>
<name><surname>Zhu</surname> <given-names>J</given-names></name>
<name><surname>Foersch</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Transformer-based biomarker prediction from colorectal cancer histology: A large-scale multicentric study</article-title>. <source>Cancer Cell</source>. (<year>2023</year>) <volume>41</volume>:<page-range>1650&#x2013;61</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ccell.2023.08.002</pub-id>, PMID: <pub-id pub-id-type="pmid">37652006</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>W</given-names></name>
<name><surname>Zheng</surname> <given-names>K</given-names></name>
<name><surname>Yuan</surname> <given-names>W</given-names></name>
<name><surname>Jia</surname> <given-names>Z</given-names></name>
<name><surname>Wu</surname> <given-names>Y</given-names></name>
<name><surname>Duan</surname> <given-names>X</given-names></name>
<etal/>
</person-group>. 
<article-title>A ct-based deep learning for segmenting tumors and predicting microsatellite instability in patients with colorectal cancers: a multicenter cohort study</article-title>. <source>La radiologia Med</source>. (<year>2025</year>) <volume>130</volume>:<page-range>214&#x2013;25</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11547-024-01909-5</pub-id>, PMID: <pub-id pub-id-type="pmid">39586941</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>S</given-names></name>
<name><surname>Du</surname> <given-names>W</given-names></name>
<name><surname>Cao</surname> <given-names>Y</given-names></name>
<name><surname>Kong</surname> <given-names>J</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<name><surname>Wang</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Preoperative contrast-enhanced ct imaging and clinicopathological characteristics analysis of mismatch repair-deficient colorectal cancer</article-title>. <source>Cancer Imaging</source>. (<year>2023</year>) <volume>23</volume>:<fpage>97</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40644-023-00591-6</pub-id>, PMID: <pub-id pub-id-type="pmid">37828626</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Z</given-names></name>
<name><surname>Zhong</surname> <given-names>Q</given-names></name>
<name><surname>Zhang</surname> <given-names>L</given-names></name>
<name><surname>Wang</surname> <given-names>M</given-names></name>
<name><surname>Xiao</surname> <given-names>W</given-names></name>
<name><surname>Cui</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>Computed tomography-based radiomics model to preoperatively predict microsatellite instability status in colorectal cancer: A multicenter study</article-title>. <source>Front Oncol</source>. (<year>2021</year>) <volume>11</volume>:<elocation-id>666786</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2021.666786</pub-id>, PMID: <pub-id pub-id-type="pmid">34277413</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>W</given-names></name>
<name><surname>Huang</surname> <given-names>Z</given-names></name>
<name><surname>Zhao</surname> <given-names>J</given-names></name>
<name><surname>He</surname> <given-names>D</given-names></name>
<name><surname>Li</surname> <given-names>M</given-names></name>
<name><surname>Yin</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Development and validation of magnetic resonance imaging-based radiomics models for preoperative prediction of microsatellite instability in rectal cancer</article-title>. <source>Ann Transl Med</source>. (<year>2021</year>) <volume>9</volume>:<fpage>134</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/atm-20-7673</pub-id>, PMID: <pub-id pub-id-type="pmid">33569436</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sung</surname> <given-names>JJ</given-names></name>
<name><surname>Chiu</surname> <given-names>HM</given-names></name>
<name><surname>Lieberman</surname> <given-names>D</given-names></name>
<name><surname>Kuipers</surname> <given-names>EJ</given-names></name>
<name><surname>Rutter</surname> <given-names>MD</given-names></name>
<name><surname>Macrae</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>Third asia-pacific consensus recommendations on colorectal cancer screening and postpolypectomy surveillance</article-title>. <source>Gut</source>. (<year>2022</year>) <volume>71</volume>:<page-range>2152&#x2013;66</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/gutjnl-2022-327377</pub-id>, PMID: <pub-id pub-id-type="pmid">36002247</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Davidson</surname> <given-names>KW</given-names></name>
<name><surname>Barry</surname> <given-names>MJ</given-names></name>
<name><surname>Mangione</surname> <given-names>CM</given-names></name>
<name><surname>Cabana</surname> <given-names>M</given-names></name>
<name><surname>Caughey</surname> <given-names>AB</given-names></name>
<name><surname>Davis</surname> <given-names>EM</given-names></name>
<etal/>
</person-group>. 
<article-title>Screening for colorectal cancer: US Preventive Services Task Force recommendation statement</article-title>. <source>JAMA</source>. (<year>2021</year>) <volume>325</volume>:<page-range>1965&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jama.2021.6238</pub-id>, PMID: <pub-id pub-id-type="pmid">34003218</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cai</surname> <given-names>Y</given-names></name>
<name><surname>Chen</surname> <given-names>X</given-names></name>
<name><surname>Chen</surname> <given-names>J</given-names></name>
<name><surname>Liao</surname> <given-names>J</given-names></name>
<name><surname>Han</surname> <given-names>M</given-names></name>
<name><surname>Lin</surname> <given-names>D</given-names></name>
<etal/>
</person-group>. 
<article-title>Deep learning-assisted colonoscopy images for prediction of mismatch repair deficiency in colorectal cancer</article-title>. <source>Surg Endosc</source>. (<year>2025</year>) <volume>39</volume>:<page-range>859&#x2013;67</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00464-024-11426-1</pub-id>, PMID: <pub-id pub-id-type="pmid">39623175</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lo</surname> <given-names>CM</given-names></name>
<name><surname>Jiang</surname> <given-names>JK</given-names></name>
<name><surname>Lin</surname> <given-names>CC</given-names></name>
</person-group>. 
<article-title>Detecting microsatellite instability in colorectal cancer using transformer based colonoscopy image classification and retrieval</article-title>. <source>PLoS One</source>. (<year>2024</year>) <volume>19</volume>:<elocation-id>e0292277</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0292277</pub-id>, PMID: <pub-id pub-id-type="pmid">38271352</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Schouten</surname> <given-names>D</given-names></name>
<name><surname>Nicoletti</surname> <given-names>G</given-names></name>
<name><surname>Dille</surname> <given-names>B</given-names></name>
<name><surname>Chia</surname> <given-names>C</given-names></name>
<name><surname>Vendittelli</surname> <given-names>P</given-names></name>
<name><surname>Schuurmans</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>Navigating the landscape of multimodal ai in medicine: a scoping review on technical challenges and clinical applications</article-title>. <source>Med Image Anal</source>. (<year>2025</year>) <volume>105</volume>:<fpage>103621</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.media.2025.103621</pub-id>, PMID: <pub-id pub-id-type="pmid">40482561</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hezi</surname> <given-names>H</given-names></name>
<name><surname>Gelber</surname> <given-names>M</given-names></name>
<name><surname>Balabanov</surname> <given-names>A</given-names></name>
<name><surname>Maruvka</surname> <given-names>YE</given-names></name>
<name><surname>Freiman</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Cimil-crc: A clinically-informed multiple instance learning framework for patient-level colorectal cancer molecular subtypes classification from h&amp;e stained images</article-title>. <source>Comput Methods Programs Biomedicine</source>. (<year>2025</year>) <volume>259</volume>:<fpage>108513</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cmpb.2024.108513</pub-id>, PMID: <pub-id pub-id-type="pmid">39581068</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Z</given-names></name>
<name><surname>Zhang</surname> <given-names>J</given-names></name>
<name><surname>Zhong</surname> <given-names>Q</given-names></name>
<name><surname>Feng</surname> <given-names>Z</given-names></name>
<name><surname>Shi</surname> <given-names>Y</given-names></name>
<name><surname>Xu</surname> <given-names>L</given-names></name>
<etal/>
</person-group>. 
<article-title>Development and external validation of a multiparametric mri-based radiomics model for preoperative prediction of microsatellite instability status in rectal cancer: a retrospective multicenter study</article-title>. <source>Eur Radiol</source>. (<year>2023</year>) <volume>33</volume>:<page-range>1835&#x2013;43</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00330-022-09160-0</pub-id>, PMID: <pub-id pub-id-type="pmid">36282309</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lo</surname> <given-names>CM</given-names></name>
<name><surname>Yang</surname> <given-names>YW</given-names></name>
<name><surname>Lin</surname> <given-names>JK</given-names></name>
<name><surname>Lin</surname> <given-names>TC</given-names></name>
<name><surname>Chen</surname> <given-names>WS</given-names></name>
<name><surname>Yang</surname> <given-names>SH</given-names></name>
<etal/>
</person-group>. 
<article-title>Modeling the survival of colorectal cancer patients based on colonoscopic features in a feature ensemble vision transformer</article-title>. <source>Computerized Med Imaging Graphics</source>. (<year>2023</year>) <volume>166</volume>:<fpage>102242</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compmedimag.2023.102242</pub-id>, PMID: <pub-id pub-id-type="pmid">37172354</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cui</surname> <given-names>H</given-names></name>
<name><surname>Zhao</surname> <given-names>Y</given-names></name>
<name><surname>Xiong</surname> <given-names>S</given-names></name>
<name><surname>Feng</surname> <given-names>Y</given-names></name>
<name><surname>Li</surname> <given-names>P</given-names></name>
<name><surname>Lv</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Diagnosing solid lesions in the pancreas with multimodal artificial intelligence: a randomized crossover trial</article-title>. <source>JAMA Network Open</source>. (<year>2024</year>) <volume>7</volume>:<fpage>e2422454</fpage>&#x2013;<lpage>e2422454</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.22454</pub-id>, PMID: <pub-id pub-id-type="pmid">39028670</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>H</given-names></name>
<name><surname>He</surname> <given-names>Y</given-names></name>
<name><surname>Wan</surname> <given-names>L</given-names></name>
<name><surname>Li</surname> <given-names>C</given-names></name>
<name><surname>Li</surname> <given-names>Z</given-names></name>
<name><surname>Li</surname> <given-names>Z</given-names></name>
<etal/>
</person-group>. 
<article-title>Deep learning models in classifying primary bone tumors and bone infections based on radiographs</article-title>. <source>NPJ Precis Oncol</source>. (<year>2025</year>) <volume>9</volume>:<fpage>72</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41698-025-00855-3</pub-id>, PMID: <pub-id pub-id-type="pmid">40074845</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Russakovsky</surname> <given-names>O</given-names></name>
<name><surname>Deng</surname> <given-names>J</given-names></name>
<name><surname>Su</surname> <given-names>H</given-names></name>
<name><surname>Krause</surname> <given-names>J</given-names></name>
<name><surname>Satheesh</surname> <given-names>S</given-names></name>
<name><surname>Ma</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Imagenet large scale visual recognition challenge</article-title>. <source>Int J Comput Vision</source>. (<year>2015</year>) <volume>115</volume>:<page-range>211&#x2013;52</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11263-015-0816-y</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Lin</surname> <given-names>TY</given-names></name>
<name><surname>Goyal</surname> <given-names>P</given-names></name>
<name><surname>Girshick</surname> <given-names>R</given-names></name>
<name><surname>He</surname> <given-names>K</given-names></name>
<name><surname>Doll&#xe1;r</surname> <given-names>P</given-names></name>
</person-group>. (<year>2017</year>). 
<article-title>Focal loss for dense object detection</article-title>, in: <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. <publisher-loc>Venice, Italy</publisher-loc>: 
<publisher-name>IEEE</publisher-name>. pp. <page-range>2980&#x2013;8</page-range>.
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Salehi</surname> <given-names>SSM</given-names></name>
<name><surname>Erdogmus</surname> <given-names>D</given-names></name>
<name><surname>Gholipour</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Tversky loss function for image segmentation using 3d fully convolutional deep networks</article-title>. In: <source>International workshop on machine learning in medical imaging</source>. <publisher-loc>Cham</publisher-loc>: 
<publisher-name>Springer</publisher-name> (<year>2017</year>). p. <page-range>379&#x2013;87</page-range>.
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Abraham</surname> <given-names>N</given-names></name>
<name><surname>Khan</surname> <given-names>NM</given-names></name>
</person-group>. 
<article-title>A novel focal tversky loss function with improved attention u-net for lesion segmentation</article-title>. In: <source>2019 IEEE 16th international symposium on biomedical imaging (ISBI 2019)</source>. <publisher-loc>Venice, Italy</publisher-loc>: 
<publisher-name>IEEE</publisher-name> (<year>2019</year>). p. <page-range>683&#x2013;7</page-range>.
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lee</surname> <given-names>N</given-names></name>
<name><surname>Yang</surname> <given-names>H</given-names></name>
<name><surname>Yoo</surname> <given-names>H</given-names></name>
</person-group>. 
<article-title>A surrogate loss function for optimization of <italic>f<sub>&#x3b2;</sub></italic> score in binary classification with imbalanced data</article-title>. <source>arXiv preprint arXiv:2104.01459</source>. (<year>2021</year>) <volume>abs/2104.01459</volume>.
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fr&#xe4;nti</surname> <given-names>P</given-names></name>
<name><surname>Mariescu-Istodor</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Soft precision and recall</article-title>. <source>Pattern Recognition Lett</source>. (<year>2023</year>) <volume>167</volume>:<page-range>115&#x2013;21</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.patrec.2023.02.005</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mursil</surname> <given-names>M</given-names></name>
<name><surname>Rashwan</surname> <given-names>HA</given-names></name>
<name><surname>Khalid</surname> <given-names>A</given-names></name>
<name><surname>Cavall&#xe9;-Busquets</surname> <given-names>P</given-names></name>
<name><surname>Santos-Calderon</surname> <given-names>L</given-names></name>
<name><surname>Murphy</surname> <given-names>MM</given-names></name>
<etal/>
</person-group>. 
<article-title>Interpretable deep neural networks for advancing early neonatal birth weight prediction using multimodal maternal factors</article-title>. <source>J Biomed Inf</source>. (<year>2025</year>), <fpage>104838</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jbi.2025.104838</pub-id>, PMID: <pub-id pub-id-type="pmid">40339967</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mao</surname> <given-names>J</given-names></name>
<name><surname>He</surname> <given-names>Y</given-names></name>
<name><surname>Chu</surname> <given-names>J</given-names></name>
<name><surname>Hu</surname> <given-names>B</given-names></name>
<name><surname>Yao</surname> <given-names>Y</given-names></name>
<name><surname>Yan</surname> <given-names>Q</given-names></name>
<etal/>
</person-group>. 
<article-title>Analysis of clinical characteristics of mismatch repair status in colorectal cancer: a multicenter retrospective study</article-title>. <source>Int J Colorectal Dis</source>. (<year>2024</year>) <volume>39</volume>:<fpage>100</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00384-024-04674-z</pub-id>, PMID: <pub-id pub-id-type="pmid">38967814</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chang</surname> <given-names>X</given-names></name>
<name><surname>Wang</surname> <given-names>J</given-names></name>
<name><surname>Zhang</surname> <given-names>G</given-names></name>
<name><surname>Yang</surname> <given-names>M</given-names></name>
<name><surname>Xi</surname> <given-names>Y</given-names></name>
<name><surname>Xi</surname> <given-names>C</given-names></name>
<etal/>
</person-group>. 
<article-title>Predicting colorectal cancer microsatellite instability with a self-attention-enabled convolutional neural network</article-title>. <source>Cell Rep Med</source>. (<year>2023</year>) <volume>4</volume>:<elocation-id>100914</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.xcrm.2022.100914</pub-id>, PMID: <pub-id pub-id-type="pmid">36720223</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bodalal</surname> <given-names>Z</given-names></name>
<name><surname>Hong</surname> <given-names>EK</given-names></name>
<name><surname>Trebeschi</surname> <given-names>S</given-names></name>
<name><surname>Kurilova</surname> <given-names>I</given-names></name>
<name><surname>Landolfi</surname> <given-names>F</given-names></name>
<name><surname>Bogveradze</surname> <given-names>N</given-names></name>
<etal/>
</person-group>. 
<article-title>Non-invasive ct radiomic biomarkers predict microsatellite stability status in colorectal cancer: a multicenter validation study</article-title>. <source>Eur Radiol Exp</source>. (<year>2024</year>) <volume>8</volume>:<fpage>98</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s41747-024-00484-8</pub-id>, PMID: <pub-id pub-id-type="pmid">39186200</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<label>36</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y</given-names></name>
<name><surname>Xie</surname> <given-names>B</given-names></name>
<name><surname>Wang</surname> <given-names>K</given-names></name>
<name><surname>Zou</surname> <given-names>W</given-names></name>
<name><surname>Liu</surname> <given-names>A</given-names></name>
<name><surname>Xue</surname> <given-names>Z</given-names></name>
<etal/>
</person-group>. 
<article-title>Multi-parametric mri habitat radiomics based on interpretable machine learning for preoperative assessment of microsatellite instability in rectal cancer</article-title>. <source>Acad Radiol</source>. (<year>2025</year>) <volume>32</volume>:<page-range>3975&#x2013;3988</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.acra.2025.02.009</pub-id>, PMID: <pub-id pub-id-type="pmid">40016002</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<label>37</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>McDermott</surname> <given-names>M</given-names></name>
<name><surname>Zhang</surname> <given-names>H</given-names></name>
<name><surname>Hansen</surname> <given-names>L</given-names></name>
<name><surname>Angelotti</surname> <given-names>G</given-names></name>
<name><surname>Gallifant</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>A closer look at auroc and auprc under class imbalance</article-title>. <source>Adv Neural Inf Process Syst</source>. (<year>2024</year>) <volume>37</volume>:<page-range>44102&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.52202/079017-1400</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<label>38</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pei</surname> <given-names>Q</given-names></name>
<name><surname>Yi</surname> <given-names>X</given-names></name>
<name><surname>Chen</surname> <given-names>C</given-names></name>
<name><surname>Pang</surname> <given-names>P</given-names></name>
<name><surname>Fu</surname> <given-names>Y</given-names></name>
<name><surname>Lei</surname> <given-names>G</given-names></name>
<etal/>
</person-group>. 
<article-title>Pre-treatment ct-based radiomics nomogram for predicting microsatellite instability status in colorectal cancer</article-title>. <source>Eur Radiol</source>. (<year>2022</year>) <volume>32</volume>:<page-range>714&#x2013;24</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00330-021-08167-3</pub-id>, PMID: <pub-id pub-id-type="pmid">34258636</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<label>39</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ying</surname> <given-names>M</given-names></name>
<name><surname>Pan</surname> <given-names>J</given-names></name>
<name><surname>Lu</surname> <given-names>G</given-names></name>
<name><surname>Zhou</surname> <given-names>S</given-names></name>
<name><surname>Fu</surname> <given-names>J</given-names></name>
<name><surname>Wang</surname> <given-names>Q</given-names></name>
<etal/>
</person-group>. 
<article-title>Development and validation of a radiomics-based nomogram for the preoperative prediction of microsatellite instability in colorectal cancer</article-title>. <source>BMC Cancer</source>. (<year>2022</year>) <volume>22</volume>:<fpage>524</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12885-022-09584-3</pub-id>, PMID: <pub-id pub-id-type="pmid">35534797</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<label>40</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>Y</given-names></name>
<name><surname>Wang</surname> <given-names>L</given-names></name>
<name><surname>Ran</surname> <given-names>W</given-names></name>
<name><surname>Li</surname> <given-names>G</given-names></name>
<name><surname>Xiao</surname> <given-names>Y</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<etal/>
</person-group>. 
<article-title>Effect of tumor location on clinicopathological and molecular markers in colorectal cancer in eastern China patients: an analysis of 2,356 cases</article-title>. <source>Front Genet</source>. (<year>2020</year>) <volume>11</volume>:<elocation-id>96</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2020.00096</pub-id>, PMID: <pub-id pub-id-type="pmid">32161617</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<label>41</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hoffmeister</surname> <given-names>M</given-names></name>
<name><surname>Bl&#xe4;ker</surname> <given-names>H</given-names></name>
<name><surname>Kloor</surname> <given-names>M</given-names></name>
<name><surname>Roth</surname> <given-names>W</given-names></name>
<name><surname>Toth</surname> <given-names>C</given-names></name>
<name><surname>Herpel</surname> <given-names>E</given-names></name>
<etal/>
</person-group>. 
<article-title>Body mass index and microsatellite instability in colorectal cancer: a population-based study</article-title>. <source>Cancer Epidemiol Biomarkers Prev</source>. (<year>2013</year>) <volume>22</volume>:<page-range>2303&#x2013;11</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/1055-9965.EPI-13-0239</pub-id>, PMID: <pub-id pub-id-type="pmid">24127414</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<label>42</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jin</surname> <given-names>P</given-names></name>
<name><surname>Lu</surname> <given-names>XJ</given-names></name>
<name><surname>Sheng</surname> <given-names>JQ</given-names></name>
<name><surname>Fu</surname> <given-names>L</given-names></name>
<name><surname>Meng</surname> <given-names>XM</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<etal/>
</person-group>. 
<article-title>Estrogen stimulates the expression of mismatch repair gene hmlh1 in colonic epithelial cells</article-title>. <source>Cancer Prev Res</source>. (<year>2010</year>) <volume>3</volume>:<page-range>910&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/1940-6207.CAPR-09-0228</pub-id>, PMID: <pub-id pub-id-type="pmid">20663978</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<label>43</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sui</surname> <given-names>Q</given-names></name>
<name><surname>Zhang</surname> <given-names>X</given-names></name>
<name><surname>Chen</surname> <given-names>C</given-names></name>
<name><surname>Tang</surname> <given-names>J</given-names></name>
<name><surname>Yu</surname> <given-names>J</given-names></name>
<name><surname>Li</surname> <given-names>W</given-names></name>
<etal/>
</person-group>. 
<article-title>Inflammation promotes resistance to immune checkpoint inhibitors in high microsatellite instability colorectal cancer</article-title>. <source>Nat Commun</source>. (<year>2022</year>) <volume>13</volume>:<fpage>7316</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-022-35096-6</pub-id>, PMID: <pub-id pub-id-type="pmid">36443332</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/734918">Gavino Faa</ext-link>, University of Cagliari, Italy</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3122823">Jinghua Zhang</ext-link>, Hohai University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3275332">Zhicheng Du</ext-link>, Tsinghua University, China</p></fn>
</fn-group>
</back>
</article>