<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2025.1643852</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Deep learning analysis of MRI to assess rectal cancer treatment</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Selby</surname><given-names>Heather M.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3085532/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Son</surname><given-names>Ashley Y.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Sheth</surname><given-names>Vipul R.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wagner</surname><given-names>Todd H.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1247259/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Pollom</surname><given-names>Erqi L.</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/794890/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Morris</surname><given-names>Arden M.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3262192/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Stanford-Surgery Policy Improvement Research and Education Center (S-SPIRE Center), Department of Surgery, Stanford School of Medicine</institution>, <city>Palo Alto</city>, <state>CA</state>,&#xa0;<country country="US">United States</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Radiology, Stanford School of Medicine</institution>, <city>Stanford</city>, <state>CA</state>,&#xa0;<country country="US">United States</country></aff>
<aff id="aff3"><label>3</label><institution>Veterans Affairs Medical Center</institution>, <city>Palo Alto</city>, <state>CA</state>,&#xa0;<country country="US">United States</country></aff>
<aff id="aff4"><label>4</label><institution>Department of Radiation Oncology, Stanford University School of Medicine</institution>, <city>Stanford</city>, <state>CA</state>,&#xa0;<country country="US">United States</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Heather M. Selby, <email xlink:href="mailto:selbyh@stanford.edu">selbyh@stanford.edu</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-09">
<day>09</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>15</volume>
<elocation-id>1643852</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>14</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Selby, Son, Sheth, Wagner, Pollom and Morris.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Selby, Son, Sheth, Wagner, Pollom and Morris</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-09">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Traditional neoadjuvant therapy for locally advanced rectal cancer (LARC) results in pathologic complete response (pCR) in approximately 15% of patients, supporting non-operative strategies for those with clinical complete response (cCR). The subjectivity and variability in MRI-based cCR assessments highlight the need for objective, quantitative tools.</p>
</sec>
<sec>
<title>Objective</title>
<p>To develop deep learning models for automated rectal tumor segmentation on pre- and post-treatment MRIs, and to identify radiomic features differentiating cCR from non-cCR patients.</p>
</sec>
<sec>
<title>Materials and methods</title>
<p>We retrospectively analyzed pre- and post-treatment MRIs from 37 LARC patients enrolled in a Phase 2 TNT trial (NCT04380337). Rectal tumors were segmented on T2-weighted images by two data scientists, refined by a radiologist (reference standard), and independently segmented by a fellow. For pre-treatment segmentation, Model 1 (baseline; <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>37</mml:mn></mml:mrow></mml:math></inline-formula>) was trained on reference cases, then used to generate pseudo-labels for 81 additional cases. Model 2 (semi-supervised; <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>118</mml:mn></mml:mrow></mml:math></inline-formula>) was trained on the combined dataset. Model 3 (baseline; <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>37</mml:mn></mml:mrow></mml:math></inline-formula>) was trained on post-treatment cases. Radiomic features were extracted from post-treatment ADC maps, filtered by reproducibility (ICC <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:mo>&#x2265;</mml:mo><mml:mn>0.8</mml:mn></mml:mrow></mml:math></inline-formula>) and redundancy (Spearman <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:mi>&#x3c1;</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mn>0.95</mml:mn></mml:mrow></mml:math></inline-formula>), then analyzed using unsupervised hierarchical clustering.</p>
</sec>
<sec>
<title>Results</title>
<p>For pre-treatment segmentation, radiologist-fellow inter-rater agreement was DSC <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.748</mml:mn><mml:mo>&#xb1;</mml:mo><mml:mn>0.092</mml:mn></mml:mrow></mml:math></inline-formula>. Model 1 achieved mean DSC <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.682</mml:mn><mml:mo>&#xb1;</mml:mo><mml:mn>0.254</mml:mn></mml:mrow></mml:math></inline-formula> versus the radiologist, significantly lower than inter-rater agreement. Model 2 improved performance to mean DSC <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.769</mml:mn><mml:mo>&#xb1;</mml:mo><mml:mn>0.214</mml:mn></mml:mrow></mml:math></inline-formula> (mean gain <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.087</mml:mn></mml:mrow></mml:math></inline-formula>; <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mn>12.8</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:math></inline-formula> relative improvement; <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:mi>p</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>0.001</mml:mn></mml:mrow></mml:math></inline-formula>), slightly outperforming inter-rater agreement. For post-treatment segmentation, inter-rater agreement declined to mean DSC <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.362</mml:mn><mml:mo>&#xb1;</mml:mo><mml:mn>0.256</mml:mn></mml:mrow></mml:math></inline-formula>, while Model 3 achieved mean DSC <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:mo>=</mml:mo><mml:mn>0.175</mml:mn><mml:mo>&#xb1;</mml:mo><mml:mn>0.231</mml:mn></mml:mrow></mml:math></inline-formula> versus the radiologist, reflecting challenges from treatment-induced tissue changes affecting both automated models and human raters. Radiomic clustering revealed two distinct patient groups aligned with cCR and non-cCR status.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study demonstrates the feasibility of deep learning-based automated segmentation and radiomic profiling for differentiating treatment response in rectal cancer. Semi-supervised learning with pseudo-labeled data significantly improved segmentation performance, offering a practical approach to overcome limited annotations. Radiomic features warrant validation in larger multi-center studies for clinical translation.</p>
</sec>
</abstract>
<kwd-group>
<kwd>rectal cancer</kwd>
<kwd>MRI</kwd>
<kwd>clinical complete response</kwd>
<kwd>deep learning</kwd>
<kwd>segmentation</kwd>
<kwd>nnU-Net</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="3"/>
<equation-count count="2"/>
<ref-count count="19"/>
<page-count count="11"/>
<word-count count="5656"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Gastrointestinal Cancers: Colorectal Cancer</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Traditional treatment for locally advanced rectal cancer (LARC) &#x2013; neoadjuvant chemoradiation, radical surgery, and systemic chemotherapy &#x2013; is effective but often debilitating. Surgical removal of the rectum incurs complication rates as high as 46% (<xref ref-type="bibr" rid="B1">1</xref>). Even without complications, approximately 25% of patients who undergo surgery require a colostomy, while the remaining 75% often struggle with bowel dysfunction, bladder control, and sexual health problems (<xref ref-type="bibr" rid="B2">2</xref>&#x2013;<xref ref-type="bibr" rid="B4">4</xref>). In recent years, total neoadjuvant therapy (TNT) has emerged as a promising alternative, potentially enabling patients to avoid surgery altogether. After initial staging by pre-treatment magnetic resonance imaging (MRI), TNT consists of preoperative administration of radiation and full-dose systemic chemotherapy. Another MRI is then used to evaluate the clinical response to TNT. If the patient achieves a clinical complete response (cCR) &#x2013; no evident tumor on clinical testing &#x2013; they may be eligible for a &#x201c;watch and wait&#x201d; approach with close surveillance rather than surgery.</p>
<p>Due to its critical anatomical detail, MRI has become essential in the new paradigm of rectal cancer treatment and surveillance. In recent studies, up to 30&#x2013;50% of patients who undergo TNT achieve a cCR and can safely avoid surgery (<xref ref-type="bibr" rid="B5">5</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>). Accurate cCR assessment depends on the combined interpretation of MRI, endoscopy, and digital rectal examination (DRE), with MRI evaluation by the radiologist serving as a key determinant. The first step in interpretation involves manual segmentation of the rectal tumor by delineating the tumor&#x2019;s 3D volume of interest on the MRI. Even among expert radiologists, however, manual segmentation can be inaccurate in up to 40% of cases (<xref ref-type="bibr" rid="B9">9</xref>), which can impact patient outcomes. Inter- and intra-rater variability, along with the challenges posed by indistinct tumor boundaries and the complexity of tumor morphology, underscores the need for standardized MRI protocols (<xref ref-type="bibr" rid="B10">10</xref>). An automated segmentation model tailored to rectal tumors on pre- and post-treatment MRI could enhance clinical decision-making, improve efficiency, and support radiologists&#x2019; interpretation, ultimately leading to better patient care in rectal cancer management.</p>
<p>Deep learning methods can automatically learn relevant features from medical images without manual feature engineering. We employed &#x201c;no new U-Net&#x201d; (nnU-Net) (<xref ref-type="bibr" rid="B11">11</xref>), a self-configuring deep learning framework that automatically adapts its architecture, pre-processing, and training procedures to dataset characteristics, eliminating manual hyperparameter tuning while achieving state-of-the-art segmentation performance. These automated segmentations then enable radiomic analysis, which extracts quantitative features capturing tumor intensity, texture, shape, and heterogeneity. The radiomic workflow includes feature extraction (first-order statistics, shape descriptors, and texture features) followed by statistical analysis to identify features associated with clinical outcomes. This approach provides objective, reproducible imaging biomarkers that can predict treatment response and support clinical decision-making.</p>
<p>Our long-term objective is to develop and validate an AI-driven model capable of reliably predicting cCR, thereby accurately identifying rectal cancer patients who can safely forgo surgery. In the current study, we aimed to take a critical step toward this objective by developing automated deep learning models, based on the nnU-Net framework (<xref ref-type="bibr" rid="B11">11</xref>), to segment rectal tumors on pre- and post-treatment MRIs.</p>
<p>Although automated segmentation already has demonstrated effectiveness in cancers such as lung, breast, and prostate, its application to rectal cancer has been limited by the scarcity of large-scale, annotated MRI datasets.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Study cohort</title>
<p>We conducted a secondary analysis of MRI data from patients enrolled in a Phase 2 clinical trial of TNT for rectal cancer (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>; NCT04380337), conducted between May 2020 and April 2023 (<xref ref-type="bibr" rid="B12">12</xref>). The study was approved by Stanford&#x2019;s Institutional Review Board, (Protocol #: IRB-62555). This trial assessed the efficacy of combining short-course radiotherapy with chemotherapy (FOLFOXIRI) to enhance cCR rates and facilitate organ preservation.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>MRI timing during the treatment schema for the Phase 2 clinical trial of TNT in patients with LARC (NCT04380337). Created in BioRender. Selby, H. (2025) <ext-link ext-link-type="uri" xlink:href="https://BioRender.com/frgsmrc">https://BioRender.com/frgsmrc</ext-link>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1643852-g001.tif">
<alt-text content-type="machine-generated">Timing of MRI during the treatment schema for locally advanced rectal cancer (LARC). Step 1: Pre-treatment MRI. Step 2: Mid-treatment MRI after short-course radiation and before 8 cycles of FOLFOXIRI. Step 3: 8 weeks post-treatment MRI. Outcomes of treatment: Clinical complete response leads to watch-and-wait approach; non-complete response results in surgery.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Treatment protocol and clinical response</title>
<p>As previously reported (<xref ref-type="bibr" rid="B12">12</xref>), patients received short-course radiation therapy consisting of 25<italic>Gy</italic> in 5 daily fractions with a sequential 5<italic>Gy</italic> boost to gross disease (total 30<italic>Gy</italic> in 6 fractions), delivered via intensity-modulated radiation therapy (IMRT). Following a 2&#x2212;4 week interval, patients received up to 8 cycles of FOLFOXIRI chemotherapy: oxaliplatin 85<italic>mg/m</italic><sup>2</sup>, irinotecan 165<italic>mg/m</italic><sup>2</sup>, leucovorin 200<italic>mg/m</italic><sup>2</sup>, and 5-fluorouracil 3200<italic>mg/m</italic><sup>2</sup> continuous infusion over 48 hours, repeated every 14 days. Clinical response was evaluated 8 &#xb1; 4 weeks after chemotherapy completion using pelvic MRI with tumor regression grading (mrTRG), flexible sigmoidoscopy, and DRE. Clinical complete response (cCR) was defined as absence of residual tumor on endoscopy/DRE and mrTRG 1&#x2212;2 on MRI. Patients achieving cCR were offered organ preservation with intensive surveillance including quarterly endoscopy, semi-annual MRI, and biannual CT imaging.</p>
<p>Among the 37 trial patients, 9 achieved cCR and 28 did not (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table S1</bold></xref>). The median age of the study cohort was 52 years (IQR: 45&#x2212;61). Patients with cCR were significantly older (median 57 years, IQR: 54&#x2212;65) than those without cCR (median 50 years, IQR: 41&#x2212;58). The cCR group also contained more men (78%) than the non-cCR group (61%). Before treatment, 70% of all patients had tumors with poor prognostic features, including 70.3% T3 stage, 22% T4 stage, and only 8% T2 stage. Nodal staging was evenly distributed among patients who did and did not have cCR.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>MRI protocol</title>
<p>MRIs were acquired at three time points: pre-treatment, mid-treatment, and post-treatment. Imaging was performed using a 3T MRI scanner (GE or Siemens) with the following sequences and parameters: 2D Fast Spin Echo T2WI: TR 4000<italic>ms</italic>, TE 100<italic>ms</italic>, 288&#xd7;288 matrix, 3<italic>mm</italic> slice thickness, no inter-slice gap; and DWI: reduced field-of-view, 112&#xd7;64 matrix, 24<italic>cm</italic> field-of-view, 6<italic>mm</italic> slice thickness, acquired at <italic>b</italic>-values of 50 and 800<italic>s/mm</italic><sup>2</sup>. Consistent with best practice, patient preparation included administration of a micro-enema, application of 50&#x2212;150<italic>mL</italic> rectal gel based on tumor location, and intravenous injection of 1<italic>mg</italic> IV glucagon prior to axial T2WI to reduce peristalsis.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Automated tumor segmentation using deep learning</title>
<p>We used the deep learning-based segmentation framework nnU-Net (<xref ref-type="bibr" rid="B11">11</xref>) (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). Three models were trained and validated using 5-fold cross-validation: Model 1 (baseline model trained on <italic>n</italic> = 37 pre-treatment MRIs), Model 2 (semi-supervised model trained on <italic>n</italic> = 118 pre-treatment images), and Model 3 (baseline model trained on <italic>n</italic> = 37 post-treatment images). All models used magnetic resonance imaging (MRI) sequences including T2-weighted images (T2WIs), synthetic diffusion-weighted images (sDWIs; <italic>b</italic>-value= 1500<italic>s/mm</italic><sup>2</sup>), and apparent diffusion coefficient (ADC) maps, with corresponding manual tumor segmentations delineated by a radiologist and fellow on T2WIs. For pre-treatment MRI segmentation, we trained Model 1 on 37 reference standard cases, then used it to generate pseudo-labels for 81 additional cases. Model 2 was trained on the combined 118 cases (37 reference standard + 81 pseudo-labeled cases). For post-treatment MRI segmentation, we trained Model 3 on 37 reference standard cases.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>T2WIs, DWIs, and ADC maps from a Phase 2 clinical trial (n=37), along with rectal tumor segmentations delineated by a radiologist (blue) were used to train automated segmentation models (nnUNet) using 5-fold cross-validation. Three models were developed: Model 1 (baseline; trained on n=37 pre-treatment images), Model 2 (semi-supervised; trained on n=118 pre-treatment images including 81 pseudo-labeled cases generated by Model 1), and Model 3 (baseline; trained on n=37 post-treatment images). Example segmentation output from Model 1 is shown in orange. Created in BioRender. Selby, H. (2025) <ext-link ext-link-type="uri" xlink:href="https://BioRender.com/frgsmrc">https://BioRender.com/frgsmrc</ext-link>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1643852-g002.tif">
<alt-text content-type="machine-generated">MRI image processing using nnU-Net is depicted, with training data showing stacked T2-weighted, diffusion-weighted, and ADC images marked by a radiologist. A diagram illustrates the nnU-Net architecture, which includes convolutional processes. Testing results show an automated segmentation highlighted in yellow.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Manual segmentation</title>
<p>Rectal tumors were manually segmented in 3D on pre- and post-treatment axial T2WIs using Slicer 5.8.0.13 (<xref ref-type="bibr" rid="B13">13</xref>). Initial contours were delineated slice-by-slice by two data scientists, which were then reviewed and refined by a U.S. board-certified radiologist to establish the reference standard (<xref ref-type="bibr" rid="B14">14</xref>). Additionally, a radiology fellow independently segmented all tumors to enable inter-rater variability assessment.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>MRI and segmentation pre-processing</title>
<p>Each DWI was resampled to match the reference T2WI space using B-spline interpolation. This resampling ensured accurate spatial alignment, preserving anatomical details and tumor boundary fidelity, facilitating effective integration of multi-modal imaging data. Segmentation pre-processing included maximum connected volume selection, retaining only the largest contiguous voxel group, thus eliminating smaller, disconnected segments caused by artifacts or stray pixels. Additionally, hole filling was performed to include unsegmented regions completely enclosed within segmented areas, ensuring comprehensive and accurate tumor delineation. Imaging and segmentation pre-processing was done in Python v3.12.1 using SimpleITK v2.4.0.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Calculated ADC and sDWIs</title>
<p>ADC maps quantify water diffusion within tissues, derived from signal decay observed in DWIs, and are essential for tumor characterization. ADC maps were calculated using DWIs acquired at <italic>b</italic>-values of 0, 50, and 800<italic>s/mm</italic><sup>2</sup> based on the mono-exponential decay model (General model in <xref ref-type="disp-formula" rid="eq1"><bold>Equation 1</bold></xref>; our parameters applied to the model in <xref ref-type="disp-formula" rid="eq2"><bold>Equation 2</bold></xref>):</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi>S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>&#x22c5;</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>b</mml:mi><mml:mi>A</mml:mi><mml:mi>D</mml:mi><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>S</italic>(<italic>b</italic>) is signal intensity at a given <italic>b</italic>-value, <italic>b</italic> is diffusion weighting (<italic>s/mm</italic><sup>2</sup>), and ADC is in <italic>mm</italic><sup>2</sup><italic>/s</italic>. sDWIs at high <italic>b</italic>-value (<italic>b</italic> = 1500<italic>s/mm</italic><sup>2</sup>) were generated from the calculated ADC maps to simulate diffusion contrast without direct acquisition:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mi>s</mml:mi><mml:mi>D</mml:mi><mml:mi>W</mml:mi><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mn>1500</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>&#x22c5;</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1500</mml:mn><mml:mo>&#x22c5;</mml:mo><mml:mi>A</mml:mi><mml:mi>D</mml:mi><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math>
</disp-formula>
<p>This approach enhances image quality and tumor conspicuity while reducing scan time and patient discomfort. This step was performed in Python v3.12.1 using SimpleITK v2.4.0.</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Radiomic analysis</title>
<p>The radiomic feature extraction and analysis workflow is summarized in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>. Features were extracted from post-treatment ADC maps using rectal tumor segmentations delineated by a radiologist on T2WIs. ADC was chosen for its quantitative nature compared to conventional T2WIs and DWIs. Features included shape (<italic>n</italic> = 16), first-order statistics (<italic>n</italic> = 19), and texture features (<italic>n</italic> = 75), derived from the original images as well as from filtered images using Laplacian of Gaussian (LoG, <italic>&#x3c3;</italic> = 1.0&#x2212;5.0) and wavelet decompositions. To emulate inter-rater variability and assess feature robustness, automated tumor segmentations were dilated and eroded. Robust features were retained based on high intra-class correlations (ICC &#x2265; 0.8), while redundant features showing high Spearman correlation (Spearman <italic>&#x3c1;</italic> &#x2265; 0.95) were discarded. Clinical and imaging features were clustered hierarchically and assessed for correlation with treatment outcomes (cCR versus non-cCR), based on chart review.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Radiomics workflow: 1) Image and Segmentation: Rectal tumors were manually segmented on axial T2WIs by a radiologist. 2) Radiomic Feature Extraction: A total of 1317 radiomic features were extracted from the corresponding quantitative ADC maps using the segmented rectal tumors. These included shape (n=16), first-order statistics (n=19), and texture features (n=75), derived from the original images, as well as from images processed with Laplacian of Gaussian (LoG, <italic>&#x3c3;</italic> = 1&#x2013;5) and wavelet filters. 3) Radiomic Feature Selection: Robust and reproducible features were retained based on intraclass correlation coefficient (ICC &#x2265; 0.8) and low inter-feature correlation (<italic>&#x3c1;</italic> &#x2264; 0.95). 4) Unsupervised Learning: Hierarchical clustering was performed to group patients based on radiomic feature similarity, independent of clinical labels. Created in BioRender. Selby, H. (2025) <ext-link ext-link-type="uri" xlink:href="https://BioRender.com/ofq574j">https://BioRender.com/ofq574j</ext-link>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1643852-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating the workflow of radiomic analysis. The first panel includes an image with axial T2-weighted imaging and 3D tumor segmentation. The second panel shows radiomic feature extraction with diagrams of shape, first order statistics, and texture. The third panel presents radiomic feature selection, including a bar graph of robust features and a heatmap of redundant correlated features. The fourth panel illustrates unsupervised learning with a hierarchical clustering heatmap.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_9">
<label>2.9</label>
<title>Statistical analysis</title>
<p>Sample size n=37 was calculated for the primary clinical endpoint (cCR rate) (<xref ref-type="bibr" rid="B12">12</xref>). For the current secondary automated segmentation, we enriched the training data to <italic>n</italic> = 118 through semi-supervised learning with pseudo-labeled data for an additional <italic>n</italic> = 81 cases, while maintaining the same <italic>n</italic> = 37 test cases for evaluation. We compared cCR and non-cCR groups using Wilcoxon rank-sum test for continuous variables (age) and chi-square or Fisher&#x2019;s exact test for categorical variables (sex, tumor stage, nodal stage, tumor location, MRI manufacturer, and slice thickness). Dice Similarity Coefficients (DSCs) were calculated to assess pairwise agreement between segmentations from the radiologist, fellow, and three deep learning models: Model 1 (baseline model trained on <italic>n</italic> = 37 pre-treatment images), Model 2 (semi-supervised model trained on <italic>n</italic> = 118 pre-treatment images), and Model 3 (baseline model trained on <italic>n</italic> = 37 post-treatment images). For each comparison, we computed mean DSC &#xb1; standard deviation (SD) along with mean differences and bias-corrected and accelerated (BCa) bootstrapped 95% confidence intervals (CI) using 10,000 resamples. Statistical significance was assessed using both paired <italic>t</italic>-tests (assuming normally distributed differences) and Wilcoxon signed-rank tests (non-parametric alternative). Radiologist-fellow inter-rater agreement served as the reference standard for model performance evaluation. To evaluate the semi-supervised learning approach, we directly compared Model 2 versus Model 1 performance using paired statistical tests on the same <italic>n</italic> = 37 test cases. All statistical tests were 2-sided; <italic>p &lt;</italic> 0.05 was considered statistically significant. Pre-processing, post-processing, calculations and data analysis were performed with Python v3.12.1.</p>
</sec>
<sec id="s2_10">
<label>2.10</label>
<title>Code availability</title>
<p>Code for training and inference, along with model hyper-parameters and weights, was developed
using Python v3.12.1 and PyTorch v2.6. These resources are publicly available on the GitHub repository <uri xlink:href="https://github.com/s-spire-research/Rectal-Tumor-MRI-SEG">https://github.com/s-spire-research/Rectal-Tumor-MRI-SEG</uri>.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>MRI characteristics</title>
<p>Pre-treatment MRIs were acquired using scanners from GE Medical Systems (65%), Siemens (22%), and Philips (14%) (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>). Slice thickness was predominantly standardized at 3.0<italic>mm</italic> (92%), with a small number acquired at 2.5<italic>mm</italic> (5%) or 3.5<italic>mm</italic> (3%). Post-treatment MRIs were almost exclusively obtained using GE Medical Systems scanners, with a single scan performed on a Siemens system. All post-treatment MRIs were acquired with a slice thickness of 3.0<italic>mm</italic>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>MRI acquisition parameters by clinical response group.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Timepoint</th>
<th valign="middle" align="left">MRI characteristic</th>
<th valign="middle" align="right">non-cCR (<italic>n</italic> = 28)</th>
<th valign="middle" align="right">cCR (<italic>n</italic> = 9)</th>
<th valign="middle" align="right">Total (<italic>n</italic> = 37)</th>
<th valign="middle" align="center"><italic>P</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left" rowspan="8">Pre-Tx</td>
<td valign="middle" align="left">MRI Manufacturer, n (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="center">0.62</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;GE Medical Systems</td>
<td valign="middle" align="right">19 (68)</td>
<td valign="middle" align="right">5 (56)</td>
<td valign="middle" align="right">24 (65)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Philips</td>
<td valign="middle" align="right">4 (14)</td>
<td valign="middle" align="right">1 (11)</td>
<td valign="middle" align="right">5 (14)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Siemens</td>
<td valign="middle" align="right">5 (18)</td>
<td valign="middle" align="right">3 (33)</td>
<td valign="middle" align="right">8 (22)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Slice Thickness (mm), n (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="center">0.59</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;2.5</td>
<td valign="middle" align="right">2 (7)</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">2 (5)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;3.0</td>
<td valign="middle" align="right">25 (89)</td>
<td valign="middle" align="right">9 (100)</td>
<td valign="middle" align="right">34 (92)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;3.5</td>
<td valign="middle" align="right">1 (4)</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">1 (3)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left" rowspan="5">Post-Tx</td>
<td valign="middle" align="left">MRI Manufacturer, n (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="center">1.00</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;GE Medical Systems</td>
<td valign="middle" align="right">27 (96)</td>
<td valign="middle" align="right">9 (100)</td>
<td valign="middle" align="right">36 (97)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;Siemens</td>
<td valign="middle" align="right">1 (4)</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">1 (3)</td>
<td valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Slice Thickness (mm), n (%)</td>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="right"/>
<td valign="middle" align="center">1.00</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2003;3.0</td>
<td valign="middle" align="right">28 (100)</td>
<td valign="middle" align="right">9 (100)</td>
<td valign="middle" align="right">37 (100)</td>
<td valign="middle" align="center"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Pre-Tx, pre-treatment; Post-Tx, post-treatment; cCR, clinical complete response; non-cCR, non-clinical complete response.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Automated segmentation (nnU-Net) performance</title>
<p><xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> illustrates representative rectal tumor segmentations for a patient who achieved cCR. In panels a and b (pre-treatment), both automated models showed strong agreement with manual segmentations: Model 1 (orange) achieved DSCs of 0.892 and 0.854 compared to the radiologist (blue) and fellow (green), respectively, while Model 2 (purple) achieved DSCs of 0.911 and 0.857. Post-treatment (panels c and d), Model 3 (red) achieved DSCs of 0.452 and 0.339 compared to the radiologist and fellow, respectively. Inter-rater agreement between the radiologist and fellow was DSC = 0.858 pre-treatment and declined to DSC = 0.652 post-treatment. This exemplar case illustrates the inherent difficulty of post-treatment tumor delineation, where residual fibrosis and treatment effects obscure tumor boundaries for human raters.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Pre-treatment <bold>(a, b)</bold> and post-treatment <bold>(c, d)</bold> rectal tumor segmentations for a representative patient who achieved complete clinical response. T2WIs <bold>(a, c)</bold> and corresponding 3D renderings <bold>(b, d)</bold> show manual segmentations by the radiologist (blue) and fellow (green), alongside automated nnU-Net predictions: Model 1 (orange), Model 2 (purple), and Model 3 (red). For this case, pre-treatment DSCs were: Model 1 vs. radiologist = 0.892, vs. fellow = 0.854; Model 2 vs. radiologist = 0.911, vs. fellow = 0.857. Post-treatment DSCs were: Model 3 vs. radiologist = 0.452, vs. fellow = 0.339. Inter-rater agreement was DSC = 0.858 pre-treatment and 0.652 post-treatment. Model 2 demonstrated superior pre-treatment overlap, while post-treatment segmentation (Model 3) remained challenging due to treatment-induced tissue changes. Created in BioRender. Selby, H. (2025) <ext-link ext-link-type="uri" xlink:href="https://BioRender.com/ga308oy">https://BioRender.com/ga308oy</ext-link>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1643852-g004.tif">
<alt-text content-type="machine-generated">MRI images and 3D visualizations of rectal tumor segmentations. Panels a) and c) show MRI cross-sections with colored outlines indicating segmentations by a radiologist, fellow, and models. Panels b) and d) display corresponding 3D models: tumor segmentations use colors blue, green, yellow, purple, and red for radiologist, fellow, Model 1, Model 2, and Model 3, respectively.</alt-text>
</graphic></fig>
<p><xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> presents quantitative segmentation performance across all 37 patients. For pre-treatment segmentation, radiologist-fellow inter-rater agreement served as the reference standard with a mean DSC of 0.748 &#xb1; 0.092. Model 1 (baseline; <italic>n</italic> = 37) achieved mean DSCs of 0.682 &#xb1; 0.254 when compared to the radiologist and 0.639 &#xb1; 0.249 when compared to the fellow, both lower than the inter-rater benchmark. Model 1 underperformed both raters, with mean differences of &#x2212;0.067 vs. the radiologist (95% BCa CI: &#x2212;0.16 to 0.001; Wilcoxon <italic>p</italic> = 0.79) and &#x2212;0.11 vs. the fellow (95% BCa CI: &#x2212;0.20 to &#x2212;0.047; paired <italic>t</italic>-test <italic>p</italic> = 0.007, Wilcoxon <italic>p</italic> = 0.042). Model 2 (semi-supervised; <italic>n</italic> = 118) demonstrated improved performance with mean DSCs of 0.769 &#xb1; 0.214 compared to the radiologist and 0.687 &#xb1; 0.201 compared to the fellow. Notably, Model 2 slightly outperformed the radiologist (mean difference: +0.021, 95% BCa CI: &#x2212;0.073 to 0.076), though statistical significance varied by test (paired <italic>t</italic>-test <italic>p</italic> = 0.58, Wilcoxon <italic>p</italic> = 0.008). These discordant results between paired t-test (<italic>p</italic> = 0.58) and Wilcoxon test (<italic>p</italic> = 0.008) suggest non-normal distribution of differences, with the non-parametric Wilcoxon test considered more reliable. Model 2 showed a mean difference of &#x2212;0.061 (95% BCa CI: &#x2212;0.14 to &#x2212;0.015; paired <italic>t</italic>-test <italic>p</italic> = 0.060, Wilcoxon <italic>p</italic> = 0.29) compared to the fellow.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Dice similarity coefficients comparing radiologist, fellow, and automated models (<italic>n</italic> = 37 test cases).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Timepoint</th>
<th valign="middle" rowspan="2" align="left">Comparison</th>
<th valign="middle" align="center">Mean DSC</th>
<th valign="middle" align="center">Difference</th>
<th valign="middle" align="center"><italic>t</italic>-test</th>
<th valign="middle" align="center">Wilcoxon</th>
</tr>
<tr>
<th valign="middle" align="center">(SD)</th>
<th valign="middle" align="center">[95% BCa CI]</th>
<th valign="middle" align="center"><italic>p</italic>-value<italic><xref ref-type="table-fn" rid="fnT2_1"><sup>a</sup></xref></italic></th>
<th valign="middle" align="center"><italic>p</italic>-value<italic><xref ref-type="table-fn" rid="fnT2_2"><sup>b</sup></xref></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="left">Pre-Tx</td>
<td valign="middle" align="left">Radiologist vs Fellow<italic><xref ref-type="table-fn" rid="fnT2_3"><sup>c</sup></xref></italic></td>
<td valign="middle" align="center">0.748 (0.092)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="left">Model 1 vs Radiologist</td>
<td valign="middle" align="center">0.682 (0.254)</td>
<td valign="middle" align="center">&#x2212;0.067 [&#x2212;0.16 to 0.001]</td>
<td valign="middle" align="center">0.11</td>
<td valign="middle" align="center">0.79</td>
</tr>
<tr>
<td valign="middle" align="left">Model 1 vs Fellow</td>
<td valign="middle" align="center">0.639 (0.249)</td>
<td valign="middle" align="center">&#x2212;0.11 [&#x2212;0.20 to &#x2212;0.047]</td>
<td valign="middle" align="center">0.007</td>
<td valign="middle" align="center">0.042</td>
</tr>
<tr>
<td valign="middle" align="left">Model 2 vs Radiologist</td>
<td valign="middle" align="center">0.769 (0.214)</td>
<td valign="middle" align="center">+0.021 [&#x2212;0.073 to 0.076]</td>
<td valign="middle" align="center">0.58</td>
<td valign="middle" align="center"><bold>0</bold>.<bold>008</bold></td>
</tr>
<tr>
<td valign="middle" align="left">Model 2 vs Fellow</td>
<td valign="middle" align="center">0.687 (0.201)</td>
<td valign="middle" align="center">&#x2212;0.061 [&#x2212;0.14 to &#x2212;0.015]</td>
<td valign="middle" align="center">0.060</td>
<td valign="middle" align="center">0.29</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="left">Post-Tx</td>
<td valign="middle" align="left">Radiologist vs Fellow<italic><xref ref-type="table-fn" rid="fnT2_4"><sup>d</sup></xref></italic></td>
<td valign="middle" align="center">0.362 (0.256)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="left">Model 3 vs Radiologist</td>
<td valign="middle" align="center">0.175 (0.231)</td>
<td valign="middle" align="center">&#x2212;0.187 [&#x2212;0.283 to &#x2212;0.079]</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
<tr>
<td valign="middle" align="left">Model 3 vs Fellow</td>
<td valign="middle" align="center">0.125 (0.199)</td>
<td valign="middle" align="center">&#x2212;0.237 [&#x2212;0.322 to &#x2212;0.159]</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
<td valign="middle" align="center"><italic>&lt;</italic> 0.001</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Pre-Tx, pre-treatment; Post-Tx, post-treatment; DSC, Dice similarity coefficient; SD, standard deviation; CI, confidence interval; BCa, bias-corrected and accelerated bootstrap with 10,000 resamples. Model 1 = baseline model trained on pre-treatment images (<italic>n</italic> = 37); Model 2 = semi-supervised model trained on pre-treatment images (<italic>n</italic> = 118); Model 3 = baseline model trained on post-treatment images (<italic>n</italic> = 37). For model comparisons, mean difference calculated as Model DSC &#x2212; Radiologist/Fellow DSC; positive values indicate model outperformed the reference rater, negative values indicate model underperformed.</p></fn>
<fn>
<p>Significant values indicating improved model performance (<italic>p &lt;</italic> 0.05) are bolded.</p></fn>
<fn id="fnT2_1"><label>a</label>
<p>Paired <italic>t</italic>-test assumes normally distributed differences.</p></fn>
<fn id="fnT2_2"><label>b</label>
<p>Wilcoxon signed-rank test is non-parametric (no normality assumption).</p></fn>
<fn id="fnT2_3"><label>c</label>
<p>Inter-rater agreement (reference standard) pre-treatment; DSC reported without statistical comparison.</p></fn>
<fn id="fnT2_4"><label>d</label>
<p>Inter-rater agreement (reference standard) post-treatment; DSC reported without statistical comparison.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Post-treatment segmentation proved more challenging for both Model 3 and expert raters. Model 3 (baseline; <italic>n</italic> = 37) achieved mean DSCs of 0.175 &#xb1; 0.231 versus the radiologist and 0.125 &#xb1; 0.199 versus the fellow, both significantly lower than post-treatment inter-rater agreement (mean differences: &#x2212;0.187, 95% BCa CI: &#x2212;0.283 to &#x2212;0.079; and &#x2212;0.237, 95% BCa CI: &#x2212;0.322 to &#x2212;0.159; both <italic>p &lt;</italic> 0.001 for paired <italic>t</italic>-test and Wilcoxon test). Radiologist-fellow inter-rater agreement also declined from 0.748 &#xb1; 0.092 pre-treatment to 0.362 &#xb1; 0.256 post-treatment, underscoring again the inherent difficulty of post-treatment tumor delineation due to treatment-induced changes that challenged both human raters and automated models.</p>
<p><xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref> shows the direct comparison between Model 1 (baseline; <italic>n</italic> = 37) and Model 2 (semi-supervised; <italic>n</italic> = 118). Model 2 demonstrated an absolute mean gain of 0.087 (95% BCa CI: 0.052 to 0.132; 12.8% relative improvement; paired <italic>t</italic>-test <italic>p &lt;</italic> 0.001, Wilcoxon <italic>p &lt;</italic> 0.001) in agreement with the radiologist, improving in 36 of 37 cases (97.3%). The improvement in agreement with the fellow was more modest (mean gain: 0.049, 95% BCa CI: 0.019 to 0.085; 7.6% relative improvement; paired <italic>t</italic>-test <italic>p &lt;</italic> 0.01, Wilcoxon <italic>p &lt;</italic> 0.05), with 22 of 37 cases (59.5%) showing improvement and 15 (40.5%) showing decreased agreement, demonstrating that Model 2 primarily enhanced consistency with the radiologist&#x2019;s segmentation approach.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Direct comparison of model 2 versus model 1 performance (<italic>n</italic> = 37 cases).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Reference</th>
<th valign="middle" align="center">Mean gain</th>
<th valign="middle" align="center">95% BCa CI</th>
<th valign="middle" align="center">Paired <italic>t</italic>-test</th>
<th valign="middle" align="center">Wilcoxon</th>
<th valign="middle" align="center">Improved</th>
</tr>
<tr>
<th valign="middle" align="left">rater</th>
<th valign="middle" align="center">(Relative)</th>
<th valign="middle" align="center">[Difference]</th>
<th valign="middle" align="center"><italic>p</italic>-value</th>
<th valign="middle" align="center"><italic>p</italic>-value</th>
<th valign="middle" align="center">cases</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Radiologist</td>
<td valign="middle" align="center">0.087 (12.8%)</td>
<td valign="middle" align="center">[0.052 to 0.132]</td>
<td valign="middle" align="center"><italic>&lt;</italic><bold>0</bold>.<bold>001</bold></td>
<td valign="middle" align="center"><italic>&lt;</italic><bold>0</bold>.<bold>001</bold></td>
<td valign="middle" align="center">36<italic>/</italic>37 (97.3%)</td>
</tr>
<tr>
<td valign="middle" align="left">Fellow</td>
<td valign="middle" align="center">0.049 (7.6%)</td>
<td valign="middle" align="center">[0.019 to 0.085]</td>
<td valign="middle" align="center"><italic>&lt;</italic><bold>0</bold>.<bold>01</bold></td>
<td valign="middle" align="center"><italic>&lt;</italic><bold>0</bold>.<bold>05</bold></td>
<td valign="middle" align="center">22<italic>/</italic>37 (59.5%)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>BCa. bias-corrected and accelerated bootstrap with 10,000 resamples. Mean gain calculated as Model 2 DSC &#x2212; Model 1 DSC. Model 1 = baseline model trained on <italic>n</italic> = 37 images; Model 2 = semi-supervised model trained on <italic>n</italic> = 118 images. Both models evaluated on the same 37 cases. Significant values (<italic>p &lt;</italic> 0.05) are bolded.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Radiomics analysis</title>
<p>A total of 1317 radiomic features were extracted from ADC maps and the rectal tumor segmentations delineated by the radiologist on T2WIs. We identified 416 highly reliable radiomic features (ICC &#x2265; 0.8) with minimal redundancy (Spearman <italic>&#x3c1;</italic> &#x2264; 0.95), indicating robust reproducibility and relevance for clinical interpretations. We performed hierarchical clustering using these 416 robust and reproducible radiomic features, after applying z-score standardization to normalize feature scales across patients.</p>
<p><xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> presents a hierarchical clustering heatmap of radiomic features extracted from quantitative post-treatment ADC maps using radiologist-delineated rectal tumor segmentations. Each row represents a radiomic feature, color-coded by category (blue: shape, orange: first-order, red: texture), and each column corresponds to an individual patient. Clustering was performed based solely on radiomic feature similarity, without knowledge of clinical outcomes. Despite this, the resulting clusters show clear alignment with clinical response: patients who achieved a cCR are highlighted in green, while those without cCR are shown in red. Two distinct patient clusters emerged, suggesting strong intrinsic differences in imaging phenotypes between response groups.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Hierarchical clustering heatmap of radiomic features extracted from rectal tumor segmentations on ADC images. Rows represent radiomic features&#x2014;shape, first-order, and texture&#x2014;color-coded at left (blue: shape; orange: first-order; red: texture). Columns represent individual patients, with cCR status indicated (green: cCR; red: non-cCR). Clustering was performed after z-score standardization and selection of 416 highly reliable radiomic features (ICC &#x2265; 0.8) with minimal redundancy (pairwise correlation &#x2264; 0.95), indicating robust reproducibility and clinical relevance. Heatmap colors reflect z-scored feature values (red = high, blue = low). Notable cases: SFX 002 had tumor regrowth, and SFX 011 and SFX 029 had a pathologic complete response following abdominoperineal resection (APR).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-15-1643852-g005.tif">
<alt-text content-type="machine-generated">Clustered heatmap displaying hierarchical clustering of data across samples and features. Rows represent features divided by categories: Shape (yellow), First Order (orange), Texture (purple), and columns for samples marked as cCR (green) or non-cCR (red). The color gradient from blue to red indicates data variability, with a dendrogram showing relationships.</alt-text>
</graphic></fig>
<p>We also noted the following 3 outliers based on chart review after post-treatment MRI: SFX 002, who experienced tumor regrowth; SFX 011 and SFX 029, who had a pathologic complete response following abdominoperineal resection (APR). These examples further support the concordance between radiomic clustering and meaningful clinical outcomes.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this secondary analysis of data from a Phase 2 clinical trial of TNT for rectal cancer, we developed three nnU-Net models to automate rectal tumor segmentation on pre- and post-treatment MRIs. Model 1 (baseline; <italic>n</italic> = 37) achieved lower agreement with both expert raters than the raters achieved with each other, demonstrating the challenge of training deep learning models with limited annotated data. Model 2 (semi-supervised; <italic>n</italic> = 118) substantially improved performance, achieving agreement levels that matched or slightly exceeded radiologist-fellow inter-rater agreement. The improvement was greater in agreement with the radiologist (97.3% of cases) than the fellow (59.5%), as Model 1, trained on the radiologist&#x2019;s segmentations, generated the pseudo-labels for the additional 81 cases used in Model 2. This reinforced the radiologist&#x2019;s segmentation approach, demonstrating that pseudo-labeled data effectively captures and propagates the expert segmentation patterns from which they originate, offering a practical solution to limited annotations in medical imaging.</p>
<p>Post-treatment segmentation (Model 3) proved more challenging due to treatment-induced changes such as fibrosis, edema, and scar tissue. Model 3 achieved lower agreement with each rater than the raters achieved with each other, though radiologist-fellow inter-rater agreement itself declined from pre-treatment levels, with notably increased variability. This parallel decline in both human and automated performance highlights that this is not solely a model limitation but a fundamental imaging challenge affecting both automated algorithms and human raters. The decline underscores the biological complexity of treatment response assessment, where tumor boundaries become indistinct due to treatment-induced tissue changes such as fibrosis, edema, and tissue remodeling. Despite these post-treatment segmentation challenges, radiomic features extracted from the radiologist&#x2019;s segmentations still showed distinct imaging profiles associated with cCR versus non-cCR outcomes, demonstrating potential as predictive biomarkers for treatment response and suggesting that quantitative analysis may capture meaningful signal even when visual delineation is difficult.</p>
<p>Leveraging deep learning-based automated segmentation models further facilitates the extraction of robust radiomic features from MRI data, enhancing their applicability as predictive biomarkers. While the use of radiomics is well-established in breast, lung, and prostate cancers (<xref ref-type="bibr" rid="B15">15</xref>&#x2013;<xref ref-type="bibr" rid="B19">19</xref>), MRI-based radiomics in rectal cancer remains relatively novel. Our study demonstrates the utility and feasibility of MRI-based radiomics in rectal cancer, underscoring its potential to standardize MRI interpretation, reduce subjectivity among radiologists, and refine criteria for assessing treatment response. In particular, radiomic features derived from post-treatment segmentations and ADC maps effectively differentiated between cCR and non-cCR patients, supporting clinical decision-making and personalized management strategies. Continued radiomic analyses, especially those incorporating pre- and post-treatment segmentation, are essential for refining predictive accuracy and optimizing patient selection for non-operative management strategies as TNT becomes more widely adopted in rectal cancer management.</p>
<p>In addition to their potential utility in clinical care, automated segmentation models, with further refinement, could improve the efficiency of radiomic studies, as such studies typically depend on manual segmentation by multiple radiologists. Leveraging deep learning-based approaches can reduce the manual annotation workload, enabling scalable analysis of larger datasets and multi-institutional studies. However, due to suboptimal performance in post-treatment rectal tumor segmentation (Model 3), continued radiologist oversight remains essential, particularly in challenging post-treatment scenarios. Future efforts should focus specifically on improving post-treatment segmentation through larger datasets, potentially employing similar semi-supervised learning strategies that proved successful for pre-treatment segmentation, and developing specialized architectures optimized for post-treatment anatomy.</p>
<p>This study has several limitations. First, the small sample size of reference standard annotations limits generalizability, though we mitigated this through semi-supervised learning with additional pseudo-labeled cases (Model 2) for pre-treatment segmentation. While our study cohort enabled initial model development and identified promising radiomic signatures, validation in larger, multi-institutional datasets is essential before clinical implementation. Second, post-treatment segmentation performance declined, reflecting the inherent complexity of post-treatment anatomy where tumors are obscured by radiation-induced fibrosis, edema, and scar tissue. Importantly, inter-rater agreement also declined post-treatment, highlighting this is not solely a model limitation but a fundamental imaging challenge. Future work should prioritize post-treatment segmentation accuracy through: (1) applying semi-supervised learning approaches similar to those successful for pre-treatment segmentation, (2) training on larger post-treatment datasets, (3) incorporating longitudinal imaging to track changes, and (4) developing specialized architectures for post-treatment anatomy. Until these improvements are achieved, radiologist oversight remains essential for post-treatment assessments.</p>
<p>Third, nnU-Net models were trained on segmentations delineated by a radiologist. While we used morphological operations (erosion/dilation) to simulate inter-rater variability for robustness testing, we acknowledge this may not capture the full spectrum of clinical variability. The pseudo-labels generated by Model 1 inherently contain errors from the baseline model, yet training on this noisy data (Model 2) still improved performance, suggesting semi-supervised learning is robust to label imperfections. Model 2 demonstrated greater improvement in agreement with the radiologist compared to the fellow, suggesting the model may have learned the radiologist&#x2019;s specific segmentation approach rather than achieving universal improvement across all segmentation styles. This highlights that semi-supervised learning performance may be influenced by the characteristics of the initial training data and the source of pseudo-labels. Future studies should incorporate annotations from multiple radiologists to better represent clinical variability and establish more robust reference standard segmentations. Fourth, although rectal tumor segmentations were performed on T2WIs, nnU-Net training was conducted on sDWIs and ADC maps, while radiomic feature extraction was performed solely on ADC maps without prior image registration. This spatial mismatch between the segmentation masks and the underlying DWI and ADC map data may introduce inaccuracies and affect both model performance and feature reliability. We observed that conventional rigid registration was insufficient to align T2WIs, ADC maps, and sDWIs due to anatomical and acquisition differences, particularly in the pelvis.</p>
<p>Finally, although we incorporated T2WIs, ADC maps, and high <italic>b</italic>-value sDWIs into the nnU-Net framework as a three-channel input, the model treats all three modalities equally. This may not reflect the varying diagnostic value of each sequence, and future work could explore modality-specific weighting or attention mechanisms to optimize multi-modal integration. As a single-center secondary analysis, our findings require external validation. The specific patient population (primarily with high-risk features), treatment protocol (dose-escalated SCRT with FOLFOXIRI), and imaging protocols may not generalize to other centers or treatment approaches. Multi-institutional validation with diverse protocols and patient populations is essential before clinical translation. Despite these limitations, our study provides important proof-of-concept that automated segmentation with semi-supervised learning and radiomics can differentiate treatment response in rectal cancer, warranting larger validation studies.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>We have demonstrated the feasibility and potential of integrating deep learning-based automated segmentation and ADC-based radiomics to enhance evaluation of rectal cancer treatment response. By employing a semi-supervised learning approach, we successfully addressed the challenge of limited training data &#x2013; a pervasive barrier in medical imaging AI. Model 2 (semi-supervised) improved performance compared to Model 1 (baseline trained on limited data), achieving agreement levels that approached or slightly exceeded inter-rater variability. This finding offers a practical and scalable strategy for developing robust deep learning models in annotated data-scarce clinical scenarios. While challenges remain in post-treatment segmentation, where both automated models and human raters face difficulties due to treatment-induced tissue changes, our results establish a foundation for future improvements. Ongoing refinements through semi-supervised learning with larger datasets, multi-radiologist validation, specialized post-treatment architectures, and standardized imaging protocols hold promise for enhancing automated segmentation accuracy. Such advancements have the potential to improve personalized clinical decision-making, enhance predictive accuracy, and optimize patient management strategies, ultimately facilitating safe and effective non-operative approaches in rectal cancer management.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The MRI data utilized in this study contains Protected Health Information (PHI) and is subject to HIPAA regulations, which prohibit public sharing to safeguard patient privacy. De-identified data may be made available upon reasonable request, subject to approval by the appropriate IRB or ethics committee, in accordance with HIPAA guidelines. Requests to access these datasets should be directed to <email xlink:href="mailto:selbyh@stanford.edu">selbyh@stanford.edu</email>.</p></sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Stanford University Institutional Review Board (IRB), Research Compliance Office, Stanford University, Stanford, California, USA. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>HS: Formal Analysis, Visualization, Investigation, Data curation, Resources, Validation, Software, Methodology, Writing &#x2013; review &amp; editing, Writing &#x2013; original draft, Conceptualization. AS: Software, Data curation, Writing &#x2013; review &amp; editing, Methodology, Writing &#x2013; original draft. VS: Visualization, Writing &#x2013; original draft, Resources, Data curation, Validation, Methodology, Investigation, Conceptualization, Project administration, Supervision. TW: Methodology, Resources, Writing &#x2013; original draft, Investigation, Funding acquisition, Project administration, Conceptualization, Writing &#x2013; review &amp; editing, Supervision. EP: Resources, Writing &#x2013; review &amp; editing. AM: Funding acquisition, Writing &#x2013; review &amp; editing, Project administration, Writing &#x2013; original draft, Resources, Methodology, Conceptualization, Investigation, Supervision.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>We want to acknowledge Dr. Muhammed Umer Nisar, a radiology fellow at Stanford, for his delineation of rectal tumors on pre- and post-treatment T2WIs, which was important to this study.</p>
</ack>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s13" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fonc.2025.1643852/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fonc.2025.1643852/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf"><label>Supplementary Table&#xa0;1</label>
<caption>
<p>Patient characteristics from SFX trial (<xref ref-type="bibr" rid="B12">12</xref>). IQR, Interquartile Range; cCR, clinical complete response; non-cCR, non-clinical complete response; Pre-tx, pre-treatment; Post-tx, post-treatment. Significant values (<italic>p</italic> &lt; 0.05) are bolded.</p>
</caption></supplementary-material></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gamboa</surname> <given-names>AC</given-names></name>
<name><surname>Lee</surname> <given-names>RM</given-names></name>
<name><surname>Turgeon</surname> <given-names>MK</given-names></name>
<name><surname>Varlamos</surname> <given-names>C</given-names></name>
<name><surname>Regenbogen</surname> <given-names>SE</given-names></name>
<name><surname>Hrebinko</surname> <given-names>KA</given-names></name>
<etal/>
</person-group>. 
<article-title>Impact of postoperative complications on oncologic outcomes after rectal cancer surgery: an analysis of the US rectal cancer consortium</article-title>. <source>Ann Surg Oncol</source>. (<year>2021</year>) <volume>28</volume>:<page-range>1712&#x2013;21</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1245/s10434-020-08976-8</pub-id>, PMID: <pub-id pub-id-type="pmid">32968958</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kim</surname> <given-names>S</given-names></name>
<name><surname>Kim</surname> <given-names>MH</given-names></name>
<name><surname>Oh</surname> <given-names>JH</given-names></name>
<name><surname>Jeong</surname> <given-names>S</given-names></name>
<name><surname>Park</surname> <given-names>KJ</given-names></name>
<name><surname>Oh</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Predictors of permanent stoma creation in patients with mid or low rectal cancer: results of a multicentre cohort study with preoperative evaluation of anal function</article-title>. <source>Colorectal Dis</source>. (<year>2020</year>) <volume>22</volume>:<fpage>399</fpage>&#x2013;<lpage>407</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/codi.14898</pub-id>, PMID: <pub-id pub-id-type="pmid">31698537</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Robitaille</surname> <given-names>S</given-names></name>
<name><surname>Maalouf</surname> <given-names>MF</given-names></name>
<name><surname>Penta</surname> <given-names>R</given-names></name>
<name><surname>Joshua</surname> <given-names>TG</given-names></name>
<name><surname>Liberman</surname> <given-names>AS</given-names></name>
<name><surname>Fiore</surname> <given-names>JF</given-names></name>
<etal/>
</person-group>. 
<article-title>The impact of restorative proctectomy versus permanent colostomy on health-related quality of life after rectal cancer surgery using the patient-generated index</article-title>. <source>Surgery</source>. (<year>2023</year>) <volume>174</volume>:<page-range>813&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.surg.2023.06.033</pub-id>, PMID: <pub-id pub-id-type="pmid">37495462</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rivard</surname> <given-names>SJ</given-names></name>
<name><surname>Vitous</surname> <given-names>CA</given-names></name>
<name><surname>Bamdad</surname> <given-names>MC</given-names></name>
<name><surname>Lussiez</surname> <given-names>A</given-names></name>
<name><surname>Anderson</surname> <given-names>MS</given-names></name>
<name><surname>Varlamos</surname> <given-names>C</given-names></name>
<etal/>
</person-group>. 
<article-title>&#x201c;I Wish There had been Resources&#x201d;: A Photo-Elicitation Study of Rectal Cancer Survivorship Care Needs</article-title>. <source>Ann Surg Oncol</source>. (<year>2023</year>) <volume>30</volume>:<page-range>3530&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1245/s10434-022-13042-6</pub-id>, PMID: <pub-id pub-id-type="pmid">36847958</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Langenfeld</surname> <given-names>SJ</given-names></name>
<name><surname>Davis</surname> <given-names>BR</given-names></name>
<name><surname>Vogel</surname> <given-names>JD</given-names></name>
<name><surname>Davids</surname> <given-names>JS</given-names></name>
<name><surname>Temple</surname> <given-names>LKF</given-names></name>
<name><surname>Cologne</surname> <given-names>KG</given-names></name>
<etal/>
</person-group>. 
<article-title>The American society of colon and rectal surgeons clinical practice guidelines for the management of rectal cancer 2023 supplement</article-title>. <source>Dis Colon Rectum</source>. (<year>2024</year>) <volume>67</volume>:<fpage>18</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/DCR.0000000000003057</pub-id>, PMID: <pub-id pub-id-type="pmid">37647138</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rettig</surname> <given-names>RL</given-names></name>
<name><surname>Beard</surname> <given-names>BW</given-names></name>
<name><surname>Ryoo</surname> <given-names>JJ</given-names></name>
<name><surname>Kulkarni</surname> <given-names>S</given-names></name>
<name><surname>Gulati</surname> <given-names>M</given-names></name>
<name><surname>Tam</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>Total neoadjuvant therapy significantly increases complete clinical response</article-title>. <source>Dis Colon Rectum</source>. (<year>2023</year>) <volume>66</volume>:<page-range>374&#x2013;82</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/DCR.0000000000002290</pub-id>, PMID: <pub-id pub-id-type="pmid">35239525</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Erozkan</surname> <given-names>K</given-names></name>
<name><surname>Elamin</surname> <given-names>D</given-names></name>
<name><surname>Tasci</surname> <given-names>ME</given-names></name>
<name><surname>Liska</surname> <given-names>D</given-names></name>
<name><surname>Valente</surname> <given-names>MA</given-names></name>
<name><surname>Alipouriani</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>Evaluating complete response rates and predictors in total neoadjuvant therapy for rectal cancer</article-title>. <source>J Gastrointest Surg: Off J Soc Surg Aliment Tract</source>. (<year>2024</year>) <volume>28</volume>:<page-range>1605&#x2013;12</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gassur.2024.07.015</pub-id>, PMID: <pub-id pub-id-type="pmid">39067745</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Asare</surname> <given-names>E</given-names></name>
<name><surname>Venner</surname> <given-names>E</given-names></name>
<name><surname>Batchelor</surname> <given-names>H</given-names></name>
<name><surname>Sanders</surname> <given-names>J</given-names></name>
<name><surname>Kunk</surname> <given-names>P</given-names></name>
<name><surname>Hedrick</surname> <given-names>T</given-names></name>
<etal/>
</person-group>. 
<article-title>Outcomes associated with total neoadjuvant therapy with non-operative intent for rectal adenocarcinoma</article-title>. <source>Front Oncol</source>. (<year>2024</year>) <volume>14</volume>:<elocation-id>1374360</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2024.1374360</pub-id>, PMID: <pub-id pub-id-type="pmid">39156701</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Siddiqui</surname> <given-names>MRS</given-names></name>
<name><surname>Bhoday</surname> <given-names>J</given-names></name>
<name><surname>Battersby</surname> <given-names>NJ</given-names></name>
<name><surname>Chand</surname> <given-names>M</given-names></name>
<name><surname>West</surname> <given-names>NP</given-names></name>
<name><surname>Abulafi</surname> <given-names>A-M</given-names></name>
<etal/>
</person-group>. 
<article-title>Defining response to radiotherapy in rectal cancer using magnetic resonance imaging and histopathological scales</article-title>. <source>World J Gastroenterol</source>. (<year>2016</year>) <volume>22</volume>:<page-range>8414&#x2013;34</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3748/wjg.v22.i37.8414</pub-id>, PMID: <pub-id pub-id-type="pmid">27729748</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Delli Pizzi</surname> <given-names>A</given-names></name>
<name><surname>Basilico</surname> <given-names>R</given-names></name>
<name><surname>Cianci</surname> <given-names>R</given-names></name>
<name><surname>Seccia</surname> <given-names>B</given-names></name>
<name><surname>Timpani</surname> <given-names>M</given-names></name>
<name><surname>Tavoletta</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>Rectal cancer MRI: protocols, signs and future perspectives radiologists should consider in everyday clinical practice</article-title>. <source>Insights into Imaging</source>. (<year>2018</year>) <volume>9</volume>:<page-range>405&#x2013;12</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13244-018-0606-5</pub-id>, PMID: <pub-id pub-id-type="pmid">29675627</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Isensee</surname> <given-names>F</given-names></name>
<name><surname>Jaeger</surname> <given-names>PF</given-names></name>
<name><surname>Kohl</surname> <given-names>SAA</given-names></name>
<name><surname>Petersen</surname> <given-names>J</given-names></name>
<name><surname>Maier-Hein</surname> <given-names>KH</given-names></name>
</person-group>. 
<article-title>nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation</article-title>. <source>Nat Methods</source>. (<year>2021</year>) <volume>18</volume>:<page-range>203&#x2013;11</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41592-020-01008-z</pub-id>, PMID: <pub-id pub-id-type="pmid">33288961</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Klebaner</surname> <given-names>D</given-names></name>
<name><surname>Brown</surname> <given-names>E</given-names></name>
<name><surname>Fisher</surname> <given-names>GA</given-names></name>
<name><surname>Shelton</surname> <given-names>A</given-names></name>
<name><surname>Johnson</surname> <given-names>TP</given-names></name>
<name><surname>Shaheen</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Phase II trial of organ preservation program using short-course radiation and FOLFOXIRI for rectal cancer (SHORT-FOX): Two-Year primary outcome analysis</article-title>. <source>Radiother Oncol</source>. (<year>2025</year>) <volume>207</volume>:<elocation-id>110884</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.radonc.2025.110884</pub-id>, PMID: <pub-id pub-id-type="pmid">40209856</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fedorov</surname> <given-names>A</given-names></name>
<name><surname>Beichel</surname> <given-names>R</given-names></name>
<name><surname>Kalpathy-Cramer</surname> <given-names>J</given-names></name>
<name><surname>Finet</surname> <given-names>J</given-names></name>
<name><surname>Fillion-Robin</surname> <given-names>J-C</given-names></name>
<name><surname>Pujol</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>3D Slicer as an image computing platform for the Quantitative Imaging Network</article-title>. <source>Magnetic Resonance Imaging</source>. (<year>2012</year>) <volume>30</volume>:<page-range>1323&#x2013;41</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.mri.2012.05.001</pub-id>, PMID: <pub-id pub-id-type="pmid">22770690</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Selby</surname> <given-names>HM</given-names></name>
<name><surname>Son</surname> <given-names>YA</given-names></name>
<name><surname>Sheth</surname> <given-names>VR</given-names></name>
<name><surname>Wagner</surname> <given-names>TH</given-names></name>
<name><surname>Pollom</surname> <given-names>EL</given-names></name>
<name><surname>Morris</surname> <given-names>AM</given-names></name>
</person-group>. 
<article-title>AI-ready rectal cancer MR imaging: a workflow for tumor detection and segmentation</article-title>. <source>BMC Med Imaging</source>. (<year>2025</year>) <volume>25</volume>:<elocation-id>88</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12880-025-01614-3</pub-id>, PMID: <pub-id pub-id-type="pmid">40087634</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>S</given-names></name>
<name><surname>Wei</surname> <given-names>Y</given-names></name>
<name><surname>Li</surname> <given-names>Z</given-names></name>
<name><surname>Xu</surname> <given-names>J</given-names></name>
<name><surname>Zhou</surname> <given-names>Y</given-names></name>
</person-group>. 
<article-title>Development and validation of an MRI radiomics-based signature to predict histological grade in patients with invasive breast cancer</article-title>. <source>Breast Cancer (Dove Med Press)</source>. (<year>2022</year>) <volume>14</volume>:<page-range>335&#x2013;42</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2147/BCTT.S380651</pub-id>, PMID: <pub-id pub-id-type="pmid">36262333</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>You</surname> <given-names>C</given-names></name>
<name><surname>Su</surname> <given-names>G-H</given-names></name>
<name><surname>Zhang</surname> <given-names>X</given-names></name>
<name><surname>Xiao</surname> <given-names>Y</given-names></name>
<name><surname>Zheng</surname> <given-names>R-C</given-names></name>
<name><surname>Sun</surname> <given-names>S-Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Multicenter radio multiomic analysis for predicting breast cancer outcome and unravelling imaging-biological connection</article-title>. <source>NPJ Precis Oncol</source>. (<year>2024</year>) <volume>8</volume>:<fpage>193</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41698-024-00666-y</pub-id>, PMID: <pub-id pub-id-type="pmid">39244594</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Selby</surname> <given-names>HM</given-names></name>
<name><surname>Mukherjee</surname> <given-names>P</given-names></name>
<name><surname>Parham</surname> <given-names>C</given-names></name>
<name><surname>Malik</surname> <given-names>SB</given-names></name>
<name><surname>Gevaert</surname> <given-names>O</given-names></name>
<name><surname>Napel</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Performance of alternative manual and automated deep learning segmentation techniques for the prediction of benign and malignant lung nodules</article-title>. <source>J Med Imaging</source>. (<year>2023</year>) <volume>10</volume>:<page-range>1&#x2013;14</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1117/1.JMI.10.4.044006</pub-id>, PMID: <pub-id pub-id-type="pmid">37564098</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shah</surname> <given-names>RP</given-names></name>
<name><surname>Selby</surname> <given-names>HM</given-names></name>
<name><surname>Mukherjee</surname> <given-names>P</given-names></name>
<name><surname>Verma</surname> <given-names>S</given-names></name>
<name><surname>Xie</surname> <given-names>P</given-names></name>
<name><surname>Xu</surname> <given-names>Q</given-names></name>
<etal/>
</person-group>. 
<article-title>Machine learning radiomics model for early identification of small-cell lung cancer on computed tomography scans</article-title>. <source>JCO Clin Cancer Inf</source>. (<year>2021</year>) <volume>5</volume>:<page-range>746&#x2013;57</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1200/CCI.21.00021</pub-id>, PMID: <pub-id pub-id-type="pmid">34264747</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>F</given-names></name>
<name><surname>Ford</surname> <given-names>JC</given-names></name>
<name><surname>Dogan</surname> <given-names>N</given-names></name>
<name><surname>Padgett</surname> <given-names>KR</given-names></name>
<name><surname>Breto</surname> <given-names>AL</given-names></name>
<name><surname>Abramowitz</surname> <given-names>MC</given-names></name>
<etal/>
</person-group>. 
<article-title>Magnetic resonance imaging (MRI)-based radiomics for prostate cancer radiotherapy</article-title>. <source>Transl Androl Urol</source>. (<year>2018</year>) <volume>7</volume>:<page-range>445&#x2013;58</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21037/tau.2018.06.05</pub-id>, PMID: <pub-id pub-id-type="pmid">30050803</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/733915">Cheng Wei</ext-link>, University of Dundee, United Kingdom</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/535901">Hao Liu</ext-link>, Southern Medical University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3150657">Jeba Karunya Ramireddy</ext-link>, Christian Medical College &amp; Hospital, India</p></fn>
</fn-group>
</back>
</article>