<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="brief-report" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2026.1733312</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Brief Research Report</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Research on real-time detection of radiotherapy setup errors and intelligent quality control methods based on artificial intelligence and big data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Lin</surname><given-names>Weixiang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3257071/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Xiao</surname><given-names>Chengjian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3095346/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Xiao</surname><given-names>Liangjie</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Fang</surname><given-names>Jianlan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Xu</surname><given-names>Xiaobin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Fang</surname><given-names>Yongwen</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3256735/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Radiation Oncology, Ganzhou Cancer Hospital</institution>, <city>Ganzhou</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>National Cancer Laboratory (South China), Department of Radiation Therapy, Sun Yat-sen University Cancer Center</institution>, <city>Guangzhou</city>, <state>Guangdong</state>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Xiaobin Xu, <email xlink:href="mailto:xuxiaobinxxb@qq.com">xuxiaobinxxb@qq.com</email>; Yongwen Fang, <email xlink:href="mailto:fangyw@sysucc.org.cn">fangyw@sysucc.org.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-05">
<day>05</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>16</volume>
<elocation-id>1733312</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>10</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Lin, Xiao, Xiao, Fang, Xu and Fang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Lin, Xiao, Xiao, Fang, Xu and Fang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-05">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Objective</title>
<p>This study aimed to develop and validate an unsupervised machine learning&#x2013;based approach for near-real-time alerting of statistically abnormal six-dimensional (6D) radiotherapy setup errors. Using large-scale clinical datasets, the robustness of the proposed approach was evaluated across different immobilization methods and treatment sites to support quality assurance alerting.</p>
</sec>
<sec>
<title>Methods</title>
<p>A total of 7,539 CBCT-based 6D setup error records collected at our center between May 2022 and March 2025 were analyzed. After data standardization and construction of proxy anomalous samples, two unsupervised models&#x2014;Isolation Forest (IF) and Local Outlier Factor (LOF)&#x2014;were developed. Model performance was assessed using ROC-AUC, PR-AUC, and sensitivity at a fixed false positive rate (FPR &#x2248; 5%). Subgroup analyses were performed by immobilization method and treatment site. Interpretability was explored using principal component analysis (PCA) and Spearman correlation. To provide minimal translational context, geometric tolerance exceedance rates based on translational and rotational thresholds were quantified.</p>
</sec>
<sec>
<title>Results</title>
<p>Overall, IF outperformed LOF (ROC-AUC = 0.960 [95% CI: 0.956&#x2013;0.964] vs. 0.880 [95% CI: 0.872&#x2013;0.888]). Most immobilization methods achieved AUC &#x2265; 0.92 (range: 0.912&#x2013;1.000), with dual-face SRT masks and neck&#x2013;thorax mask plus vacuum cushion combinations approaching ideal performance (AUC &#x2248; 1.00). Interpretability analyses indicated that the AP, Pitch, and LR directions were the primary contributors to abnormality detection. Longitudinal evaluation revealed stable performance without model drift.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study demonstrates the feasibility of applying unsupervised learning to identify statistically unusual setup patterns and proposes a closed-loop &#x201c;setup&#x2013;monitoring&#x2013;alert&#x201d; framework. The approach is intended as an auxiliary alerting tool to support clinical workflows, rather than to replace dosimetric evaluation or clinical decision-making.</p>
</sec>
</abstract>
<kwd-group>
<kwd>anomaly detection</kwd>
<kwd>cone-beam computed tomography (CBCT)</kwd>
<kwd>machine learning</kwd>
<kwd>PCA</kwd>
<kwd>quality assurance</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="30"/>
<page-count count="11"/>
<word-count count="6240"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Radiation Oncology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>With the advancement of precision radiotherapy, the management of setup errors has become a critical component in ensuring treatment safety and accurate dose delivery. In current clinical practice, image-guided radiotherapy (IGRT) and cone-beam computed tomography (CBCT) are commonly employed for periodic setup corrections. Although these approaches improve positioning accuracy, frequent imaging introduces additional radiation exposure and procedural workload, posing further challenges to clinical efficiency and staffing (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>). Existing quality assurance (QA) procedures largely rely on manual experience or fixed threshold criteria, which are highly subjective, lack real-time responsiveness, and often fail to balance sensitivity and scalability. The coupling among six-dimensional (6D) setup errors is complex, and threshold-based methods are prone to missed detections or false alarms. Achieving high sensitivity under a low false-alarm rate remains a significant challenge in clinical quality control.</p>
<p>In recent years, artificial intelligence (AI) has achieved substantial progress in medical image analysis and radiotherapy workflow optimization (<xref ref-type="bibr" rid="B4">4</xref>&#x2013;<xref ref-type="bibr" rid="B6">6</xref>). In particular, unsupervised learning methods can automatically identify abnormal patterns without the need for manual annotation, offering an alternative strategy for clinical quality assurance. For example, AI-based automatic organ segmentation systems have significantly improved contouring efficiency and consistency in clinical practice (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<p>Although AI has demonstrated outstanding performance in treatment planning and quality assurance (QA) tasks, there is currently relatively limited systematic research&#x2014;either domestically or internationally&#x2014;on real-time quality control of six-dimensional (6D) setup error detection. This research gap leaves radiotherapy QA largely in the &#x201c;offline detection&#x201d; stage, without achieving a closed-loop system of &#x201c;real-time monitoring and intelligent alerting.&#x201d;</p>
<p>In this study, a total of 7,539 CBCT-based 6D setup error datasets collected between May 2022 and March 2025 were used to construct and validate an unsupervised learning&#x2013;driven framework for real-time abnormality detection. The Isolation Forest (IF) algorithm was employed as the primary model, with the Local Outlier Factor (LOF) serving as a reference. Potential outliers were defined using a 3&#x3c3; threshold for performance evaluation.</p>
<p>Methodologically, this study integrates stratified analyses (across different immobilization methods and treatment sites), interpretability analyses (via principal component analysis [PCA] and Spearman correlation), and longitudinal stability assessment to comprehensively evaluate the model&#x2019;s performance and clinical feasibility. Finally, a closed-loop &#x201c;setup&#x2013;monitoring&#x2013;alert&#x201d; framework is proposed to facilitate multicenter translation and promote the clinical implementation of AI-driven radiotherapy quality assurance.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>General data</title>
<p>This study included cone-beam computed tomography (CBCT) setup data recorded between May 2022 and March 2025, comprising a total of 7,539 sets of six-dimensional (6D) setup registration errors. All patients were treated on a Varian VitalBeam linear accelerator (Varian Medical Systems, Palo Alto, CA, USA), and CBCT-based setup verification and registration were performed using the on-board imaging and registration system integrated with the accelerator, following the institutional IGRT workflow. The dataset covered multiple anatomical sites, including head and neck, thoracic (excluding breast cancer), and abdominal regions, with sample sizes of 3,477, 2,585, and 1,477 sets, respectively (see <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>). Each data record contained six degrees of freedom: three translational directions&#x2014;left&#x2013;right (LR), superior&#x2013;inferior (SI), and anterior&#x2013;posterior (AP)&#x2014;and three rotational directions&#x2014;yaw (Rtn), pitch (Pitch), and roll (Roll). All translational errors were measured in millimeters and rotational errors in degrees. During data preprocessing, a small number of missing values were identified across the six dimensions (AP: 2; SI: 1; LR: 1; Rtn: 10; Pitch: 8; Roll: 8). Missing values were imputed using the median substitution method. A very limited number of extreme outliers (&lt;0.3%) that could not be corrected and were likely to substantially affect the analysis were excluded from the dataset.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Model performance across anatomical sites (Isolation Forest, N = 7,539).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Anatomical site</th>
<th valign="middle" align="center">N</th>
<th valign="middle" align="center">ROC-AUC (95% CI)</th>
<th valign="middle" align="center">FDR-adjusted P</th>
<th valign="middle" align="center">&#x394;AUC vs Overall</th>
<th valign="middle" align="center">Remark</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Head and Neck</td>
<td valign="middle" align="center">3,477</td>
<td valign="middle" align="center">0.973 (0.969 &#x2013; 0.977)</td>
<td valign="middle" align="center">0.18</td>
<td valign="middle" align="center">0.011</td>
<td valign="middle" align="center">Highest model performance; tight immobilization and stable geometry contribute to distinct error patterns.</td>
</tr>
<tr>
<td valign="middle" align="center">Thoracic (Chest)</td>
<td valign="middle" align="center">2,585</td>
<td valign="middle" align="center">0.954 (0.948 &#x2013; 0.960)</td>
<td valign="middle" align="center">0.31</td>
<td valign="middle" align="center">&#x2013;0.008</td>
<td valign="middle" align="center">Stable but lower AUC due to breathing-induced motion and chest-wall variability.</td>
</tr>
<tr>
<td valign="middle" align="center">Abdominal Region</td>
<td valign="middle" align="center">1,477</td>
<td valign="middle" align="center">0.946 (0.938 &#x2013; 0.954)</td>
<td valign="middle" align="center">0.22</td>
<td valign="middle" align="center">&#x2013;0.016</td>
<td valign="middle" align="center">Reduced performance from organ motion and respiratory drift; still clinically acceptable (AUC &gt; 0.90).</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Overall AUC (95% CI), 0.960 (0.956 &#x2013; 0.964). Differences among sites did not reach statistical significance after FDR correction (P &gt; 0.05). Positive &#x394;AUC values denote better-than-average discrimination; negative values indicate mild performance drop associated with motion-related uncertainty. All sites maintained ROC-AUC &#x2265; 0.94, demonstrating robust generalization of the unsupervised framework across anatomical regions.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>All six-dimensional (6D) setup corrections in this study were executed clinically by direct couch shifts as recorded in the CBCT registration outputs. In the Varian VitalBeam system, the displayed correction range for Pitch and Roll is limited to &#xb1;3&#xb0;, and corresponding values were therefore recorded and applied within the clinical system&#x2019;s operational display limits. All setup error records were retained as part of the real-world clinical workflow without <italic>post hoc</italic> modification.</p>
<p>This study was approved by the Institutional Ethics Committee of our hospital (Approval No. [2025] KELUNSHEN No. 241). All patient information was anonymized prior to analysis in accordance with relevant ethical and data privacy regulations.</p>
<p>All patients underwent standard procedures for simulation, immobilization, and treatment planning. The immobilization methods included head&#x2013;neck&#x2013;shoulder thermoplastic mask, neck&#x2013;thorax mask, vacuum cushion, thermoplastic body mold, stereotactic radiotherapy (SRT) dual-face mask, and stereotactic body radiotherapy (SBRT) fixation frame, as detailed in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Model performance across immobilization methods (Isolation Forest, N = 7,539).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Immobilization method</th>
<th valign="middle" align="center">N</th>
<th valign="middle" align="center">AUC (95% CI)</th>
<th valign="middle" align="center">FDR-adjusted P</th>
<th valign="middle" align="center">&#x394;AUC vs Overall</th>
<th valign="middle" align="center">Remark</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Neck-Thorax Mask + Vacuum Cushion</td>
<td valign="middle" align="center">41</td>
<td valign="middle" align="center">1.000 (1.000 &#x2013; 1.000)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.04</td>
<td valign="middle" align="center">Best performance; ideal stability</td>
</tr>
<tr>
<td valign="middle" align="center">Neck-Thorax Mask + Thermoplastic Pad</td>
<td valign="middle" align="center">1,131</td>
<td valign="middle" align="center">0.995 (0.990 &#x2013; 1.000)</td>
<td valign="middle" align="center">0.3</td>
<td valign="middle" align="center">0.03</td>
<td valign="middle" align="center">Excellent stability; consistent with SRT dual-layer mask</td>
</tr>
<tr>
<td valign="middle" align="center">SRT Dual-Layer Mask</td>
<td valign="middle" align="center">388</td>
<td valign="middle" align="center">0.994 (0.987 &#x2013; 1.000)</td>
<td valign="middle" align="center">0.32</td>
<td valign="middle" align="center">0.03</td>
<td valign="middle" align="center">Excellent stability and reproducibility</td>
</tr>
<tr>
<td valign="middle" align="center">Head-Neck-Shoulder Mask + Shaping Pad</td>
<td valign="middle" align="center">2,431</td>
<td valign="middle" align="center">0.971 (0.968 &#x2013; 0.974)</td>
<td valign="middle" align="center">0.41</td>
<td valign="middle" align="center">0.01</td>
<td valign="middle" align="center">Mainstream fixation; consistent results</td>
</tr>
<tr>
<td valign="middle" align="center">Neck-Thorax Mask + Shaping Pad</td>
<td valign="middle" align="center">237</td>
<td valign="middle" align="center">0.971 (0.963 &#x2013; 0.979)</td>
<td valign="middle" align="center">0.45</td>
<td valign="middle" align="center">0.01</td>
<td valign="middle" align="center">Comparable to head-neck-shoulder pad</td>
</tr>
<tr>
<td valign="middle" align="center">SBRT Fixation Frame</td>
<td valign="middle" align="center">51</td>
<td valign="middle" align="center">0.952 (0.931 &#x2013; 0.972)</td>
<td valign="middle" align="center">0.18</td>
<td valign="middle" align="center">&#x2013;0.01</td>
<td valign="middle" align="center">Small sample; high precision</td>
</tr>
<tr>
<td valign="middle" align="center">Head-Neck-Shoulder Mask</td>
<td valign="middle" align="center">293</td>
<td valign="middle" align="center">0.944 (0.933 &#x2013; 0.956)</td>
<td valign="middle" align="center">0.22</td>
<td valign="middle" align="center">&#x2013;0.02</td>
<td valign="middle" align="center">Stable but less constrained</td>
</tr>
<tr>
<td valign="middle" align="center">Head-Neck-Shoulder Mask + Vacuum Cushion</td>
<td valign="middle" align="center">369</td>
<td valign="middle" align="center">0.943 (0.934 &#x2013; 0.952)</td>
<td valign="middle" align="center">0.2</td>
<td valign="middle" align="center">&#x2013;0.02</td>
<td valign="middle" align="center">Stable hybrid fixation</td>
</tr>
<tr>
<td valign="middle" align="center">Head-Neck-Shoulder Mask + Thermoplastic Pad</td>
<td valign="middle" align="center">164</td>
<td valign="middle" align="center">0.933 (0.921 &#x2013; 0.945)</td>
<td valign="middle" align="center">0.26</td>
<td valign="middle" align="center">&#x2013;0.03</td>
<td valign="middle" align="center">Moderate variability</td>
</tr>
<tr>
<td valign="middle" align="center">Vacuum Cushion</td>
<td valign="middle" align="center">2,225</td>
<td valign="middle" align="center">0.930 (0.927 &#x2013; 0.933)</td>
<td valign="middle" align="center">0.33</td>
<td valign="middle" align="center">&#x2013;0.03</td>
<td valign="middle" align="center">Less restrictive; robust overall</td>
</tr>
<tr>
<td valign="middle" align="center">U-Mask</td>
<td valign="middle" align="center">173</td>
<td valign="middle" align="center">0.925 (0.912 &#x2013; 0.938)</td>
<td valign="middle" align="center">0.29</td>
<td valign="middle" align="center">&#x2013;0.04</td>
<td valign="middle" align="center">Acceptable performance; higher variability</td>
</tr>
<tr>
<td valign="middle" align="center">Abdominal Mask + Shaping Pad</td>
<td valign="middle" align="center">30</td>
<td valign="middle" align="center">0.918 (0.900 &#x2013; 0.936)</td>
<td valign="middle" align="center">0.37</td>
<td valign="middle" align="center">&#x2013;0.05</td>
<td valign="middle" align="center">Limited sample; larger motion range</td>
</tr>
<tr>
<td valign="middle" align="center">Abdominal Mask + Vacuum Cushion</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">0.912 (0.872 &#x2013; 0.952)</td>
<td valign="middle" align="center">0.44</td>
<td valign="middle" align="center">&#x2013;0.05</td>
<td valign="middle" align="center">Minimal sample; highest variability</td>
</tr>
<tr>
<td valign="middle" align="center">Total</td>
<td valign="middle" align="center">7,539</td>
<td valign="middle" align="center">0.962 (0.959 &#x2013; 0.965)</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">Overall average performance</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Overall mean AUC (Isolation Forest, full dataset), 0.962 (0.959 &#x2013; 0.965). FDR-adjusted P values derived from Kruskal&#x2013;Wallis tests with Benjamini&#x2013;Hochberg correction. Positive &#x394;AUC values indicate higher discrimination relative to the overall model; negative values indicate marginally lower performance. All fixation methods maintained AUC &#x2265; 0.91 (range: 0.912&#x2013;1.000), indicating robust generalization across clinical conditions. Subgroups with N &lt; 50 were retained for completeness but should be interpreted as underpowered, and their near-perfect AUC estimates are likely influenced by sampling variability rather than true performance.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data preprocessing</title>
<p>To ensure modeling stability, all six-dimensional (6D) setup error components were standardized using Z-score normalization to eliminate differences in measurement scales. In the primary analysis, Z-score normalization was performed globally across the entire dataset rather than within site-specific strata, to allow direct comparison of anomaly scores across different treatment sites. CBCT-to-planning CT registration was performed using a gray-value&#x2013;based algorithm within predefined regions of interest (ROIs). To minimize operator-related variability, all registrations were performed by the same radiation therapist throughout the study period. The workflow was primarily automatic, with mandatory manual review and adjustment when necessary according to routine clinical acceptance criteria. Construction of abnormal samples: Abnormal instances were defined using the 3&#x3c3; criterion, where any direction with |z| &gt; 3 was considered a potential outlier. Subsets were then established according to immobilization method and treatment site. The final dataset formed a 6D error matrix (7,539 &#xd7; 6) incorporating information on treatment site, immobilization type, and treatment fraction, which served as the input for model training and validation.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Model and experimental design</title>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Core model</title>
<p>The Isolation Forest (IF) algorithm isolates anomalies by randomly partitioning the feature space (<xref ref-type="bibr" rid="B8">8</xref>), demonstrating high efficiency and robustness when applied to high-dimensional data. To prevent overfitting (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>), five-fold cross-validation was employed, with four folds used for training and one for evaluation. A bootstrap procedure with 500 resamples was performed to estimate 95% confidence intervals (CIs) for both the receiver operating characteristic (ROC) and precision&#x2013;recall (PR) metrics. As an unsupervised learning approach, the model was trained solely on the training folds to learn anomaly scores, while evaluation was conducted on the test folds using proxy labels defined by the criterion |z| &gt; 3. No additional fixed 80/20 hold-out dataset was used. The unsupervised anomaly detection design in this study was developed with reference to classical reviews of deep learning&#x2013;based anomaly detection methods (<xref ref-type="bibr" rid="B11">11</xref>).</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Comparison models</title>
<p>The Local Outlier Factor (LOF) algorithm was employed as the comparative model. The parameter settings were as follows:</p>
<list list-type="simple">
<list-item>
<p>&#x2460; Isolation Forest (IF): n_estimators = 100, max_samples = 256, contamination = 0.05, max_features = 1.0, and random_state = 42.</p></list-item>
<list-item>
<p>&#x2461; Local Outlier Factor (LOF): n_neighbors = 20, contamination = 0.05, leaf_size = 30, and metric = &#x2018;euclidean&#x2019;.</p></list-item>
</list>
<p>The contamination value was set to 0.05 to approximate an anomaly rate of ~5% corresponding to the 3&#x3c3; threshold, ensuring a balance between sensitivity and false alarm rate. The LOF algorithm detects anomalies by comparing the local density of each sample with that of its neighbors, making it suitable for localized anomaly detection; however, its stability may decrease when applied to large-scale or complex distributions. For comparison with traditional statistical approaches, the z-score&#x2013;based 3&#x3c3; rule and Shewhart control charts were also used to detect abnormalities in the six-dimensional setup errors. Their performance was compared with that of the unsupervised learning models.</p>
</sec>
<sec id="s2_3_3">
<label>2.3.3</label>
<title>Construction of abnormal samples and reference standard</title>
<p>Because clinically validated &#x201c;true abnormal&#x201d; labels are unavailable, this study adopted a statistical proxy definition: any single-dimensional setup error with |z| &gt; 3 was considered a potential anomaly, whereas all other samples were regarded as normal. This threshold was chosen with reference to AAPM Task Group 147 and the clinically accepted setup tolerances of &#xb1;3 mm/&#xb1;3&#xb0;, consistent with international standards for positional accuracy.</p>
<p>Although this approach does not represent actual clinical misalignment or mistreatment events, it provides a reasonable benchmark for performance evaluation in the context of large-scale datasets.</p>
<p>Because clinically adjudicated labels were unavailable, our primary proxy label defined an abnormal case as any single dimension with |z| &gt; 3. To assess robustness against multi-axis coupling, we additionally evaluated alternative multidimensional proxy definitions, including (i) the Euclidean norm of the 6D z-score vector (M) and (ii) Mahalanobis distance using the empirical covariance of standardized errors. These were used only for sensitivity analyses and not to claim clinical ground truth.</p>
</sec>
<sec id="s2_3_4">
<label>2.3.4</label>
<title>Interpretability analysis: principal component analysis</title>
<p>Dimensionality reduction was performed on the six-dimensional (6D) setup errors to visualize the spatial separation between normal and potential abnormal samples.</p>
<p>Feature importance analysis: Spearman correlation coefficients between each directional component (LR, SI, AP, Rtn, Pitch, Roll) and the Isolation Forest (IF) anomaly scores were calculated to evaluate the relative contribution of each feature to anomaly detection.</p>
</sec>
<sec id="s2_3_5">
<label>2.3.5</label>
<title>Model training and validation</title>
<p>A five-fold cross-validation strategy was adopted, using four folds for training and one for evaluation. For each test fold, ROC and PR curves were computed, and 500 bootstrap resamples were used to estimate the 95% confidence intervals (CIs). As an unsupervised learning approach, the model learned anomaly scores only from the training folds, while evaluation was performed on the test folds using proxy labels defined by |z| &gt; 3. No additional fixed 80/20 hold-out dataset was employed. Model performance was assessed using ROC-AUC, PR-AUC, and recall and precision at approximately 5% false positive rate (FPR &#x2248; 5%) as key metrics. Bootstrap sampling (n = 500) was used to compute 95% CIs for the ROC-AUC values.</p>
</sec>
<sec id="s2_3_6">
<label>2.3.6</label>
<title>Subgroup and longitudinal analyses</title>
<p>Four extended analyses were conducted to assess robustness and generalizability:</p>
<list list-type="simple">
<list-item>
<p>&#x2460; Stratified analysis by immobilization method;</p></list-item>
<list-item>
<p>&#x2461; Stratified analysis by treatment site (head and neck, thoracic, abdominal);</p></list-item>
<list-item>
<p>&#x2462; Longitudinal assessment of potential error drift and long-term model stability across different time periods.</p></list-item>
<list-item>
<p>&#x2463; Sensitivity analysis comparing global Z-score normalization with site-stratified Z-score normalization (head and neck, thoracic, abdominal), with all downstream model evaluations repeated under both settings.</p></list-item>
</list>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Statistical analysis</title>
<p>All data processing and model implementation were performed in a Python 3.9.0 (64-bit) environment. The workflow included modules for model training and performance evaluation, data preprocessing, visualization, and interpretability analysis using standard open-source libraries.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Performance evaluation metrics</title>
<list list-type="simple">
<list-item>
<p>&#x2460; Area under the receiver operating characteristic curve (ROC-AUC): Quantifies the overall ability of the model to distinguish between normal and abnormal samples.</p></list-item>
<list-item>
<p>&#x2461; Area under the precision&#x2013;recall curve (PR-AUC): Evaluates model performance under varying thresholds, particularly in imbalanced datasets where the proportion of anomalies is low.</p></list-item>
<list-item>
<p>&#x2462; Recall and precision at approximately 5% false positive rate (FPR &#x2248; 5%): Simulates clinical conditions requiring high sensitivity and reliability under low false alarm rates.</p></list-item>
</list>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Confidence intervals and statistical tests</title>
<p>Bootstrap resampling (500 iterations) was applied to estimate the 95% confidence intervals (CIs) of ROC-AUC and PR-AUC values. Differences in ROC-AUC between models (e.g., IF vs. LOF) were statistically compared using the DeLong test. For multi-group comparisons across different immobilization methods and treatment sites, the Kruskal&#x2013;Wallis test was employed. When significant differences were observed (P &lt; 0.05), Dunn&#x2019;s <italic>post hoc</italic> tests were conducted. To control for type I error inflation due to multiple comparisons, Benjamini&#x2013;Hochberg false discovery rate (FDR) correction was applied to subgroup P-values, and results were further validated using the Holm&#x2013;Bonferroni method.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Interpretability and dimensionality reduction analyses</title>
<p>Principal Component Analysis (PCA): The first two principal components were selected for visualization to assess the degree of separation between potential abnormal and normal samples in the reduced feature space.</p>
<p>Feature importance (IF proxy): The absolute Spearman correlation coefficients (|&#x3c1;|) between each setup error dimension and the IF anomaly scores were calculated and ranked to determine directional contributions.</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Longitudinal trend analysis</title>
<p>A Shewhart control chart was constructed to monitor a composite index M, representing the longitudinal trend of 6D setup errors. After Z-score normalization of six directions&#x2014;Z(AP), Y(SI), X(LR), Rtn, Pitch, and Roll&#x2014;the Euclidean norm was calculated to obtain M. The overall mean was used as the center line (CL), and the &#xb1;3&#x3c3; range defined the upper and lower control limits (UCL/LCL). Any instance exceeding these limits was identified as an abnormal deviation.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Overall performance</title>
<p>In the overall dataset (N = 7,539), the Isolation Forest (IF) model performed better overall than the Local Outlier Factor (LOF) model.</p>
<list list-type="simple">
<list-item>
<p>&#x2460; ROC-AUC: IF = 0.960 (95% CI: 0.956&#x2013;0.964) vs. LOF = 0.880 (95% CI: 0.872&#x2013;0.888); the difference was statistically significant according to the DeLong test (P &lt; 0.01).</p></list-item>
<list-item>
<p>&#x2461; PR-AUC: IF = 0.480 vs. LOF = 0.420.</p></list-item>
<list-item>
<p>&#x2462; Recall and precision at FPR &#x2248; 5%: IF achieved a recall of 0.52 and a precision of 0.71, both higher than those of LOF (recall = 0.39; precision = 0.61) (see <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>).</p></list-item>
</list>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Overall model performance (N = 7,539).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">ROC-AUC (95% CI)</th>
<th valign="middle" align="center">PR-AUC (95% CI)</th>
<th valign="middle" align="center">Recall @ FPR&#x2248;5%</th>
<th valign="middle" align="center">Precision @ FPR&#x2248;5%</th>
<th valign="middle" align="center">&#x394;AUC (vs LOF)</th>
<th valign="middle" align="center">P (DeLong)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Isolation Forest</td>
<td valign="middle" align="center">0.960 (0.956&#x2013;0.964)</td>
<td valign="middle" align="center">0.480 (0.462&#x2013;0.498)</td>
<td valign="middle" align="center">0.52</td>
<td valign="middle" align="center">0.71</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">&lt;0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Local Outlier Factor</td>
<td valign="middle" align="center">0.880 (0.872&#x2013;0.888)</td>
<td valign="middle" align="center">0.420 (0.405&#x2013;0.438)</td>
<td valign="middle" align="center">0.39</td>
<td valign="middle" align="center">0.61</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">Z-score (3&#x3c3; rule) *</td>
<td valign="middle" align="center">0.754 (0.743&#x2013;0.765)</td>
<td valign="middle" align="center">0.331 (0.318&#x2013;0.346)</td>
<td valign="middle" align="center">0.25</td>
<td valign="middle" align="center">0.52</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>FPR, false positive rate; &#x394;AUC, difference in ROC-AUC relative to LOF; * Z-score (3&#x3c3; rule) indicates the traditional statistical threshold method, where data points beyond &#xb1;3 standard deviations are considered outliers. Multiple comparisons were corrected using the Benjamini&#x2013;Hochberg method, and Holm&#x2013;Bonferroni was used for sensitivity analysis.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>These results suggest that the IF model achieved higher discriminative ability and maintained better sensitivity&#x2013;specificity balance under low false alarm conditions. Comparable performance was observed in the sensitivity analysis using site-stratified Z-score normalization, and the overall conclusions remained unchanged.</p>
<p>Receiver operating characteristic (ROC) and precision&#x2013;recall (PR) curves comparing the Isolation Forest (IF) and Local Outlier Factor (LOF) models in the overall dataset (N = 7,539). The shaded regions represent the 95% bootstrap confidence intervals (500 resamplings). The IF achieved superior discrimination (ROC-AUC = 0.960 [95% CI 0.956&#x2013;0.964]; PR-AUC = 0.480 [0.462&#x2013;0.498]) compared with LOF (ROC-AUC = 0.880, <italic>P</italic> &lt; 0.01, DeLong test). The dashed vertical line indicates the low false-positive operating point (FPR &#x2248; 5%), at which recall and precision were computed (see <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>).</p>
<p>Across alternative proxy definitions, the relative performance ranking (IF &gt; LOF &gt; 3&#x3c3;) remained consistent, and operating-point metrics at low FPR changed only modestly, supporting the robustness of the proposed framework to label specification.</p>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Absolute distribution of six-dimensional setup errors</title>
<p>To describe the underlying geometric characteristics, absolute distributions of six-dimensional (6D) setup errors were summarized prior to Z-score normalization. Translational components (LR, SI, AP) were reported in millimeters, and rotational components (Rtn, Pitch, Roll) in degrees.</p>
<p>Across the full cohort (N = 7,539), translational errors showed smaller dispersion than rotational errors. Among translational directions, AP exhibited the widest distribution, whereas LR showed the smallest variability. Pitch demonstrated greater variability than Roll and Rtn among rotational components.</p>
<p>Both mean &#xb1; standard deviation and median with interquartile range (IQR) were reported, as summarized in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Absolute distribution of six-dimensional setup errors (N = 7,539).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Direction</th>
<th valign="middle" align="center">Mean &#xb1; SD</th>
<th valign="middle" align="center">Median (IQR)</th>
<th valign="middle" align="center">Unit</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">LR</td>
<td valign="middle" align="center">0.10 &#xb1; 0.09</td>
<td valign="middle" align="center">0.10 (0.03&#x2013;0.14)</td>
<td valign="middle" align="center">mm</td>
</tr>
<tr>
<td valign="middle" align="center">SI</td>
<td valign="middle" align="center">0.13 &#xb1; 0.11</td>
<td valign="middle" align="center">0.10 (0.06&#x2013;0.20)</td>
<td valign="middle" align="center">mm</td>
</tr>
<tr>
<td valign="middle" align="center">AP</td>
<td valign="middle" align="center">0.12 &#xb1; 0.09</td>
<td valign="middle" align="center">0.10 (0.05&#x2013;0.19)</td>
<td valign="middle" align="center">mm</td>
</tr>
<tr>
<td valign="middle" align="center">Rtn</td>
<td valign="middle" align="center">0.62 &#xb1; 0.64</td>
<td valign="middle" align="center">0.50 (0.20&#x2013;0.90)</td>
<td valign="middle" align="center">&#xb0;</td>
</tr>
<tr>
<td valign="middle" align="center">Pitch</td>
<td valign="middle" align="center">0.70 &#xb1; 0.68</td>
<td valign="middle" align="center">0.50 (0.20&#x2013;1.00)</td>
<td valign="middle" align="center">&#xb0;</td>
</tr>
<tr>
<td valign="middle" align="center">Roll</td>
<td valign="middle" align="center">0.92 &#xb1; 0.87</td>
<td valign="middle" align="center">0.70 (0.20&#x2013;1.40)</td>
<td valign="middle" align="center">&#xb0;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>All values represent absolute setup errors.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Stratified analysis by immobilization method</title>
<p>To evaluate the influence of different immobilization strategies on model performance, a stratified analysis was conducted by immobilization method (see <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>). The area under the curve (AUC) values were estimated using 500 bootstrap resamples to obtain 95% confidence intervals (CIs). The results demonstrated that, except for a few small-sample subgroups, the Isolation Forest (IF) model consistently achieved AUC &#x2265; 0.92 across most immobilization methods, indicating stable performance under diverse clinical conditions.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Performance of the Isolation Forest across immobilization methods (forest plot).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1733312-g001.tif">
<alt-text content-type="machine-generated">Forest plot showing model performance across various fixation methods, measured by Isolation Forest AUC values with 95% confidence intervals. Highest AUC is 1.000 for abdominal and neck-thorax masks with cushions. Other methods range from 0.925 to 0.995.</alt-text>
</graphic></fig>
<p>Specifically, the neck&#x2013;thorax mask combined with vacuum cushion (N = 41) and the SRT dual-face thermoplastic mask (N = 388) achieved the best performance, with AUCs of 1.000 and 0.994, respectively. The head&#x2013;neck&#x2013;shoulder mask with body mold (N = 2,431) and neck&#x2013;thorax mask with body mold (N = 237) also performed strongly (AUC = 0.971), maintaining high accuracy among the most common fixation techniques. The SBRT fixation frame (N = 51) yielded an AUC of 0.952&#x2014;despite its smaller sample size, it demonstrated strong discriminative capability. The vacuum cushion (N = 2,225) and U-shaped mask (N = 173) showed slightly lower but still robust results (AUC range: 0.925&#x2013;0.930).</p>
<p>Most intergroup differences were not statistically significant (FDR &gt; 0.05), and no significant differences were observed among smaller subgroups (P &gt; 0.05), suggesting that sample size variability should be interpreted with caution.</p>
<p>Overall, the AUC range (0.912&#x2013;1.000) exceeded the clinically acceptable threshold (&#x2265; 0.90), demonstrating the stability and robustness of the proposed framework across different immobilization strategies.</p>
<p>The forest plot shows the discriminative performance (AUC &#xb1; 95% confidence interval) of the Isolation Forest model across twelve immobilization strategies. Each dot indicates the point estimate; horizontal lines indicate bootstrap 95% CIs. The vertical dashed line (AUC = 0.95) denotes the threshold for &#x201c;excellent&#x201d; model performance. Color intensity reflects the magnitude of AUC (darker = higher). Most immobilization methods achieved robust performance (AUC &#x2265; 0.92), with SRT Duplex Positioning Mask and Neck-Thorax Mask + Vacuum Cushion approaching ideal performance (AUC &#x2248; 1.00).</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Stratified performance by treatment site</title>
<p>To further evaluate the influence of treatment site on model performance, the dataset was stratified into head and neck, thoracic, and abdominal patient groups (see <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>). The results were as follows:</p>
<list list-type="simple">
<list-item>
<p>&#x2460; Head and neck patients (N = 3,477): The model achieved the best overall performance, with ROC-AUC = 0.973 (95% CI: 0.969&#x2013;0.977).</p></list-item>
<list-item>
<p>&#x2461; Thoracic patients (N = 2,585): ROC-AUC = 0.954 (95% CI: 0.948&#x2013;0.960).</p></list-item>
<list-item>
<p>&#x2462; Abdominal patients (N = 1,477): ROC-AUC = 0.946 (95% CI: 0.938&#x2013;0.954).</p></list-item>
</list>
<p>These findings indicate that the proposed framework maintained high and consistent performance across different anatomical regions, with the highest discriminative accuracy observed in the head and neck cohort.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Interpretability analysis</title>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Principal component analysis</title>
<p>Principal component analysis (PCA) was applied to reduce the six-dimensional (6D) setup error data into a two-dimensional space (see <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). The results showed that PC1 was primarily driven by the superior&#x2013;inferior (SI) and Pitch directions, explaining approximately 25.0% of the total variance. PC2 was mainly influenced by the left&#x2013;right (LR) and Roll directions, accounting for about 20.0% of the total variance. Together, PC1 and PC2 explained approximately 45.0% of the overall variance, suggesting that the dominant variation in the six-dimensional errors originated mainly from longitudinal displacement and planar rotational components.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>PCA visualization of six-dimensional (6D) positioning errors (PC1: 25.0%, PC2: 20.0%, cumulative 45.0%).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1733312-g002.tif">
<alt-text content-type="machine-generated">Scatter plot titled &#x201c;PCA of z-scored 6D errors&#x201d; shows two sets of data points. Orange dots represent normal observations, clustered around the center. Blue crosses, labeled as potential anomalies, are scattered mostly toward the periphery. The axes are labeled PC1 (25.0%) and PC2 (20.0%).</alt-text>
</graphic></fig>
<p>Two-dimensional principal component analysis (PCA) projection of standardized 6D positioning errors. Normal samples (yellow) form a compact central cluster, whereas potential anomalies (blue) occupy peripheral regions. The 95% confidence ellipses illustrate class separation. The first two components (PC1 = 25.0%, PC2 = 20.0%) explain 45.0% of total variance, dominated by SI and Pitch in PC1 and LR and Roll in PC2.</p>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Feature importance analysis</title>
<p>Further analysis was conducted using the absolute Spearman correlation coefficient (|&#x3c1;|) between each directional component and the Isolation Forest (IF) anomaly scores across the entire dataset as a proxy for feature importance (see <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>), reflecting relative associations rather than causal effects. The results indicated that the anterior&#x2013;posterior (AP) direction contributed the most to the anomaly detection score, followed by Pitch and left&#x2013;right (LR). The rotation (Rtn) and Roll components showed relatively lower contributions, while the superior&#x2013;inferior (SI) direction exhibited the smallest impact.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Feature-importance proxy from Isolation Forest score.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1733312-g003.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x201c;Feature importance proxy (IF score)&#x201d; displays the importance of six features measured by absolute Spearman rho values. AP has the highest score, followed by Pitch, LR, Rtn, Roll, and SI.</alt-text>
</graphic></fig>
<p>Bars show the absolute Spearman correlation (|&#x3c1;|) between each 6D error component and the IF anomaly score computed over the full dataset, used as a proxy for importance. AP contributes the most, followed by Pitch and LR, while Rtn and Roll show smaller associations; SI contributes minimally.</p>
</sec>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Temporal stability analysis</title>
<p>To evaluate the long-term stability of the model, all CBCT records were divided into four sequential cohorts (Cohort 1&#x2013;4) according to their acquisition order. Within each cohort, the proxy anomaly detection rate (defined as any dimension with |z| &gt; 3) was calculated and is shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, and the out-of-fold testing results of the Isolation Forest (IF) model were used to estimate ROC-AUC and PR-AUC, both reported with bootstrap-derived 95% confidence intervals (CIs).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Segment stability across cohorts.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1733312-g004.tif">
<alt-text content-type="machine-generated">Line graphs and a box plot illustrate data across four cohorts, C1 to C4. The top left graph shows an increasing anomaly rate, with shaded confidence intervals. The bottom left graph displays a stable AUC with overlapping confidence intervals. The right box plot visualizes IF score trends, highlighting variability with statistical significance marked at p=0.04.</alt-text>
</graphic></fig>
<p>Differences in ROC-AUC values across the four cohorts were assessed using the Kruskal&#x2013;Wallis test based on the bootstrap AUC distributions. Since only a single performance metric was compared in this section, no false discovery rate (FDR) correction was applied.</p>
<p>Top: Anomaly detection rate (|z| &gt; 3) across four cohorts (Cohort 1&#x2013;4; n = 1,884, 1,884, 1,884, and 1,887) with 95% confidence interval (CI) shaded areas. Bottom: ROC-AUC (blue) and PR-AUC (orange) evaluated on each cohort using out-of-fold testing with bootstrap 95% CIs. Metrics remain broadly stable (Kruskal&#x2013;Wallis test on bootstrap AUC distributions: P &gt; 0.05), indicating no degradation in model performance over time. The inset boxplot shows Isolation Forest (IF) anomaly scores per cohort with a dashed regression line, indicating a mild upward trend (p &#x2248; 0.04). This upward shift in anomaly scores did not translate into deterioration of discrimination performance, as reflected by stable ROC-AUC and PR-AUC across cohorts.</p>
<p><xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> illustrates the variations in anomaly detection rate and model performance (ROC-AUC and PR-AUC) across the four temporal cohorts. For each cohort, the sample size, anomaly detection rate (with 95% CI), mean &#xb1; SD of IF anomaly scores, ROC-AUC (95% CI), and PR-AUC (95% CI) are presented. Overall, both ROC-AUC and PR-AUC remained stable across the four cohorts (Kruskal&#x2013;Wallis test, P &gt; 0.05), with no evidence of performance degradation over time.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>Based on a large-scale dataset of six-dimensional (6D) setup errors derived from CBCT, this study systematically evaluated the feasibility of applying unsupervised machine learning for anomaly detection and quality control (QC) in radiotherapy. The results demonstrated that the Isolation Forest (IF) model significantly outperformed the Local Outlier Factor (LOF) in overall performance, maintaining high recall and precision even under low false alarm conditions (see <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>, <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>). These findings validate the suitability and clinical potential of IF for real-time anomaly alerting in radiotherapy workflows (<xref ref-type="bibr" rid="B12">12</xref>&#x2013;<xref ref-type="bibr" rid="B15">15</xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Comparison of ROC and PR curves in the overall dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-16-1733312-g005.tif">
<alt-text content-type="machine-generated">Two plots compare Isolation Forest (IF) and Local Outlier Factor (LOF). The left plot is a ROC curve showing IF with an AUC of 0.96 and LOF with 0.88. The right plot is a PR curve, with IF scoring an AUC of 0.48 and LOF 0.42. Legends indicate line colors for each method.</alt-text>
</graphic></fig>
<p>The moderate PR-AUC reflects the inherent trade-off between sensitivity and false alerts in low base-rate detection tasks, underscoring that alert frequency must be clinically calibrated to avoid alert fatigue.</p>
<p>Importantly, a statistically abnormal geometric setup does not necessarily translate into a clinically meaningful dosimetric impact. In routine image-guided radiotherapy, most setup deviations are corrected before dose delivery, and their dosimetric relevance depends on factors such as target margins, beam geometry, and the spatial relationship between targets and organs at risk.</p>
<p>In the absence of direct dosimetric recalculation or clinical outcome endpoints, we reported geometric tolerance exceedance rates based on recorded six-dimensional couch corrections as a minimal translational proxy. While translational exceedance beyond common clinical thresholds was rare, a small subset of fractions exhibited larger rotational deviations, which may warrant prioritized quality assurance review rather than direct inference of dose error. Accordingly, the proposed framework is positioned as an auxiliary quality control alerting tool rather than a surrogate for dosimetric evaluation or clinical decision-making. From the perspective of immobilization methods (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>), all subgroups achieved AUC &#x2265; 0.92, indicating consistent model performance under diverse fixation conditions. The SRT dual-face thermoplastic mask and neck&#x2013;thorax mask combined with a vacuum cushion achieved near-ideal performance (AUC = 0.994 and 1.000, respectively), suggesting that higher fixation stability enhances the separability between normal and abnormal samples, thereby improving detection accuracy. Conversely, fixation methods with greater adaptability but lower constraint&#x2014;such as the vacuum cushion and U-shaped mask&#x2014;still maintained robust AUC values (0.925&#x2013;0.930), demonstrating the framework&#x2019;s cross-strategy robustness. However, smaller subgroups, such as the SBRT fixation frame (N = 51) and abdominal mask with vacuum cushion (N = 6), exhibited greater uncertainty in AUC estimation due to limited sample size (<xref ref-type="bibr" rid="B16">16</xref>). 
The consistency of results under both global and site-stratified normalization further suggests that the reported performance was not driven by the choice of normalization strategy. This also implies that a proportion of statistically defined anomalies may not trigger alerts at low false-positive operating points, further supporting the role of this framework as a screening and prioritization tool rather than an exhaustive detector.</p>
<p>Across different treatment sites (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>), the model performed best in head and neck patients (AUC &#x2248; 0.973), followed by thoracic (AUC &#x2248; 0.954) and abdominal (AUC &#x2248; 0.946) cohorts. These variations may be attributed to differences in immobilization adaptability and the physiological motion of the respiratory and digestive systems. The findings suggest that, for thoracic and abdominal radiotherapy, reliance on a single surface or image registration modality may be insufficient. Future QC frameworks should incorporate respiratory gating or multimodal monitoring approaches (e.g., SGRT combined with pressure sensors) to enhance overall control accuracy (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>).</p>
<p>The interpretability analysis further elucidated the principal factors driving anomaly detection. PCA-based dimensionality reduction revealed that abnormal samples formed relatively distinct clusters within the first two principal components, which together explained approximately 45% of total variance (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). This supports the statistical independence of abnormal patterns in multidimensional space. The feature importance analysis indicated that the anterior&#x2013;posterior (AP) and Pitch directions contributed most strongly to anomaly scoring, consistent with the dominant axes of positioning errors in clinical thoracoabdominal radiotherapy (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>). These findings align with clinical observations&#x2014;that rotational deviations are often more difficult to detect through conventional surface inspection&#x2014;and provide quantitative support for the model&#x2019;s clinical interpretability (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>). In the present study, feature importance was evaluated at the global level; subgroup-specific importance patterns were not separately modeled and warrant future investigation.</p>
<p>The longitudinal trend analysis (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>) showed that performance metrics (ROC-AUC and PR-AUC) (<xref ref-type="bibr" rid="B21">21</xref>) remained stable across all temporal cohorts, with no evidence of systematic drift or degradation. This finding indicates that the model maintains long-term discriminative stability, supporting its potential use for continuous quality monitoring in radiotherapy.</p>
<p>Compared with the traditional z-score thresholding method (<xref ref-type="bibr" rid="B22">22</xref>), the Isolation Forest (IF) achieved an approximately 0.21 improvement in ROC-AUC (0.960 vs. 0.754). Under a clinically acceptable false positive rate (FPR &#x2248; 5%), both recall and precision were markedly higher (0.52/0.71 vs. 0.25/0.52), indicating superior usability of the proposed approach in real-time clinical alerting scenarios (see <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>).</p>
<p>Major Innovations of This Study:</p>
<p>1. Introduction of Unsupervised Learning into Radiotherapy Quality Control:</p>
<p>To the best of our knowledge, this study is the first to integrate unsupervised machine learning into radiotherapy quality assurance (QA), moving beyond traditional statistical thresholds and human experience&#x2013;based approaches. The proposed method enables efficient, real-time, and scalable anomaly detection. The image-guided QA workflow was designed with reference to the AAPM TG-179 report on QA for CT-guided radiotherapy systems, emphasizing a &#x201c;human&#x2013;AI collaborative&#x201d; QC model in which AI serves only as an auxiliary tool for detection and alerting, while final decisions remain the responsibility of clinicians and medical physicists.</p>
<p>As volumetric modulated arc therapy (VMAT) has largely replaced conventional IMRT in multi-site radiotherapy due to its higher efficiency and superior organ-at-risk (OAR) sparing, the introduction of AI can further improve workflow efficiency. However, AI cannot fully replace human QA, as excessive reliance could introduce new safety risks (<xref ref-type="bibr" rid="B23">23</xref>&#x2013;<xref ref-type="bibr" rid="B26">26</xref>).</p>
<p>2. Multi-level Validation: The framework was validated from five complementary perspectives&#x2014;overall performance, immobilization methods, treatment sites, interpretability, and longitudinal stability&#x2014;ensuring systematic and generalizable results across diverse clinical contexts.</p>
<p>3. Enhanced Interpretability: Through principal component analysis (PCA) and feature importance analysis, key error dimensions contributing to anomalies were identified, providing clinically meaningful insights that improve understanding and practical adoption of AI-driven QC models.</p>
<p>4. Longitudinal Drift Detection: To the best of our knowledge, AI-derived anomaly scores were applied to track potential systematic drifts over time, offering valuable references for early QC alerting and adaptive radiotherapy (ART) decision-making.</p>
</sec>
<sec id="s5">
<label>5</label>
<title>Limitations and conclusion</title>
<p>Despite its promising results, this study has several limitations. First, the definition of anomalies was based on a statistical threshold (|z| &gt; 3) rather than true clinical mistreatment events. Future research should incorporate dosimetric validation and annotations of actual adverse clinical events to refine the labeling process. Second, this study is subject to center-dependent limitations. All data were collected from a single institution using a specific combination of linear accelerator platform (Varian VitalBeam), on-board CBCT registration system, and local IGRT decision rules. Variations across centers in CBCT image characteristics (e.g., noise properties), ROI definition, registration algorithms, availability and utilization of 6D couch corrections, and operator-dependent manual review practices may influence setup error distributions and anomaly patterns. Therefore, direct generalization of the reported performance should be interpreted cautiously, and further multicenter validation is warranted. Finally, this work compared only the Isolation Forest (IF) and Local Outlier Factor (LOF) algorithms. Future studies should explore more advanced models&#x2014;such as autoencoders, variational autoencoders (VAE), and graph neural networks (GNN)&#x2014;to enhance the detection of more complex and subtle anomalies (<xref ref-type="bibr" rid="B27">27</xref>&#x2013;<xref ref-type="bibr" rid="B30">30</xref>).</p>
<p>In summary, this study shows the feasibility and practical value of an unsupervised learning framework for detecting six-dimensional setup error anomalies in radiotherapy. The multi-level validation results support its potential integration into future intelligent QA systems, particularly when combined with emerging technologies such as surface-guided radiotherapy (SGRT) and adaptive radiotherapy (ART). Such integration could help establish an automated, real-time clinical quality control workflow, further improving the precision and safety of radiotherapy.</p>
</sec>
<sec id="s6" sec-type="conclusions">
<label>6</label>
<title>Conclusion</title>
<p>Based on a large-scale dataset of six-dimensional (6D) setup errors, this study proposed and validated an unsupervised learning&#x2013;driven framework for identifying statistically unusual setup patterns in radiotherapy. The results demonstrated that the Isolation Forest (IF) model exhibited excellent discriminative performance overall and across different immobilization methods and treatment sites. Interpretability analysis identified the anterior&#x2013;posterior (AP), Pitch, and left&#x2013;right (LR) directions as the major contributors to anomaly detection. The longitudinal analysis further revealed the framework&#x2019;s ability to identify potential system drifts, providing an early-warning reference for individualized quality control and adaptive radiotherapy (ART).</p>
<p>Methodologically, this study verified the effectiveness of unsupervised learning in anomaly detection and proposed a closed-loop &#x201c;setup&#x2013;monitoring&#x2013;alert&#x201d; quality control framework. Future work should incorporate dosimetric effect analysis, multicenter validation, and deep learning models to facilitate the transition of this framework from proof-of-concept to standardized clinical implementation.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="data-availability">
<title>Data availability statement</title>
<p>The data involve ethical issues and patient privacy. The hospital has not yet obtained ethical approval for disclosing the data; the existing ethical approval covers only the use of the data for scientific research and the public publication of research findings. Requests to access the datasets should be directed to Weixiang Lin, <email xlink:href="mailto:609591064@qq.com">609591064@qq.com</email>.</p></sec>
<sec id="s8" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Scientific Research Ethical Review of Ganzhou Cancer Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. The requirement for informed consent was waived for this study by the Scientific Research Ethical Review of Ganzhou Cancer Hospital because of the anonymous nature of the data.</p></sec>
<sec id="s9" sec-type="author-contributions">
<title>Author contributions</title>
<p>WL: Conceptualization, Data curation, Writing &#x2013; original draft, Investigation. CX: Writing &#x2013; review &amp; editing. LX: Writing &#x2013; review &amp; editing. JF: Writing &#x2013; review &amp; editing. XX: Writing &#x2013; review &amp; editing. YF: Formal analysis, Supervision, Writing &#x2013; review &amp; editing, Software.</p></sec>
<sec id="s11" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s12" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s13" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gilling</surname> <given-names>L</given-names></name>
<name><surname>Ali</surname> <given-names>O</given-names></name>
</person-group>. 
<article-title>Organ dose from Varian XI and Varian OBI systems are clinically comparable for pelvic CBCT imaging</article-title>. <source>Phys Eng Sci Med</source>. (<year>2022</year>) <volume>45</volume>:<page-range>279&#x2013;85</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13246-021-01090-3</pub-id>, PMID: <pub-id pub-id-type="pmid">35143026</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Son</surname> <given-names>K</given-names></name>
<name><surname>Kim</surname> <given-names>JS</given-names></name>
<name><surname>Lee</surname> <given-names>H</given-names></name>
<name><surname>Cho</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Imaging dose of human organs from kv-cbct in image-guided radiation therapy</article-title>. <source>Radiat Prot Dosim</source>. (<year>2017</year>) <volume>175</volume>:<fpage>194</fpage>&#x2013;<lpage>200</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/rpd/ncw285</pub-id>, PMID: <pub-id pub-id-type="pmid">27765832</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dzierma</surname> <given-names>Y</given-names></name>
<name><surname>Mikulla</surname> <given-names>K</given-names></name>
<name><surname>Richter</surname> <given-names>P</given-names></name>
<name><surname>Bell</surname> <given-names>K</given-names></name>
<name><surname>Melchior</surname> <given-names>P</given-names></name>
<name><surname>Nuesken</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>Imaging dose and secondary cancer risk in image-guided radiotherapy of pediatric patients</article-title>. <source>Radiat Oncol</source>. (<year>2018</year>) <volume>13</volume>:<fpage>168</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13014-018-1109-8</pub-id>, PMID: <pub-id pub-id-type="pmid">30185206</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Thompson</surname> <given-names>RF</given-names></name>
<name><surname>Valdes</surname> <given-names>G</given-names></name>
<name><surname>Fuller</surname> <given-names>CD</given-names></name>
<name><surname>Carpenter</surname> <given-names>CM</given-names></name>
<name><surname>Morin</surname> <given-names>O</given-names></name>
<name><surname>Aneja</surname> <given-names>S</given-names></name>
<etal/>
</person-group>. 
<article-title>Artificial intelligence in radiation oncology: a specialty-wide disruptive transformation?</article-title> <source>Radiother Oncol</source>. (<year>2018</year>) <volume>129</volume>:<page-range>421&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.radonc.2018.05.030</pub-id>, PMID: <pub-id pub-id-type="pmid">29907338</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chan</surname> <given-names>MF</given-names></name>
<name><surname>Witztum</surname> <given-names>A</given-names></name>
<name><surname>Valdes</surname> <given-names>G</given-names></name>
</person-group>. 
<article-title>Integration of AI and machine learning in radiotherapy QA</article-title>. <source>Front Artif Intell</source>. (<year>2020</year>) <volume>3</volume>:<elocation-id>577620</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/frai.2020.577620</pub-id>, PMID: <pub-id pub-id-type="pmid">33733216</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pillai</surname> <given-names>M</given-names></name>
<name><surname>Adapa</surname> <given-names>K</given-names></name>
<name><surname>Das</surname> <given-names>SK</given-names></name>
<name><surname>Mazur</surname> <given-names>L</given-names></name>
<name><surname>Dooley</surname> <given-names>J</given-names></name>
<name><surname>Marks</surname> <given-names>LB</given-names></name>
<etal/>
</person-group>. 
<article-title>Using artificial intelligence to improve the quality and safety of radiation therapy</article-title>. <source>J Am Coll Radiol</source>. (<year>2019</year>) <volume>16</volume>:<page-range>1267&#x2013;72</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jacr.2019.06.001</pub-id>, PMID: <pub-id pub-id-type="pmid">31492404</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Doolan</surname> <given-names>PJ</given-names></name>
<name><surname>Charalambous</surname> <given-names>S</given-names></name>
<name><surname>Roussakis</surname> <given-names>Y</given-names></name>
<name><surname>Leczynski</surname> <given-names>A</given-names></name>
<name><surname>Peratikou</surname> <given-names>M</given-names></name>
<name><surname>Benjamin</surname> <given-names>M</given-names></name>
<etal/>
</person-group>. 
<article-title>A clinical evaluation of the performance of five commercial artificial intelligence contouring systems for radiotherapy</article-title>. <source>Front Oncol</source>. (<year>2023</year>) <volume>13</volume>:<elocation-id>1213068</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2023.1213068</pub-id>, PMID: <pub-id pub-id-type="pmid">37601695</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cao</surname> <given-names>Y</given-names></name>
<name><surname>Xiang</surname> <given-names>H</given-names></name>
<name><surname>Zhang</surname> <given-names>H</given-names></name>
<name><surname>Zhu</surname> <given-names>Y</given-names></name>
<name><surname>Ting</surname> <given-names>KM</given-names></name>
</person-group>. 
<article-title>Anomaly detection based on isolation mechanisms: A survey</article-title>. <source>Mach Intell Res</source>. (<year>2025</year>) <volume>22</volume>:<page-range>849&#x2013;65</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11633-025-1554-4</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Charilaou</surname> <given-names>P</given-names></name>
<name><surname>Battat</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Machine learning models and over-fitting considerations</article-title>. <source>World J Gastroenterol</source>. (<year>2022</year>) <volume>28</volume>:<page-range>605&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3748/wjg.v28.i5.605</pub-id>, PMID: <pub-id pub-id-type="pmid">35316964</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wilimitis</surname> <given-names>D</given-names></name>
<name><surname>Walsh</surname> <given-names>CG</given-names></name>
</person-group>. 
<article-title>Practical considerations and applied examples of cross-validation for model development and evaluation in health care</article-title>. <source>JMIR Artif Intell</source>. (<year>2023</year>) <volume>2</volume>:<fpage>e49023</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2196/49023</pub-id>, PMID: <pub-id pub-id-type="pmid">38875530</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pillai</surname> <given-names>M</given-names></name>
<name><surname>Shumway</surname> <given-names>JW</given-names></name>
<name><surname>Adapa</surname> <given-names>K</given-names></name>
<name><surname>Dooley</surname> <given-names>J</given-names></name>
<name><surname>McGurk</surname> <given-names>R</given-names></name>
<name><surname>Mazur</surname> <given-names>LM</given-names></name>
<etal/>
</person-group>. 
<article-title>Augmenting quality assurance measures in treatment review with machine learning in radiation oncology</article-title>. <source>Adv Radiat Oncol</source>. (<year>2023</year>) <volume>8</volume>:<fpage>101234</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.adro.2023.101234</pub-id>, PMID: <pub-id pub-id-type="pmid">37205277</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cilla</surname> <given-names>S</given-names></name>
<name><surname>Romano</surname> <given-names>C</given-names></name>
<name><surname>Viola</surname> <given-names>P</given-names></name>
<name><surname>Craus</surname> <given-names>M</given-names></name>
<name><surname>Macchia</surname> <given-names>G</given-names></name>
<name><surname>Deodato</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>Complexity-based unsupervised machine learning for patient-specific VMAT quality assurance</article-title>. <source>Med Phys</source>. (<year>2025</year>) <volume>52</volume>:<elocation-id>e18013</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mp.18013</pub-id>, PMID: <pub-id pub-id-type="pmid">40849868</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kalet</surname> <given-names>AM</given-names></name>
<name><surname>Luk</surname> <given-names>SMH</given-names></name>
<name><surname>Phillips</surname> <given-names>MH</given-names></name>
</person-group>. 
<article-title>Radiation therapy quality assurance tasks and tools: the many roles of machine learning</article-title>. <source>Med Phys</source>. (<year>2020</year>) <volume>47</volume>:<page-range>e168&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mp.13445</pub-id>, PMID: <pub-id pub-id-type="pmid">30768796</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>P</given-names></name>
<name><surname>Shang</surname> <given-names>J</given-names></name>
<name><surname>Xu</surname> <given-names>Y</given-names></name>
<name><surname>Hu</surname> <given-names>Z</given-names></name>
<name><surname>Zhang</surname> <given-names>K</given-names></name>
<name><surname>Dai</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Anomaly detection in radiotherapy plans using deep autoencoder networks</article-title>. <source>Front Oncol</source>. (<year>2023</year>) <volume>13</volume>:<elocation-id>1142947</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2023.1142947</pub-id>, PMID: <pub-id pub-id-type="pmid">36998450</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>P</given-names></name>
<name><surname>Shang</surname> <given-names>J</given-names></name>
<name><surname>Fan</surname> <given-names>Y</given-names></name>
<name><surname>Hu</surname> <given-names>Z</given-names></name>
<name><surname>Dai</surname> <given-names>J</given-names></name>
<name><surname>Liu</surname> <given-names>Z</given-names></name>
<etal/>
</person-group>. 
<article-title>Unsupervised machine learning model for detecting anomalous volumetric modulated arc therapy plans for lung cancer patients</article-title>. <source>Front Big Data</source>. (<year>2024</year>) <volume>7</volume>:<elocation-id>1462745</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fdata.2024.1462745</pub-id>, PMID: <pub-id pub-id-type="pmid">39421134</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chalapathy</surname> <given-names>R</given-names></name>
<name><surname>Chawla</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Deep learning for anomaly detection: A survey</article-title>. <source>arXiv</source>. (<year>2019</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1901.03407</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kingma</surname> <given-names>DP</given-names></name>
<name><surname>Welling</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Auto-encoding variational bayes</article-title>. <source>arXiv</source>. (<year>2013</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1312.6114</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Breunig</surname> <given-names>MM</given-names></name>
<name><surname>Kriegel</surname> <given-names>HP</given-names></name>
<name><surname>Ng</surname> <given-names>RT</given-names></name>
<name><surname>Sander</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>LOF: identifying density-based local outliers</article-title>. <source>SIGMOD Rec</source>. (<year>2000</year>) <volume>29</volume>:<fpage>93</fpage>&#x2013;<lpage>104</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/335191.335388</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>FT</given-names></name>
<name><surname>Ting</surname> <given-names>KM</given-names></name>
<name><surname>Zhou</surname> <given-names>ZH</given-names></name>
</person-group>. (<year>2008</year>). 
<article-title>Isolation forest</article-title>, in: <conf-name>2008 Eighth IEEE International Conference on Data Mining</conf-name>, <conf-loc>Pisa, Italy</conf-loc>. pp. <page-range>413&#x2013;22</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICDM.2008.17</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sch&#xf6;lkopf</surname> <given-names>B</given-names></name>
<name><surname>Platt</surname> <given-names>JC</given-names></name>
<name><surname>Shawe-Taylor</surname> <given-names>J</given-names></name>
<name><surname>Smola</surname> <given-names>AJ</given-names></name>
<name><surname>Williamson</surname> <given-names>RC</given-names></name>
</person-group>. 
<article-title>Estimating the support of a high-dimensional distribution</article-title>. <source>Neural Comput</source>. (<year>2001</year>) <volume>13</volume>:<page-range>1443&#x2013;71</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1162/089976601750264965</pub-id>, PMID: <pub-id pub-id-type="pmid">11440593</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pimentel</surname> <given-names>MAF</given-names></name>
<name><surname>Clifton</surname> <given-names>DA</given-names></name>
<name><surname>Clifton</surname> <given-names>L</given-names></name>
<name><surname>Tarassenko</surname> <given-names>L</given-names></name>
</person-group>. 
<article-title>A review of novelty detection</article-title>. <source>Signal Process</source>. (<year>2014</year>) <volume>99</volume>:<page-range>215&#x2013;49</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.sigpro.2013.12.026</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pang</surname> <given-names>G</given-names></name>
<name><surname>Shen</surname> <given-names>C</given-names></name>
<name><surname>Cao</surname> <given-names>L</given-names></name>
<name><surname>Van Den Hengel</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Deep learning for anomaly detection: A review</article-title>. <source>ACM Comput Surv</source>. (<year>2021</year>) <volume>54</volume>:<fpage>38</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3439950</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hunte</surname> <given-names>SO</given-names></name>
<name><surname>Clark</surname> <given-names>CH</given-names></name>
<name><surname>Zyuzikov</surname> <given-names>N</given-names></name>
<name><surname>Nisbet</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Volumetric modulated arc therapy (VMAT): a review of clinical outcomes-what is the clinical evidence for the most effective implementation?</article-title> <source>Br J Radiol</source>. (<year>2022</year>) <volume>95</volume>:<elocation-id>20201289</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1259/bjr.20201289</pub-id>, PMID: <pub-id pub-id-type="pmid">35616646</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bissonnette</surname> <given-names>JP</given-names></name>
<name><surname>Moseley</surname> <given-names>DJ</given-names></name>
<name><surname>White</surname> <given-names>EA</given-names></name>
<name><surname>Sharpe</surname> <given-names>MB</given-names></name>
<name><surname>Purdie</surname> <given-names>TG</given-names></name>
<name><surname>Jaffray</surname> <given-names>DA</given-names></name>
<etal/>
</person-group>. 
<article-title>Quality assurance for image-guided radiation therapy utilizing CT-based technologies: A report of the AAPM TG-179</article-title>. <source>Med Phys</source>. (<year>2012</year>) <volume>39</volume>:<page-range>1946&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.3690466</pub-id>, PMID: <pub-id pub-id-type="pmid">22482616</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Buzzi</surname> <given-names>S</given-names></name>
<name><surname>Mancosu</surname> <given-names>P</given-names></name>
<name><surname>Bresolin</surname> <given-names>A</given-names></name>
<name><surname>Gallo</surname> <given-names>P</given-names></name>
<name><surname>La Fauci</surname> <given-names>F</given-names></name>
<name><surname>Lobefalo</surname> <given-names>F</given-names></name>
<etal/>
</person-group>. 
<article-title>A Time-Series Approach for Machine Learning-Based Patient-Specific Quality Assurance of Radiosurgery Plans</article-title>. <source>Bioengineering</source>. (<year>2025</year>) <volume>12</volume>:<elocation-id>897</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/bioengineering12080897</pub-id>, PMID: <pub-id pub-id-type="pmid">40868410</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Claessens</surname> <given-names>M</given-names></name>
<name><surname>Seller Oria</surname> <given-names>C</given-names></name>
<name><surname>Brouwer</surname> <given-names>CL</given-names></name>
<name><surname>Ziemer</surname> <given-names>BP</given-names></name>
<name><surname>Scholey</surname> <given-names>JE</given-names></name>
<name><surname>Lin</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Quality Assurance for AI-Based Applications in Radiation Therapy</article-title>. <source>Semin Radiat Oncol</source>. (<year>2022</year>) <volume>32</volume>:<page-range>421&#x2013;31</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.semradonc.2022.06.011</pub-id>, PMID: <pub-id pub-id-type="pmid">36202444</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cai</surname> <given-names>Y</given-names></name>
<name><surname>Zhang</surname> <given-names>W</given-names></name>
<name><surname>Chen</surname> <given-names>H</given-names></name>
<name><surname>Cheng</surname> <given-names>KT</given-names></name>
</person-group>. 
<article-title>MedIAnomaly: A comparative study of anomaly detection in medical images</article-title>. <source>Med Image Anal</source>. (<year>2025</year>) <volume>102</volume>:<elocation-id>103500</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.media.2025.103500</pub-id>, PMID: <pub-id pub-id-type="pmid">40009901</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Schlegl</surname> <given-names>T</given-names></name>
<name><surname>Seeb&#xf6;ck</surname> <given-names>P</given-names></name>
<name><surname>Waldstein</surname> <given-names>SM</given-names></name>
<name><surname>Schmidt-Erfurth</surname> <given-names>U</given-names></name>
<name><surname>Langs</surname> <given-names>G</given-names></name>
</person-group>. <source>Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery</source>. <publisher-loc>Cham</publisher-loc>: 
<publisher-name>Springer</publisher-name>. (<year>2017</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-319-59050-9_12</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>G</given-names></name>
<name><surname>Ballangrud</surname> <given-names>&#xc5;M</given-names></name>
<name><surname>Kuo</surname> <given-names>LC</given-names></name>
<name><surname>Kang</surname> <given-names>H</given-names></name>
<name><surname>Mechalakos</surname> <given-names>J</given-names></name>
<name><surname>Lovelock</surname> <given-names>DM</given-names></name>
<etal/>
</person-group>. 
<article-title>AAPM Task Group 147: Quality assurance for non-radiographic radiotherapy localization and positioning systems</article-title>. <source>Med Phys</source>. (<year>2012</year>) <volume>39</volume>:<page-range>6459&#x2013;79</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.4747261</pub-id>, PMID: <pub-id pub-id-type="pmid">23039630</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Al-Hallaq</surname> <given-names>HA</given-names></name>
<name><surname>Cervi&#xf1;o</surname> <given-names>LI</given-names></name>
<name><surname>Kisling</surname> <given-names>KD</given-names></name>
<name><surname>Followill</surname> <given-names>DS</given-names></name>
<name><surname>Gayou</surname> <given-names>O</given-names></name>
<name><surname>Phillips</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>AAPM task group 302: surface-guided radiotherapy (SGRT)</article-title>. <source>Med Phys</source>. (<year>2022</year>) <volume>49</volume>:<fpage>e15532</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mp.15532</pub-id>, PMID: <pub-id pub-id-type="pmid">35179229</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/20709">Timothy James Kinsella</ext-link>, Brown University, United States</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/800480">Eric N. Carver</ext-link>, Brown University, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3316876">Z. Michael Li</ext-link>, Manteia Technologies Co., Ltd, China</p></fn>
</fn-group>
</back>
</article>