<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2024.1390398</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Oncology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Spatiotemporal correlation enhanced real-time 4D-CBCT imaging using convolutional LSTM networks</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zhang</surname>
<given-names>Hua</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1031139"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Chen</surname>
<given-names>Kai</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Xiaotong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>You</surname>
<given-names>Tao</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1166451"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sun</surname>
<given-names>Wenzheng</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1417151"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Dang</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/847156"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>School of Biomedical Engineering, Southern Medical University</institution>, <addr-line>Guangzhou, Guangdong</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Guangdong Provincial Key Laboratory of Medical Image Processing, Southern Medical University</institution>, <addr-line>Guangzhou, Guangdong</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Artificial Intelligence, Chongqing University of Technology</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Radiation Oncology, The Affiliated Hospital of Jiangsu University</institution>, <addr-line>Zhenjiang, Jiangsu</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Radiation Oncology, The Second Affiliated Hospital, School of Medicine, Zhejiang University</institution>, <addr-line>Hangzhou, Zhejiang</addr-line>, <country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Radiation Oncology, National Cancer Center/National Clinical Research Center for Cancer/Cancer Hospital &amp; Shenzhen Hospital, Chinese Academy of Medical Sciences and Peking Union Medical College</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Eric Edward Sigmund, New York University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Bin Li, Sun Yat-sen University Cancer Center (SYSUCC), China</p>
<p>Zhenhui Dai, Guangzhou University of Chinese Medicine, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Wenzheng Sun, <email xlink:href="mailto:sunwenzheng@zju.edu.cn">sunwenzheng@zju.edu.cn</email>; Jun Dang, <email xlink:href="mailto:dangjun@cicams-sz.org.cn">dangjun@cicams-sz.org.cn</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>08</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>14</volume>
<elocation-id>1390398</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>02</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>07</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Zhang, Chen, Xu, You, Sun and Dang</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Zhang, Chen, Xu, You, Sun and Dang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Purpose</title>
<p>To enhance the accuracy of real-time four-dimensional cone beam CT (4D-CBCT) imaging by incorporating spatiotemporal correlation from the sequential projection image into the single projection-based 4D-CBCT estimation process.</p>
</sec>
<sec>
<title>Methods</title>
<p>We first derived 4D deformation vector fields (DVFs) from patient 4D-CT. Principal component analysis (PCA) was then employed to extract distinctive feature labels for each DVF, focusing on the first three PCA coefficients. To simulate a wide range of respiratory motion, we expanded the motion amplitude and used random sampling to generate approximately 900 sets of PCA labels. These labels were used to produce 900 simulated 4D-DVFs, which in turn deformed the 0% phase 4D-CT to obtain 900 CBCT volumes with continuous motion amplitudes. Following this, the forward projection was performed at one angle to get all of the digital reconstructed radiographs (DRRs). These DRRs and the PCA labels were used as the training data set. To capture the spatiotemporal correlation in the projections, we propose to use the convolutional LSTM (ConvLSTM) network for PCA coefficient estimation. For network testing, when several online CBCT projections (with different motion amplitudes that cover the full respiration range) are acquired and sent into the network, the corresponding 4D-PCA coefficients will be obtained and finally lead to a full online 4D-CBCT prediction. A phantom experiment is first performed with the XCAT phantom; then, a pilot clinical evaluation is further conducted.</p>
</sec>
<sec>
<title>Results</title>
<p>Results on the XCAT phantom and the patient data show that the proposed approach outperformed other networks in terms of visual inspection and quantitative metrics. For the XCAT phantom experiment, ConvLSTM achieves the highest quantification accuracy, with MAPE (Mean Absolute Percentage Error), PSNR (Peak Signal-to-Noise Ratio), and RMSE (Root Mean Squared Error) of 0.0459, 64.6742, and 0.0011, respectively. For the patient pilot clinical experiment, ConvLSTM also achieves the best quantification accuracy, with corresponding values of 0.0934, 63.7294, and 0.0019, respectively. The quantitative evaluation metrics that we used are 1) the Mean Absolute Error (MAE), 2) the Normalized Cross Correlation (NCC), 3) the Structural Similarity Index Measurement (SSIM), 4) the Peak Signal-to-Noise Ratio (PSNR), 5) the Root Mean Squared Error (RMSE), and 6) the Mean Absolute Percentage Error (MAPE).</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The spatiotemporal correlation-based respiration motion modeling supplied a potential solution for accurate real-time 4D-CBCT reconstruction.</p>
</sec>
</abstract>
<kwd-group>
<kwd>ConvLSTM</kwd>
<kwd>PCA</kwd>
<kwd>radiation therapy</kwd>
<kwd>4D-CBCT</kwd>
<kwd>spatiotemporal</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="6"/>
<equation-count count="17"/>
<ref-count count="22"/>
<page-count count="10"/>
<word-count count="4512"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Cancer Imaging and Image-directed Interventions</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Stereotactic body radiotherapy (SBRT) is commonly used in routine clinical radiation therapy, especially for early-stage cancers such as lung cancer (<xref ref-type="bibr" rid="B1">1</xref>). The high dose rate of the SBRT beam also brings a high risk for moving targets (e.g., lung cancer). Hence, accurate image guidance plays a crucial role in precise lung SBRT. In clinical routine, the most common image guidance tool is the integrated 3D Cone Beam CT (CBCT) imaging system (<xref ref-type="bibr" rid="B2">2</xref>). However, conventional static 3D-CBCT is unable to capture 4D lung motion during respiration.</p>
<p>Four-dimensional cone beam CT (4D-CBCT) imaging has been developed to address this issue. 4D-CBCT can supply temporal image sequences for moving organs such as the lung. Conventional analytical 4D-CBCT methods, such as the McKinnon&#x2013;Bates (MKB) algorithm, are widely used in commercial linear accelerators. However, the image quality suffers from reduced contrast and the inevitable motion blurring induced by the time-averaged prior image (<xref ref-type="bibr" rid="B3">3</xref>). Another type of 4D-CBCT reconstruction method is the image deformation-based scheme (<xref ref-type="bibr" rid="B4">4</xref>). For these kinds of methods, the deformation vector field (DVF) calculation/estimation between the 0% phase and each other phase is critical to achieve the final accurate 4D-CBCT. The DVF optimization process is quite time consuming, and it raises a blind-treatment risk of initiating radiation pneumonia (<xref ref-type="bibr" rid="B5">5</xref>). Both the above-mentioned analytical and deformation-based 4D-CBCT reconstructions use projections acquired over the full 360&#xb0; range. Recently, online real-time CBCT estimation/reconstruction via a single or only a few X-ray projections has attracted more interest. It offers oncologists not only fast but also substantially lower-dose real-time 4D-CBCT images compared with the conventional full projection-based 3D-CBCT (<xref ref-type="bibr" rid="B6">6</xref>).</p>
<p>The 2D- to 4D-CBCT estimation has been previously studied by many groups in the past decades. Li (<xref ref-type="bibr" rid="B7">7</xref>) proposed a motion model (MM) to predict 4D-CBCT via forward matching between 3D volumes and 2D X-ray projections. You (<xref ref-type="bibr" rid="B8">8</xref>) reported a motion model free deformation (MM-FD) scheme to introduce free deformation alignment for promoting 4D-CBCT estimation accuracy. One limitation of these iterative approaches is that they are quite time consuming. On the other aspect, Xu (<xref ref-type="bibr" rid="B6">6</xref>) reported a linear model for predicting 4D-CBCT via DRR (Digital Reconstructed Radiography) and validated it with digital and physical phantom experiments. However, the proposed linear model mismatches with the complex relationship between the intensity variation and the real breathing motion. Wei (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>) proposed a Convolutional Neural Network (CNN)-based framework to extract the motion feature from 2D DRRs to corresponding 3D-CBCT (e.g., one phase of 4D-CBCT). However, all of the aforementioned 4D-CBCT prediction strategies neglected the spatiotemporal correlation inherent in 4D-CBCT.</p>
<p>To address the issues, we propose a combined model that contains 1) a convolutional LSTM (ConvLSTM) and 2) a principal component analysis (PCA) model with prior 4D-CT to map a single 2D measured projection to one phase of 4D-CBCT. We evaluated the model&#x2019;s performance on both the XCAT phantom and pilot clinical data. Quantitative metrics are used for network performance quantification between our proposed method versus other state-of-the-art networks.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<p>The overall workflow is illustrated in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. In the training stage, the 4D-DVFs are first derived from the 4D-CT (between 0% phase and other phases) via the voxel-by-voxel image registration algorithms (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B13">13</xref>). The DVFs then will be simply represented by the first few PCA coefficients. In our experiment, we chose the first three PCA coefficients. The PCA coefficient is further expanded to fully cover the potential possible motion range for simulation. We then performed random sampling and generated approximately 900 PCA coefficient groups. These groups will be used to create the corresponding 900 DVFs, which will in turn generate 900 deformed 4D-CT images with varying respiratory motions. Finally, a forward projection will be performed at a single angle for all 900 4D-CT images to acquire 900 DRRs. A ray-tracing algorithm (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B15">15</xref>) is used in the forward projection simulation process. The generated DRRs will be used to train the ConvLSTM network, which has three output labels representing the first three PCA-modeled coefficients labels.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The workflow of the proposed method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g001.tif"/>
</fig>
<p>In the application stage, a single CBCT online projection that is measured at the same angle will be sent into the trained network. The network predicts three PCA labels to generate a phased 3D-CBCT. Then, more online projections (with different respiration amplitudes) will be continuously measured and sent into the network so that a whole respiration cycle will be covered. In this way, the full-cycle PCA label groups can be achieved, and hence the whole 4D-CBCT. The entire process is performed in real time. Below, we summarize our work into five parts: 1) motion modeling, 2) data processing, 3) network architecture, 4) loss function, and 5)&#xa0;experiment design.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Motion modeling</title>
<p>As mentioned above, the 4D-DVF is initially obtained from 4D-CT via deformable image registration (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B13">13</xref>). The 0% phase was selected as the reference phase to achieve the 4D-DVF. We used PCA, which is a commonly used data decoupling scheme for data dimension reduction (<xref ref-type="bibr" rid="B16">16</xref>), to extract the DVF&#x2019;s feature labels (i.e., the principal components/eigenvectors). For computational efficiency considerations, we select the first three PCA labels for mapping the DVFs. <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> illustrates the accuracy of DVF estimation relative to the number of PCA labels used. As expected, DVF accuracy improves with an increasing number of PCA labels. However, this also increases computational complexity. We found that the first three principal components already capture 97.22% of the DVF information, and further increasing the number of PCA labels does not add substantially more. Therefore, we chose to discard the remaining PCA labels in our experiment.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>PCA label versus DVF estimation accuracy.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Number of PCA labels</th>
<th valign="middle" align="left">information (%)</th>
<th valign="middle" align="left">Increment of <break/>information (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">71.02</td>
<td valign="middle" align="left">71.02</td>
</tr>
<tr>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">87.37</td>
<td valign="middle" align="left">16.35</td>
</tr>
<tr>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">97.22</td>
<td valign="middle" align="left">9.85</td>
</tr>
<tr>
<td valign="middle" align="left">4</td>
<td valign="middle" align="left">98.24</td>
<td valign="middle" align="left">1.02</td>
</tr>
<tr>
<td valign="middle" align="left">5</td>
<td valign="middle" align="left">99.20</td>
<td valign="middle" align="left">0.96</td>
</tr>
<tr>
<td valign="middle" align="left">6</td>
<td valign="middle" align="left">99.62</td>
<td valign="middle" align="left">0.42</td>
</tr>
<tr>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">99.89</td>
<td valign="middle" align="left">0.27</td>
</tr>
<tr>
<td valign="middle" align="left">8</td>
<td valign="middle" align="left">100.00</td>
<td valign="middle" align="left">0.11</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The mapping relationship between the DVF and the PCA labels is given by <xref ref-type="disp-formula" rid="eq1">Formula 1</xref>. Let the DVF size set be 3&#xd7;<italic>N<sub>voxelCT</sub>
</italic>, where <italic>N<sub>voxelCT</sub>
</italic> stands for 3D-CT voxel number; 3 stands for the 3D motion. The DVF will be linearly mapped by <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:msup>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>p</italic> and <italic>q</italic> stand for the eigenvectors and their corresponding PCA coefficients. Index <italic>i</italic> and <italic>j</italic> represent the respiration phase and eigenvectors, respectively.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data processing</title>
<p>Being a regression task, ConvLSTM requires a large number of training data-set samples. In this study, we performed data augmentation and data enhancement. For data augmentation, we enlarged the simulated respiration amplitudes by a 15% interval up and down between two adjacent phases. This is because respiration is a time-continuous physiological motion. The concept of the 4D-CBCT phase is an average reconstruction for projections in one re-binned phase. The lung will move across the re-binned interface between two adjacent phases. Our extended motion amount covers just a bit more than the average motion range (<xref ref-type="bibr" rid="B7">7</xref>). This is to make sure that all the possible motion amplitudes will be modeled for training data generation. We perform PCA label random sampling to generate 900 DRRs as a training data set.</p>
<p>For data enhancement, we considered the influence of quantum noise in the simulated DRRs. Given that quantum noise is typically a combination of Poisson and Gaussian noise (<xref ref-type="bibr" rid="B17">17</xref>), we constructed a linear noise combination as follows; see <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the noise-free signal line integral; <inline-formula>
<mml:math display="inline" id="im2">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> means the noise for each detector; <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the X-ray projection intensity; and <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> represents background electronic noise. <italic>I<sub>0</sub>
</italic> and <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are set to be 10<sup>5</sup> and 10, respectively. DRR was then added to the simulated noise to achieve the real projected image.</p>
<p>We also implemented an intensity correction scheme to minimize the intensity mismatch between the simulated training DRRs versus the measured CBCT projections. The correction is given by <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo>&#x2227;</mml:mo>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo>&#x2227;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the corrected DRR intensity. <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> represent the mean and the standard deviation of the original DRR intensity, and <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the mean and standard deviation of measured CBCT projection.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Network architecture</title>
<p>We use the ConvLSTM to explore the nonlinear mapping between DRRs and the PCA coefficients. The network architecture is illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>. It contains a series of ConvLSTM cells and a regression layer.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The ConvLSTM framework.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g002.tif"/>
</fig>
<p>Conventional LSTM (<xref ref-type="bibr" rid="B18">18</xref>) contains a memory cell (<inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and three gate control cells: 1) the forget gate (<inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), 2) the input gate (<inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), and 3) the output gate (<inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>). <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> stores the information from preceding time steps, and the three gates update the cell. The LSTM models the relationships between all of the time steps; meanwhile, it ignores the internal information within each time step. ConvLSTM (<xref ref-type="bibr" rid="B19">19</xref>), instead, explores the local features within each time step via convolutional operators. For the <italic>t<sup>th</sup>
</italic> ConvLSTM cell, the internal operations will be represented by (<xref ref-type="bibr" rid="B19">19</xref>), see <xref ref-type="disp-formula" rid="eq4">Equations 4</xref>&#x2013;<xref ref-type="disp-formula" rid="eq9">9</xref>:</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<italic>&#x3c3;</italic> is the sigmoid function, <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> stands for the TanHyperbolic function, &#x2217; and <inline-formula>
<mml:math display="inline" id="im17">
<mml:mo>&#x2218;</mml:mo>
</mml:math>
</inline-formula> represent the convolutional operator and Hadamard product, respectively. <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the input of the current cell, and <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a candidate storage unit for information transmission. In addition, <italic>W</italic> and <italic>b</italic> denote convolution kernels and the bias terms. <italic>W</italic> and <italic>b</italic> have obvious meanings. For instance, <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the input&#x2013;output gate convolution kernel, while <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the input gate bias, etc.</p>
<p>Due to the characteristic of the convolutional operator, ConvLSTM can acquire both temporal and spatial information simultaneously (<xref ref-type="bibr" rid="B19">19</xref>&#x2013;<xref ref-type="bibr" rid="B22">22</xref>). Our ConvLSTM network contains 40 hidden layers and two cell layers. Moreover, it has eight layers, kernel size is 3, padding is set as &#x201c;valid&#x201d;, and the stride of the convolution kernel is 1.</p>
<p>The regression layer uses the feature map generated from ConvLSTM to predict PCA coefficients. It contains a pooling layer with two fully connected layers. By using the dominant local information, the pooling layer reduces the computation cost. The pooling was set to twice the down-sampling, and the dimensions of the two completely connected layers are 1,024 and 3.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Loss function</title>
<p>The normalized mean square error builds the loss function, which is given in <xref ref-type="disp-formula" rid="eq10">Equation 10</xref>. The weighting of the PCA coefficients (i.e., the output labels in the network) in the loss function ensured that the first coefficient has the highest estimation accuracy.</p>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mo>&#x2016;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:msub>
<mml:mo>&#x2016;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<italic>N</italic> is the training sample number; <inline-formula>   <mml:math display="inline" id="im22">
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:msub>
<mml:mo>&#x2016;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the L<sub>2</sub> norm, and &#x2218; is the element-wise (Hadamard) product. <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the output of the regression model. <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the <italic>i</italic>
<sup>th</sup> training image, <inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the PCA coefficient, and W is the network parameters. <inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the PCA coefficients weight, which is set to be [<inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:mfrac>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>].</p>
<p>For model training, the ADAM optimizer was utilized with a dynamic learning rate, initially set at 0.001. The batch size was set to 8, and the training ran for 200 epochs. In an environment configured with Python 3.7 and an NVIDIA GeForce RTX 4080, training the data for 200 epochs took approximately 36&#xa0;h.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Experiment design</title>
<p>For network performance evaluation, we used the XCAT phantom and patient 4D-CT data for the quantification. For testing, we simulated an on-board CBCT projection and then sent it into the pre-trained network to predict PCA coefficients. The quantitative evaluation metrics that we used are 1) the Mean Absolute Error (MAE), 2) the Normalized Cross Correlation (NCC), 3) the Multi-scale Structural Similarity (SSIM), 4) the Peak Signal-to-Noise Ratio (PSNR), 5) the Root Mean Squared Error (RMSE), and 6) the Mean Absolute Percentage Error (MAPE). MAE is used to quantify the accuracy of regression models. <italic>y</italic> and <inline-formula>
<mml:math display="inline" id="im28">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> represent the label and the predicted value of the model, and <italic>i</italic> stands for the index of the regression model. We have in <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>:</p>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>m</mml:mi>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In addition, NCC and SSIM (Multi-scale Structural Similarity Index Measure) are used to evaluate the quality of the reconstructed image. See <xref ref-type="disp-formula" rid="eq12">Equations 12</xref> and <xref ref-type="disp-formula" rid="eq13">13</xref>. <italic>S</italic> and <italic>T</italic> represent slice data with size of <italic>H</italic>&#xd7;<italic>W</italic> of the original image and the reconstructed image, respectively. <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the mean, covariance, and variance of the slice image, respectively.</p>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:msqrt>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>M</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>PSNR is defined based on MSE (Mean Squared Error). See <xref ref-type="disp-formula" rid="eq14">Equations 14</xref> and <xref ref-type="disp-formula" rid="eq15">15</xref>:</p>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2016;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:msup>
<mml:mo>&#x2016;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>10</mml:mn>
<mml:mo>&#x2217;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>g</mml:mi>
<mml:mn>10</mml:mn>
<mml:mfrac>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>N is the image pixel number. MAX is the maximum possible pixel value.</p>
<p>The definition of RMSE is given in <xref ref-type="disp-formula" rid="eq16">Equation 16</xref>:</p>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
<mml:msup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</disp-formula>
<p>MAPE is the average ratio of the absolute difference between the predicted value and the true value to the true value. The definition of MAPE is given in <xref ref-type="disp-formula" rid="eq17">Equation 17</xref>:</p>
<disp-formula id="eq17">
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>n</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Network parameter optimization</title>
<p>Being a spatiotemporally sensitive network, the number of temporally continuous images that the network can handle during training reflects its ability to estimate motion accurately. However, <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> indicates that the model prediction accuracy is not dramatically influenced by the input image number. The MAE values fluctuate between 47 and 57, and the SSIM remains approximately 0.93. We found that the model achieves the best performance with four continuous temporal images with the lowest MAE of 47.15 and highest SSIM of 0.95.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Input image quantity vs. MAE/SSIM of model prediction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g003.tif"/>
</fig>
<p>The selection of hyper-parameters for the ConvLSTM network was a critical aspect, as these parameters significantly impact the prediction performance of the model. To determine the optimal configuration, we conducted a series of ablation experiments focusing on the number of hidden layers and cell layers within the ConvLSTM network. The experiment results in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> reveal that increasing the number of hidden layers decreased the MAE without significantly affecting computation time, although it did increase the number of parameters. Conversely, increasing the number of cell layers resulted in a slower decrease in MAE and an increase in computation time, with little change in parameter count. By balancing these factors, we determined that a configuration with 40 hidden layers and two cell layers provided the optimal trade-off, ensuring high prediction accuracy while maintaining computational efficiency.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Influence of ConvLSTM cells on the model&#x2019;s prediction. &#x201c;H&#x201d; stands for the number of hidden layers; &#x201c;L&#x201d; denotes the number of cell layers.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g004.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Convergence of loss function</title>
<p>The convergence of the loss function is decided by the weightings. <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> shows the convergence comparison caused by different weightings. Their MAE and NCC values are also summarized in the table. We found that the second group weighting (e.g., [<inline-formula>
<mml:math display="inline" id="im42">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im43">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>]) has the smallest first PCA label error. Meanwhile, this group also achieved the highest NCC.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Weighting influence on MAE/NCC.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">loss function<break/>weighting</th>
<th valign="middle" colspan="3" align="left">MAE</th>
<th valign="middle" rowspan="2" align="left">NCC</th>
</tr>
<tr>
<th valign="middle" align="left">1st</th>
<th valign="middle" align="left">2nd</th>
<th valign="middle" align="left">3<sup>rd</sup>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">[<inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>3</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>3</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im33">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>3</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>]</td>
<td valign="middle" align="left">9.09</td>
<td valign="middle" align="left">9.23</td>
<td valign="middle" align="left">9.36</td>
<td valign="middle" align="left">0.96</td>
</tr>
<tr>
<td valign="middle" align="left">[<inline-formula>
<mml:math display="inline" id="im34">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im35">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>]</td>
<td valign="middle" align="left">6.29</td>
<td valign="middle" align="left">9.81</td>
<td valign="middle" align="left">10.06</td>
<td valign="middle" align="left">0.98</td>
</tr>
<tr>
<td valign="middle" align="left">[<inline-formula>
<mml:math display="inline" id="im37">
<mml:mrow>
<mml:msqrt>
<mml:mn>3</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>/<inline-formula>
<mml:math display="inline" id="im38">
<mml:mrow>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im39">
<mml:mrow>
<mml:msqrt>
<mml:mn>2</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>/<inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im41">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:msqrt>
<mml:mn>6</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>]</td>
<td valign="middle" align="left">8.01</td>
<td valign="middle" align="left">9.19</td>
<td valign="middle" align="left">10.01</td>
<td valign="middle" align="left">0.96</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Suitable choice of the pooling will also speed up loss function convergence. See <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>. The figure compares the loss convergence curves across epochs for different pooling schemes, namely, maximal pooling, convolutional pooling, average pooling, and even no pooling at all. The results show that convolutional pooling achieves the best convergence performance. The pooling operation reduces the model&#x2019;s parameters, hence accelerating its convergence.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Training results by using different pooling optimizations. These pooling operations are set to twice the down-sampling, and the model only performs a single pooling operation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g005.tif"/>
</fig>
<p>Suitable choice of pooling will also speed up loss function convergence. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> compares the loss convergence curve with different pooling schemes such as maximal pooling, convolutional pooling, average pooling, and even no pooling. The results show that convolutional pooling achieves the best convergence performance. The pooling operation reduces the model&#x2019;s parameters, hence accelerating its convergence.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Visualization of images result of TestCase1 in different anatomical surfaces for each model with the training data generated from XCAT. <bold>(A)</bold> Coronal plane; <bold>(B)</bold> sagittal plane.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g006.tif"/>
</fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>XCAT simulation results</title>
<p>The XCAT phantom-based digital experiment was first performed. Four state-of-the-art network structures (e.g., CNN/Unet/ResNet/ConvLSTM) were tested with the phantom to compare their performances. As shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, for the two test cases, the ConvLSTM outperforms other models in PCA coefficient prediction, especially for the first coefficient. The bold values provided in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> mean that ConvLSTM achieves the best PCA coefficient match compared with that of the ground truth for the XCAT phantom. By utilizing PCA to reduce the dimensionality of the DVFs, the ConvLSTM network focuses on the most significant components of respiratory motion. This not only improves computational efficiency but also ensures that the network is learning the most relevant features for accurate motion prediction. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> presents the reconstructed results based on the PCA coefficients predicted by ConvLSTM versus CNN/UNet/ResNet. The reconstructed coronal plane and sagittal plane images and the difference images between each reconstruction and the ground truth image are summarized in <xref ref-type="fig" rid="f6">
<bold>Figures&#xa0;6A, B</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparison of prediction results versus ground truth of XCAT data.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" colspan="2" align="center">PCA coefficients</th>
</tr>
<tr>
<th valign="middle" align="center">Test Case1</th>
<th valign="middle" align="center">Test Case2</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">CNN</td>
<td valign="top" align="center">[&#x2212;1,121.3269 366.0489 &#x2212;114.1392]</td>
<td valign="top" align="center">[5,736.9881 &#x2212;391.8785 28.2141]</td>
</tr>
<tr>
<td valign="middle" align="center">Unet</td>
<td valign="top" align="center">[&#x2212;1,201.9354 394.0645 &#x2212;66.5190]</td>
<td valign="top" align="center">[5,723.7266 &#x2212;291.1034 56.5676]</td>
</tr>
<tr>
<td valign="middle" align="center">ResNet</td>
<td valign="top" align="center">[&#x2212;1,124.9048 378.9768 &#x2212;60.8043]</td>
<td valign="top" align="center">[5,610.0141 &#x2212;292.2644 90.5484]</td>
</tr>
<tr>
<td valign="middle" align="center">ConvLSTM</td>
<td valign="top" align="center">[<bold>&#x2212;1,173.5433</bold> 407.5900 &#x2212;53.5265]</td>
<td valign="top" align="center">[<bold>5,742.6875</bold> &#x2212;246.5791 181.1309]</td>
</tr>
<tr>
<td valign="middle" align="center">Ground Truth</td>
<td valign="top" align="center">[<bold>&#x2212;1,163.5334</bold> 454.6699 &#x2212;78.2698]</td>
<td valign="top" align="center">[<bold>5,787.5347</bold> &#x2212;258.0560 186.0697]</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Values in bold indicate that our proposed method achieves the best quantification results compared to the ground truth.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> summarizes the quantification evaluation comparison between each network. The results indicate that ConvLSTM outperformed other networks for all of the evaluation metrics.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Quantification comparison of prediction and reconstruction of each model on the coronal plane in XCAT TestData1.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="left">MAPE</th>
<th valign="top" align="left">PSNR</th>
<th valign="top" align="left">RMSE</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">CNN</td>
<td valign="top" align="left">0.2092</td>
<td valign="top" align="left">55.0287</td>
<td valign="top" align="left">0.0024</td>
</tr>
<tr>
<td valign="top" align="left">UNet</td>
<td valign="top" align="left">0.0464</td>
<td valign="top" align="left">62.0018</td>
<td valign="top" align="left">0.0015</td>
</tr>
<tr>
<td valign="top" align="left">ResNet</td>
<td valign="top" align="left">0.0628</td>
<td valign="top" align="left">56.6748</td>
<td valign="top" align="left">0.0025</td>
</tr>
<tr>
<td valign="top" align="left">ConvLSTM</td>
<td valign="top" align="left">0.0459</td>
<td valign="top" align="left">64.6742</td>
<td valign="top" align="left">0.0011</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Pilot clinical results</title>
<p>
<xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref> shows two cases of the real and predicted first three PCA coefficients of the patient data results. It is well known that the higher the principal component order, the higher the PCA contribution rate. As can be seen from <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, the first principal component of the model based on ConvLSTM is closest to the true value, just as the bold values illustrated. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> shows the reconstructed coronal images based on the PCA coefficients predicted by CNN/UNet/ResNet and the ConvLSTM network. We can see that all models have successfully reconstructed the anatomical structures, but ConvLSTM achieves the smallest difference image relative to the ground truth. <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref> summarizes the quantification evaluation comparison between each network on the clinical TestCase1. According to the result, we can see that ConvLSTM supplies a prediction with the minimum error compared with the ground truth, confirming that ConvLSTM outperformed other networks. Traditional CNNs and other networks mainly focus on spatial features, which limits their ability to accurately model dynamic processes like respiratory motion. The ConvLSTM&#x2019;s ability to integrate convolutional operations with LSTM&#x2019;s temporal processing allows it to effectively model the temporal evolution of respiratory motion, leading to more accurate 4D-CBCT reconstructions.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparison of prediction results versus ground truth of patient data.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" colspan="2" align="center">PCA coefficients</th>
</tr>
<tr>
<th valign="middle" align="center">Test Case1</th>
<th valign="middle" align="center">Test Case2</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">CNN</td>
<td valign="top" align="center">[&#x2212;676.5737 &#x2212;36.4397 &#x2212;26.6990]</td>
<td valign="top" align="center">[&#x2212;87.5940 &#x2212;117.7669 12.5394]</td>
</tr>
<tr>
<td valign="middle" align="center">Unet</td>
<td valign="top" align="center">[&#x2212;747.2873 &#x2212;81.1768 &#x2212;34.4381]</td>
<td valign="top" align="center">[&#x2212;81.5389 &#x2212;102.9164 11.1525]</td>
</tr>
<tr>
<td valign="middle" align="center">ResNet</td>
<td valign="top" align="center">[&#x2212;673.7071 &#x2212;74.0461 &#x2212;21.3157]</td>
<td valign="top" align="center">[&#x2212;107.5652 &#x2212;120.5355 14.5815]</td>
</tr>
<tr>
<td valign="middle" align="center">ConvLSTM</td>
<td valign="top" align="center">[<bold>&#x2212;712.0823</bold> &#x2212;23.8481 &#x2212;45.7298]</td>
<td valign="top" align="center">[<bold>&#x2212;99.6298</bold> &#x2212;112.0357 20.4654]</td>
</tr>
<tr>
<td valign="middle" align="center">Ground Truth</td>
<td valign="top" align="center">[<bold>&#x2212;715.3792</bold> &#x2212;26.7257 &#x2212;20.5198]</td>
<td valign="top" align="center">[<bold>&#x2212;101.5152</bold> &#x2212;127.8026 19.3956]</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Values in bold indicate that our proposed method achieves the best quantification results compared to the ground truth.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Visualization of images result of TestCase1 for each model with the training data generated from 4D-CT.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1390398-g007.tif"/>
</fig>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Quantification comparison of prediction and reconstruction of each model on the coronal plane in patient DataTest1.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Model</th>
<th valign="top" align="left">MAPE</th>
<th valign="top" align="left">PSNR</th>
<th valign="top" align="left">RMSE</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">CNN</td>
<td valign="top" align="left">0.2206</td>
<td valign="top" align="left">57.8427</td>
<td valign="top" align="left">0.0037</td>
</tr>
<tr>
<td valign="top" align="left">UNet</td>
<td valign="top" align="left">0.3313</td>
<td valign="top" align="left">53.6098</td>
<td valign="top" align="left">0.0060</td>
</tr>
<tr>
<td valign="top" align="left">ResNet</td>
<td valign="top" align="left">0.2706</td>
<td valign="top" align="left">55.4795</td>
<td valign="top" align="left">0.0048</td>
</tr>
<tr>
<td valign="top" align="left">ConvLSTM</td>
<td valign="top" align="left">0.0934</td>
<td valign="top" align="left">63.7294</td>
<td valign="top" align="left">0.0019</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study, we proposed a spatiotemporal consistent scheme via ConvLSTM and PCA motion modeling to estimate online 4D-CBCT. The network learns the motion features from patient 4D-CT with hundreds of simulated DRRs under a fixed angle. Both digital XCAT phantom experiments and pilot clinical studies were performed to prove the algorithm&#x2019;s efficiency. We compared our proposed method&#x2019;s efficiency with other popular networks such as CNN/Unet/ResNet. Quantification results indicate that ConvLSTM outperforms its competitors. ConvLSTM is an architecture that integrates Convolutional Neural Networks (CNN) with Long Short-Term Memory (LSTM) networks, enabling the application of convolution operations at each time step to effectively capture spatial information in temporal data. Compared to CNN, U-Net, and ResNet architectures, ConvLSTM can link the feature information of the current projection with that of adjacent projections, providing enhanced temporal and spatial feature connectivity. Hence, it will be able to supply enough information for motion estimation with temporal correlation.</p>
<p>In this work, our goal is to develop a real-time 4D-CBCT imaging model utilizing projection images with high temporal resolution. The model inference for PCA labels is remarkably fast, taking approximately 0.006 s for one projection. This rapid inference is critical for maintaining real-time processing capabilities, ensuring that the model can handle a continuous stream of projection images without significant latency. However, the reconstruction time for a single volume of 4D-CBCT is approximately 5 s on a personal desktop computer. While this is relatively fast given the complexity of the task, it underscores the computational demands associated with high-resolution 4D imaging. Our ongoing work focuses on optimizing this reconstruction time further, possibly through hardware acceleration or more efficient algorithms, to achieve even faster performance.</p>
<p>Despite the promising results, our study has several limitations that need to be addressed. First, the study relies on simulated data for training the network, including simulated respiratory motion and noise models. While these simulations aim to mimic real-world conditions, they may not fully capture the complexities of actual patient data, potentially affecting the model&#x2019;s performance in clinical settings. Second, the proposed model depends heavily on the consistency of the patient&#x2019;s respiration pattern between the initial 4D-CT scanning and the online treatment stages. Any significant variation in the patient&#x2019;s breathing pattern during treatment could impact the accuracy of the 4D-CBCT reconstruction. Third, the pilot clinical evaluation was conducted with a limited number of patients. Although the results were promising, a larger and more diverse patient cohort is necessary to validate the robustness of the proposed method.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Affiliated Hospital of Jiangsu University Review Board (#2016-034). The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation in this study was provided by the participants&#x2019; legal guardians/next of kin.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>HZ: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Methodology, Supervision. KC: Investigation, Software, Writing &#x2013; original draft. XX: Data curation, Software, Writing &#x2013; original draft. TY: Validation, Writing &#x2013; review &amp; editing. WS: Funding acquisition, Writing &#x2013; review &amp; editing. JD: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported in part by the National Natural Science Foundation of China under Grants (No. 61871208 and No. 62103366); in part by the Guangdong Basic and Applied Basic Research Foundation (No. 2023A1515011365), in part by the Shenzhen Science and Technology Program (No. JCYJ20220530153801003), in part by the National Cancer Center/National Clinical Research Center for Cancer/Cancer Hospital &amp; Shenzhen Hospital, Chinese Academy of Medical Sciences and Peking Union Medical College, Shenzhen (No. E010221003), and by the Zhenjiang City Science and Technology Plan Project (No. SH2021040); and in part by the Shenzhen High-level Hospital Construction Fund.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The reviewer ZD declared a past co-authorship with the author HZ to the handling editor.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mah</surname> <given-names>K</given-names>
</name>
<name>
<surname>Chow</surname> <given-names>B</given-names>
</name>
<name>
<surname>Swami</surname> <given-names>N</given-names>
</name>
<name>
<surname>Pope</surname> <given-names>A</given-names>
</name>
<name>
<surname>Rydall</surname> <given-names>A</given-names>
</name>
<name>
<surname>Earle</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>Early palliative care and quality of dying and death in patients with advanced cancer</article-title>. <source>BMJ Supportive Palliative Care</source>. (<year>2023</year>) <volume>13</volume>:<page-range>e74&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmjspcare-2021-002893</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bertholet</surname> <given-names>J</given-names>
</name>
<name>
<surname>Vinogradskiy</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Carlson</surname> <given-names>DJ</given-names>
</name>
</person-group>. <article-title>Advances in image-guided adaptive radiation therapy</article-title>. <source>Int J Radiat OncologyBiologyPhysics</source>. (<year>2021</year>) <volume>110</volume>:<page-range>625&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijrobp.2021.02.047</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Star-Lack</surname> <given-names>J</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>M</given-names>
</name>
<name>
<surname>Oelhafen</surname> <given-names>M</given-names>
</name>
<name>
<surname>Berkus</surname> <given-names>T</given-names>
</name>
<name>
<surname>Pavkovich</surname> <given-names>J</given-names>
</name>
<name>
<surname>Brehm</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>A modified McKinnon-Bates (MKB) algorithm for improved 4D cone-beam computed tomography (CBCT) of the lung</article-title>. <source>Med Phys</source>. (<year>2018</year>) <volume>45</volume>:<page-range>3783&#x2013;99</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mp.13034</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Ying</surname> <given-names>F-F</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>C</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Simultaneous 4D-CBCT reconstruction with sliding motion constraint</article-title>. <source>Med Phys</source>. (<year>2016</year>) <volume>43</volume>(<issue>10</issue>):<page-range>5453&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.4959998</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapman</surname> <given-names>CH</given-names>
</name>
<name>
<surname>McGuinness</surname> <given-names>C</given-names>
</name>
<name>
<surname>Gottschalk</surname> <given-names>AR</given-names>
</name>
<name>
<surname>Yom</surname> <given-names>SS</given-names>
</name>
<name>
<surname>Garsa</surname> <given-names>AA</given-names>
</name>
<name>
<surname>Anwar</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Influence of respiratory motion management technique on radiation pneumonitis risk with robotic stereotactic body radiation therapy</article-title>. <source>J Appl Clin Med Phys</source>. (<year>2018</year>) <volume>19</volume>:<fpage>48</fpage>&#x2013;<lpage>57</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/acm2.12338</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ouyang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L</given-names>
</name>
<name>
<surname>Cervino</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>A method for volumetric imaging in radiotherapy using single x-ray projection</article-title>. <source>Med Phys</source>. (<year>2015</year>) <volume>42</volume>:<page-range>2498&#x2013;509</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.4918577</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>R</given-names>
</name>
<name>
<surname>Lewis</surname> <given-names>JH</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>X</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Folkerts</surname> <given-names>M</given-names>
</name>
<name>
<surname>Men</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>3D tumor localization through real-time volumetric x-ray imaging for lung cancer radiotherapy</article-title>. <source>Med Phys</source>. (<year>2011</year>) <volume>38</volume>:<page-range>2783&#x2013;94</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.3582693</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>YY</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>F-F</given-names>
</name>
<name>
<surname>Segars</surname> <given-names>WP</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>A technique for estimating 4D-CBCT using prior knowledge and Limited-angle projections</article-title>. <source>Med Phys</source>. (<year>2013</year>) <volume>40</volume>(<issue>12</issue>):<page-range>1217011&#x2013;1&#x2013;1217011:16</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1118/1.4825097</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>R</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>F</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>X</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>D</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Convolutional neural network (CNN) based three dimensional tumor localization using single X-ray projection</article-title>. <source>IEEE Access</source>. (<year>2019</year>) <volume>7</volume>:<page-range>37026&#x2013;38</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/Access.6287639</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>R</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>F</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>X</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>D</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Real-time tumor localization with single x-ray projection at arbitrary gantry angles using a convolutional neural network (CNN)</article-title>. <source>Phys Med Biol</source>. (<year>2020</year>) <volume>65</volume>(<issue>6</issue>):<fpage>065012</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1361-6560/ab66e4</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname> <given-names>MS</given-names>
</name>
<name>
<surname>Mcnitt-Gray</surname> <given-names>MF</given-names>
</name>
<name>
<surname>Goldin</surname> <given-names>JG</given-names>
</name>
<name>
<surname>Suh</surname> <given-names>RD</given-names>
</name>
<name>
<surname>Sayre</surname> <given-names>JW</given-names>
</name>
<name>
<surname>Aberlee</surname> <given-names>DR</given-names>
</name>
</person-group>. <article-title>Patient-specific models for lung nodule detection and surveillance in CT images</article-title>. <source>IEEE Trans On Med Imaging MI</source>. (<year>2001</year>) <volume>20</volume>(<issue>12</issue>):<page-range>1242&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/42.974919</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>M</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>K</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Siochi</surname> <given-names>RAC</given-names>
</name>
</person-group>. <article-title>Motion-compensated mega-voltage cone beam CT using the deformation derived directly from 2D projection images</article-title>. <source>IEEE Trans On Med Imaging MI</source>. (<year>2013</year>) <volume>32</volume>(<issue>8</issue>):<page-range>1365&#x2013;75</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMI.2012.2231694</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Dy</surname> <given-names>JG</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>SB</given-names>
</name>
</person-group>. <article-title>Markerless gating for lung cancer radiotherapy based on machine learning techniques</article-title>. <source>Phys Med Biol</source>. (<year>2009</year>) <volume>54</volume>:<page-range>1555&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/0031-9155/54/6/010</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Unberath</surname> <given-names>M</given-names>
</name>
<name>
<surname>Zaech</surname> <given-names>JN</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Bier</surname> <given-names>B</given-names>
</name>
<name>
<surname>Fotouhi</surname> <given-names>J</given-names>
</name>
<name>
<surname>Armand</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. &#x201c;<article-title>DeepDRR &#x2013; A catalyst for machine learning in fluoroscopy-guided procedures</article-title>&#x201d;. In <person-group person-group-type="editor">
<name>
<surname>Frangi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Schnabel</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Davatzikos</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Alberola-L&#xf3;pez</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Fichtinger</surname> <given-names>G.</given-names>
</name>
</person-group> (editors) <source>Medical Image Computing and Computer Assisted Intervention&#x2013;MICCAI 2018. Lecture Notes in Computer Science (LNIP, Vol. 11073, pp. 98-106)</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Cham</publisher-loc> (<year>2018</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-00937-3_12</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Unberath</surname> <given-names>M</given-names>
</name>
<name>
<surname>Zaech</surname> <given-names>JN</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>C</given-names>
</name>
<name>
<surname>Bier</surname> <given-names>B</given-names>
</name>
<name>
<surname>Navab</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>Enabling machine learning in X-ray-based procedures via realistic simulation of image formation</article-title>. <source>Int J Comput Assisted Radiol Surg</source>. (<year>2019</year>) <volume>14</volume>(<issue>9</issue>):<page-range>1517&#x2013;28</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11548-019-02011-2</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gewers</surname> <given-names>FL</given-names>
</name>
<name>
<surname>Ferreira</surname> <given-names>GR</given-names>
</name>
<name>
<surname>De Arruda</surname> <given-names>HF</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>FN</given-names>
</name>
<name>
<surname>Comin</surname> <given-names>CH</given-names>
</name>
<name>
<surname>Amancio</surname> <given-names>DR</given-names>
</name>
<etal/>
</person-group>. <article-title>Principal component analysis: A natural approach to data exploration</article-title>. <source>ACM Computing Surveys (CSUR)</source>. (<year>2018</year>) <volume>54</volume>:<fpage>1</fpage>&#x2013;<lpage>34</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3447755</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Ouyang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Deformation vector fields (DVF)-driven image reconstruction for 4D-CBCT</article-title>. <source>J Xray Sci Technol</source>. (<year>2013</year>) <volume>40</volume>:<page-range>457&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3233/XST-140466</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hochreiter</surname> <given-names>S</given-names>
</name>
<name>
<surname>Schmidhuber</surname> <given-names>J</given-names>
</name>
</person-group>. &#x201c;<article-title>Long short-term memory</article-title>,&#x201d; in <source>Neural Computation</source>. (<year>1997</year>) pp. <page-range>1735&#x2013;80</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>X</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H</given-names>
</name>
<name>
<surname>Yeung</surname> <given-names>DY</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>W</given-names>
</name>
<name>
<surname>Woo</surname> <given-names>W</given-names>
</name>
</person-group>. &#x201c;<article-title>Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting</article-title>.&#x201d; <source>NIPS&#x2019;15: Proceedings of the 28th International Conference on Neural Information Processing Systems</source>. <publisher-name>MIT Press</publisher-name>. (<year>2015</year>) <volume>1</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/2969239.2969329</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>W</given-names>
</name>
<name>
<surname>He</surname> <given-names>X</given-names>
</name>
<name>
<surname>Blumenstein</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lyu</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>FACLSTM: ConvLSTM with focused attention for scene text recognition</article-title>. <source>Sci China Inf Sci</source>. (<year>2020</year>) <volume>63</volume>:<page-range>120103:1&#x2013;120103:14</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11432-019-2713-1</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname> <given-names>F</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>J</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>Z</given-names>
</name>
</person-group>. <article-title>Gesture recognition system based on ultrasonic FMCW and ConvLSTM model</article-title>. <source>Measurement</source>. (<year>2022</year>) <volume>190</volume>:<fpage>110743</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measurement.2022.110743</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>W-Y</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H-C</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>Y-J</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>L-Y</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>X-Q</given-names>
</name>
<name>
<surname>Du</surname> <given-names>Q</given-names>
</name>
</person-group>. <article-title>Generative adversarial capsule network with ConvLSTM for hyperspectral image classification</article-title>. <source>IEEE Geosci Remote Sens Lett</source>. (<year>2020</year>) <volume>18</volume>:<page-range>523&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.8859</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>