<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioeng. Biotechnol.</journal-id>
<journal-title>Frontiers in Bioengineering and Biotechnology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioeng. Biotechnol.</abbrev-journal-title>
<issn pub-type="epub">2296-4185</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1500270</article-id>
<article-id pub-id-type="doi">10.3389/fbioe.2024.1500270</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioengineering and Biotechnology</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Deep learning methods for high-resolution microscale light field image reconstruction: a survey</article-title>
<alt-title alt-title-type="left-running-head">Lin et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbioe.2024.1500270">10.3389/fbioe.2024.1500270</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Lin</surname>
<given-names>Bingzhi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2867926/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tian</surname>
<given-names>Yuan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yue</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Zhijing</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Depeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2848122/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Energy and Power Engineering</institution>, <institution>Nanjing University of Aeronautics and Astronautics</institution>, <addr-line>Nanjing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biomedical Engineering</institution>, <institution>Duke University</institution>, <addr-line>Durham</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Key Laboratory of Novel Targets and Drug Study for Neural Repair of Zhejiang Province</institution>, <institution>School of Medicine</institution>, <institution>Hangzhou City University</institution>, <addr-line>Hangzhou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1614610/overview">Zetao Chen</ext-link>, Tianjin University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1646677/overview">Hao Dong</ext-link>, Zhejiang Lab, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2858143/overview">Guosen Xie</ext-link>, Nanjing University of Information Science and Technology, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Depeng Wang, <email>depeng.wang@nuaa.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>11</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1500270</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Lin, Tian, Zhang, Zhu and Wang.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Lin, Tian, Zhang, Zhu and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Deep learning is progressively emerging as a vital tool for image reconstruction in light field microscopy. The present review provides a comprehensive examination of the latest advancements in light field image reconstruction techniques based on deep learning algorithms. First, the review briefly introduces the concepts of light field and deep learning techniques. Following that, the application of deep learning in light field image reconstruction is discussed. Subsequently, we classify deep learning-based light field microscopy reconstruction algorithms into three types based on the contribution of deep learning: the fully deep learning-based method, deep learning enhanced raw light field image with numerical inversion volumetric reconstruction, and numerical inversion volumetric reconstruction with deep learning enhanced resolution. We then comprehensively analyze the features of each approach. Finally, we discuss several challenges, including deep neural approaches for increasing the accuracy of light field microscopy to predict temporal information, methods for obtaining light field training data, strategies for data enhancement using existing data, and the interpretability of deep neural networks.</p>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>light field microscopy</kwd>
<kwd>light field imaging</kwd>
<kwd>high resolution</kwd>
<kwd>volumetric reconstruction</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Biosensors and Biomolecular Electronics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>By simultaneously capturing combined signals from different depths of an entire volume in a single camera frame, light field microscopy (LFM) enables rapid spatial dynamic imaging (<xref ref-type="bibr" rid="B29">Levoy et al., 2006</xref>), and has developed into a valuable tool for structural and functional imaging of biological specimens. LFM usually necessitates computational volumetric reconstruction using traditional algorithms like refocusing (<xref ref-type="bibr" rid="B8">Dansereau et al., 2015</xref>; <xref ref-type="bibr" rid="B19">Jayaweera et al., 2020</xref>) or three-dimensional (3D) deconvolution (<xref ref-type="bibr" rid="B4">Broxton et al., 2013</xref>). However, conventional algorithms are limited by low efficiency and poor resolution, thereby hindering the broader application of LFM. Therefore, achieving high-efficiency and high-resolution image reconstruction is crucial for the advancement of LFM.</p>
<p>In recent years, deep learning has been widely used for various applications, including image classification (<xref ref-type="bibr" rid="B71">Yu et al., 2022</xref>; <xref ref-type="bibr" rid="B10">Dosovitskiy, 2020</xref>; <xref ref-type="bibr" rid="B13">Foret et al., 2020</xref>), semantic segmentation (<xref ref-type="bibr" rid="B62">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B53">Srivastava and Sharma, 2024</xref>; <xref ref-type="bibr" rid="B52">Erisen, 2024</xref>), generation (<xref ref-type="bibr" rid="B23">Kim et al., 2023</xref>; <xref ref-type="bibr" rid="B60">Walton et al., 2022</xref>; <xref ref-type="bibr" rid="B49">Sadat et al., 2023</xref>), denoising (<xref ref-type="bibr" rid="B78">Zhou et al., 2020</xref>), restoration (<xref ref-type="bibr" rid="B61">Wan et al., 2022</xref>), super-resolution (<xref ref-type="bibr" rid="B20">Johnson et al., 2016</xref>; <xref ref-type="bibr" rid="B27">Ledig et al., 2017</xref>; <xref ref-type="bibr" rid="B51">Shi et al., 2016</xref>), depth estimation (<xref ref-type="bibr" rid="B3">Alhashim, 2018</xref>; <xref ref-type="bibr" rid="B79">Zhuang et al., 2022</xref>; <xref ref-type="bibr" rid="B56">Tateno et al., 2018</xref>) and image reconstruction (<xref ref-type="bibr" rid="B11">Drozdova et al., 2024</xref>; <xref ref-type="bibr" rid="B74">Zhang et al., 2024</xref>; <xref ref-type="bibr" rid="B14">Godard et al., 2017</xref>; <xref ref-type="bibr" rid="B31">Li et al., 2017</xref>; <xref ref-type="bibr" rid="B32">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B46">Quan et al., 2021</xref>; <xref ref-type="bibr" rid="B50">Schlemper et al., 2017</xref>). 
The use of deep learning algorithms has also boosted LFM (<xref ref-type="bibr" rid="B26">LeCun et al., 2015</xref>; <xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>; <xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>; <xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>; <xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>). For instance, deep learning-based LFM has been applied to resolve the activity of motor neurons in <italic>Caenorhabditis elegans</italic> with single-cell resolution (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>), to extract the calcium signal in the brains of 5-day-old transgenic zebrafish (<italic>Danio rerio</italic>) larvae (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>), and to reconstruct the high-speed 3D voltage imaging in sparsely labeled dopaminergic neurons in the fruit fly brain (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>).</p>
<p>This review first introduces light field acquisition methods and explores their applicability to light field microimaging, then introduces the current applications of deep learning in image processing and explores its feasibility for light field microimaging reconstruction, and finally outlines the recent progress of deep learning-based reconstruction algorithms for LFM. This paper aims to provide a comprehensive review of deep learning-based LFM, focusing primarily on network architecture, reconstruction resolution, and running time, to reveal current shortcomings and future possibilities.</p>
</sec>
<sec id="s2">
<title>2 Light field and deep learning</title>
<sec id="s2-1">
<title>2.1 Principle of light field imaging</title>
<p>In 3D space, the light field serves as a comprehensive representation of all light rays passing through any given point in any direction. The concept of the &#x201c;Light Field&#x201d; was first introduced by Alexander Gershun (<xref ref-type="bibr" rid="B1">Gershun, 1939</xref>), who proposed the five-dimensional (5D) plenoptic function <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x3c6;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mn>5</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to describe the light field, utilizing spatial coordinates <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and angular coordinates <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x3c6;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> to specify each ray. In contrast, Levoy and Hanrahan (<xref ref-type="bibr" rid="B28">Levoy and Hanrahan, 2023</xref>) presented a four-dimensional (4D) representation <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mn>4</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the light field, conceptualizing it as comprised of oriented lines in free space. This representation is efficient in reducing data redundancy and simplifying the reconstruction of the plenoptic function. The parameterization <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> defines these lines based on their intersections with two arbitrarily positioned planes, where the first plane is denoted by <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and the second plane by <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>. All light field microscopy methods discussed subsequently in this paper are based on the 4D light field representation.</p>
<p>Typically, there are three strategies to acquire light field information (<xref ref-type="bibr" rid="B67">Wu et al., 2017</xref>): multi-sensor capture (<xref ref-type="fig" rid="F1">Figure 1A</xref>), time-sequential capture (<xref ref-type="fig" rid="F1">Figure 1B</xref>), and multiplexed imaging (<xref ref-type="fig" rid="F1">Figure 1C</xref>). Theoretically, all three strategies aim to acquire light field information, but the approaches they utilize to record it are totally different. Specifically, multi-sensor capture utilizes multiple cameras to concurrently capture the light field, predominantly employing camera arrays (<xref ref-type="bibr" rid="B33">Lin et al., 2015</xref>; <xref ref-type="bibr" rid="B18">Huang et al., 2023</xref>; <xref ref-type="bibr" rid="B16">Gu et al., 2020</xref>; <xref ref-type="bibr" rid="B69">Xu et al., 2020</xref>). This approach can yield high spatial resolution imaging while capturing real-time information, but the total setup is complex and expensive. In time-sequential capture, a single camera is utilized to capture the light field through a series of exposures. This method is known for being time-consuming and cannot provide real-time information (<xref ref-type="bibr" rid="B30">Li et al., 2014</xref>; <xref ref-type="bibr" rid="B54">Taguchi et al., 2010</xref>; <xref ref-type="bibr" rid="B9">Dansereau et al., 2017</xref>; <xref ref-type="bibr" rid="B34">Liu et al., 2022</xref>). 
On the other hand, multiplexed imaging involves the conversion of high-dimensional data into a more simplified two-dimensional (2D) image (<xref ref-type="bibr" rid="B44">Prevedel et al., 2014</xref>; <xref ref-type="bibr" rid="B36">Mignard-Debise, 2015</xref>; <xref ref-type="bibr" rid="B21">Kim et al., 2016</xref>; <xref ref-type="bibr" rid="B58">Vizcaino et al., 2021b</xref>; <xref ref-type="bibr" rid="B42">Orth and Crozier, 2012</xref>; <xref ref-type="bibr" rid="B66">Yang and Yuste, 2017</xref>; <xref ref-type="bibr" rid="B12">Fan et al., 2022</xref>) using a microlens array (MLA) positioned in the optical instrument&#x2019;s intermediate image plane. By adopting this approach, the entire imaging system is significantly streamlined with easy operation. Consequently, light field camera and LFM developed based on the multiplexing principle are widely used in volumetric imaging. Particularly, LFM has demonstrated strong imaging ability in <italic>in-vivo</italic> imaging of heartbeat, blood flow, and neural activity, and has allowed 3D visualization of the spatial and temporal evolution patterns of the signals and the mechanisms behind biological processes.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Acquisition of light field. <bold>(A)</bold> Multi-sensor capture: capturing the light field simultaneously using multiple cameras, most of which are camera arrays. [modified from (<xref ref-type="bibr" rid="B33">Lin et al., 2015</xref>)] <bold>(B)</bold> Time-sequential capture: capturing the light field using multiple exposures from a single camera, which is time-consuming. [modified from (<xref ref-type="bibr" rid="B54">Taguchi et al., 2010</xref>)] <bold>(C)</bold> Multiplexed capture: The process of mapping complex high-dimensional data into two-dimensional (2D) images. [modified from (<xref ref-type="bibr" rid="B12">Fan et al., 2022</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 Light field image reconstruction benefits from deep learning</title>
<p>Light field reconstruction can be seen as a transformation between the raw light field image and the reconstructed volumetric image. Classical reconstruction methods address such transformations from a physical-optical perspective through display modeling, which can be classified into two categories: mathematical inversion and numerical inversion. Refocusing is a typical mathematical inversion method in light field reconstruction (<xref ref-type="bibr" rid="B41">Ng et al., 2005</xref>; <xref ref-type="bibr" rid="B2">Alain et al., 2019</xref>) and is based on an idealized mathematical model that essentially superimposes and shifts sub-aperture images over the entire aperture range. During the reconstruction process, the difference between the actual situation and the mathematical model is magnified, making the result prone to image noise and artifacts. Numerical inversion employs iterative reconstruction for various imaging modalities to introduce external <italic>a priori</italic> information, thereby greatly enriching the information available for reconstruction and improving the quality of the final image. One widely used numerical inversion method is Richardson-Lucy deconvolution which relies on the microscope&#x2019;s point spread function and Poisson noise statistics assumption (<xref ref-type="bibr" rid="B44">Prevedel et al., 2014</xref>; <xref ref-type="bibr" rid="B47">Richardson, 1972</xref>). However, the accuracy of these classical reconstruction methods is restricted by the premises of their physical models. These methods are unable to capture the full statistical complexity of microscopic images, and thus can only reconstruct high-quality results in specific cases. In contrast, data-driven procedures, especially deep learning methods, rely on high-resolution data to optimize the reconstruction procedure, thereby usually offering better resolution than conventional algorithms. 
Consequently, deep learning-based methods allow high-resolution light field reconstruction. For instance, deep learning methods have enabled high-resolution LFM in the reconstruction of fluorescently labeled blood vessels in mouse brain slices (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>), neuronal signals and analysis of the calcium activity patterns, four-dimensional dynamics of red blood cells and cardiomyocytes (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>), continuous 3D observation of dynamic processes (<xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>), and imaging of medaka (<italic>Oryzias latipes</italic>) embryos and zebrafish (<italic>D. rerio</italic>) larvae (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>).</p>
<p>Briefly, deep learning networks are primarily composed of various nonlinear parameterized processing modules that iteratively convert an input <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> into the anticipated output <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, generally approximating it as <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>. It is theoretically posited that a neural network possessing an ample quantity of parameters and a minimum of three layers can approximate virtually any function within its domain (<xref ref-type="bibr" rid="B17">Hornik et al., 1989</xref>). This assertion is founded on the universal approximation theorem in the field of neural networks, which suggests that with a sufficiently complex architecture, including an adequate number of parameters and layers, the network can flexibly adapt and represent a vast array of intricate functions. Such a network structure enables the model to intricately capture the underlying patterns and relationships present in the data, facilitating the acquisition of high-level abstraction to effectively model complex functions, and thus a neural network can also be called a universal function approximator (<xref ref-type="fig" rid="F2">Figure 2A</xref>). The main principle of learning is to make accurate updates to the parameter values <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The learning process has two main steps. The first step requires passing the input value <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of the module forward once in the network to get the approximation value <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>. The differentiable nature of each module within a neural network allows for the computation of partial derivatives for the parameters <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. This property enables the determination of how changes in these parameters impact both the output <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and intermediate values <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> throughout the network architecture. By calculating the partial derivatives <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>, it becomes feasible to assess the sensitivity of the network&#x2019;s predictions and internal representations to variations in the model&#x2019;s parameters. This differentiation capability plays a fundamental role in the optimization process, as it facilitates the adjustment of parameter values to minimize the discrepancy between the network outputs and the anticipated output. Ultimately, the ability to compute these partial derivatives for the parameters empowers the neural network to iteratively refine its internal representations and enhance its predictive performance through the optimization of its parameters. Therefore, the second step utilizes the back-propagation algorithm (<xref ref-type="bibr" rid="B48">Rumelhart et al., 1986</xref>) to iteratively update the initially set <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> value by efficiently calculating all the partial derivatives or gradients by the chain rule and passing them backward once. Using the loss function <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the difference between the anticipated output <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the network output <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> can be measured. To minimize this loss, a commonly employed approach involves the utilization of optimizers such as Adam (<xref ref-type="bibr" rid="B24">Kinga and Adam, 2014</xref>) to adjust the parameter denoted as <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> iteratively.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Universal function approximator and back-propagation were used to learn light field reconstruction. <bold>(A)</bold> Learning process of universal function approximator. <bold>(B)</bold> Multilayer neural networks and back-propagation were used to learn LFM direct reconstruction. [modified from (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g002.tif"/>
</fig>
<p>The trained network is the solver to compute volumetric images from the raw light field data, and directly impacts the quality of the reconstructed image. Typically, to ensure the network is fully optimized, high-resolution 3D images of the target samples are first acquired, which can be obtained from simulated data or experimental methods, such as confocal microscopy (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>), selective plane illumination microscopy (SPIM) (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>) and light-sheet microscopy (<xref ref-type="bibr" rid="B77">Zhao et al., 2020</xref>). Using the wave optics model, these high-resolution volumetric images are projected into 2D light field images. In the network training process, the raw light field images serve as the initial input denoted as <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The network then extracts relevant features by image convolution with kernels, ultimately generating a set of 3D image stacks represented as <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="fig" rid="F2">Figure 2B</xref>). Lastly, a suitable loss function must be chosen. The loss function is then iteratively minimized, and the network kernels are updated accordingly. This process continues until the network is gradually optimized to the point where it can produce a 3D image that closely resembles the ground truth from the synthetic light field.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Deep learning-based reconstruction algorithms for light field microscopy</title>
<p>Deep learning-based LFM image reconstruction has demonstrated higher resolution than conventional methods (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>; <xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>; <xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>), which allows researchers to observe finer structures, such as subcellular organelles or molecular complexes, with greater clarity. The improved performance of deep learning-based methods originates from their upsampling design in the network, which can compensate for the reduced resolution of raw light field images when the volumetric information is encoded onto the 2D sensor. To attain high resolution and high efficiency, deep learning methods can also be integrated with numerical inversion strategies. Based on the combination of deep learning and numerical inversion methods, the current deep learning-based LFM algorithms can be subdivided into three categories: fully deep learning-based method (type I) (<xref ref-type="fig" rid="F3">Figure 3A</xref>), deep learning enhanced raw light field image with numerical inversion volumetric reconstruction (type II) (<xref ref-type="fig" rid="F3">Figure 3B</xref>), and numerical inversion volumetric reconstruction with deep learning enhanced volumetric data (type III) (<xref ref-type="fig" rid="F3">Figure 3C</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Three types of light field microscopy reconstruction methods. <bold>(A)</bold> Fully deep learning-based method. <bold>(B)</bold> Deep learning enhanced raw light field image with numerical inversion volumetric reconstruction. <bold>(C)</bold> Numerical inversion volumetric reconstruction with deep learning enhanced volumetric data. [modified from (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g003.tif"/>
</fig>
<p>The Type I method relies entirely on deep learning to reconstruct the raw light field image into a 3D volume. This network needs to accomplish both volumetric reconstruction and resolution improvement tasks simultaneously. The Type II method first uses deep learning algorithms to enhance the resolution of the raw light field image, followed by a gradual reconstruction of the light field image using numerical inversion methods. The Type III method uses the numerical inversion method to iteratively reconstruct the light field image into a low-resolution 3D volume, and then uses deep learning to transform the low-resolution volume into a high-resolution volume. Specifically, the performance of these three types of methods varies depending on the structure of the variant. Type I methods use an end-to-end network, which has the advantage of being able to quickly reconstruct a volumetric image from a light field image as long as the network is appropriately trained, but is more difficult to train due to the complexity of the network. Compared to Type I methods, Type II and Type III methods have the advantage of better generalization, but the numerical inversion volumetric reconstruction in them requires iterative computation, resulting in less efficient reconstruction. The Type III method has a wider range of applications than the Type I and Type II methods, but the drawback is that false results that deviate too much from the real situation may occur. Each of these three types of methods has its own advantages and disadvantages, which need to be considered and weighed when applying them. In the future, Type I methods may become the mainstream of real-time reconstruction of LFM, Type II methods may become the mainstream of high-resolution reconstruction of LFM, and Type III methods will be applied to a variety of 3D reconstruction tasks in addition to LFM.</p>
<sec id="s3-1">
<title>3.1 Type I: Fully deep learning-based method</title>
<p>The fully deep learning-based method is the most commonly used deep learning-based method for light field reconstruction (<xref ref-type="fig" rid="F4">Figure 4</xref>). This approach uses the light field image as the input <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for conventional deep learning, generating the predicted volume as the output <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, while the target volume (ground truth) serves as the desired output <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="fig" rid="F2">Figure 2B</xref>). The most advanced networks based on fully deep learning-based methods currently include LFMNet, VCD-Net, F-VCD, and HyLFM-Net (<xref ref-type="fig" rid="F4">Figures 4A&#x2013;D</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Fully deep learning-based method of light field microscopy. <bold>(A)</bold> LFMNet architecture (left), and the imaging result (right). Scale bars, 500 <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (LF image), 10 <inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (Network and Ground truth). [modified from (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>)] <bold>(B)</bold> VCD-Net architecture (left) and the imaging result (right). Scale bars, 50 <inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. [modified from (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>)] <bold>(C)</bold> F-VCD architecture (left), and the imaging result (right). Scale bars, 20 <inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. [modified from (<xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>)] <bold>(D)</bold> HyLFM-Net architecture (left) and the imaging result (right). Scale bars, 50 <inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. [modified from (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g004.tif"/>
</fig>
<p>In these networks, LFMNet (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>) is the earliest architecture, which adds an initial layer (Conv4d (<xref ref-type="bibr" rid="B7">Choy et al., 2019</xref>)) to the U-Net. This design produced a fully convolutional network with the first layer traversing each microlens and capturing its surrounding neighborhood. The resulting output is then transformed into a channel number that is equal to the depths that need to be reconstructed. Subsequently, the tensor enters the U-Net for feature extraction and 3D reconstruction. The LFMNet has been mainly validated on images of fluorescently labeled blood vessels in mouse brain slices and achieved reconstruction resolution <inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0.086</mml:mn>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi mathvariant="normal">&#x3bc;</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> that was comparable to confocal microscopy. Compared to previous methods, LFMNet has significantly improved reconstruction accuracy, such as Peak Signal-to-Noise Ratio (PSNR) and Structural Similarity (SSIM), and reconstruction speed (75,000 times faster than deconvolution).</p>
<p>Following LFMNet, VCD-Net (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>) was proposed, which adopts the cascaded convolutional layer design of the U-Net architecture, but differs from LFMNet in that the initial layer is no longer designed using Conv4d. Instead, the initial layer is transformed using SubPixel up-scaling (<xref ref-type="bibr" rid="B51">Shi et al., 2016</xref>) and a convolutional layer to reformat pixels in the input 2D light field raw image into different views, generating multi-channel outputs representing different depths. VCD-Net has performed single-cell resolution and up to 200&#xa0;Hz volumetric imaging on the neuronal activity of moving <italic>C. elegans</italic> and the blood flow of beating zebrafish hearts, and has obtained uniform average resolutions of approximately 1.1 <inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bc;</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in both lateral directions.</p>
<p>F-VCD (<xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>) was proposed based on VCD-Net and provides improved reconstruction resolution, accuracy, and efficiency over VCD-Net. The F-VCD comprises two primary modules: the &#x201c;F-Denoise&#x201d; module and the &#x201c;F-Reconstruction&#x201d; module. The F-Denoise module introduces a viewing angle attention branch into the traditional RCAN network (<xref ref-type="bibr" rid="B6">Chen et al., 2021</xref>) to balance the influence of different viewing angles and denoise raw light field images in a weighted way, because light field images from different viewing angles have different signal-to-noise ratios (SNR). The F-Reconstruction module is based on VCD-Net but has added three dilated convolution blocks to the original U-Net coding blocks of VCD and replaced the normal convolution operation with a residual block. This increases the number of input channels and expands the lateral size of the extracted features. To prevent the loss of subtle signals in the optimization process of deep networks, adjustments have been made to the normalization layer and activation function, replacing them with instance normalization and LeakyReLU, respectively. The F-VCD has been mainly validated on live-cell imaging and fixed-cell imaging. In live-cell imaging, the F-VCD technique achieved 3D super-resolution imaging with a resolution of approximately 180&#xa0;nm &#xd7; 180&#xa0;nm &#xd7; 400&#xa0;nm and was able to capture the rapid motion and morphological changes of mitochondria within cells, including mitochondrial fusion, fission, and dynamic tubulation, at a rate of up to 50&#xa0;Hz. In fixed-cell imaging, F-VCD significantly improved the spatial resolution and contrast, and reduced axial artifacts, enabling clear visualization of organelle structures such as mitochondria and the endoplasmic reticulum. 
Specifically, F-VCD improved the axial resolution from approximately 400&#xa0;nm to approximately 320&#xa0;nm and achieved a 2-fold increase in lateral and a 1.5-fold increase in axial resolution.</p>
<p>HyLFM-Net is different from the above methods. Instead of using a U-Net, HyLFM-Net (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>) consists of a series of residual blocks (<xref ref-type="bibr" rid="B22">Kaiming et al., 2016</xref>) and transposed convolutions. It converts the multi-channel 2D image to the axial spatial dimension after applying 2D residual blocks and transposed convolutions, resulting in a 3D image. This 3D image undergoes further processing through 3D residual blocks and is upsampled by transposed convolutions to ultimately obtain the reconstructed 3D volume. In the dynamic imaging of the 8-day-old medaka (<italic>O. latipes</italic>) embryonic heart, HyLFM-Net successfully imaged the dynamics of the medaka heart within a field of view of 350 &#xd7; 300 &#xd7; 150 <inline-formula id="inf36">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bc;</mml:mi>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>3</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> at a volume imaging speed of 40&#x2013;100&#xa0;Hz, with significant improvements in spatial resolution and image quality, and can achieve a 3D volume inference speed of 26.7&#xa0;Hz on a consumer-grade GPU, at least 1,000 times faster than conventional LFD.</p>
<p>The utilization of entirely deep learning-based approaches holds the potential to significantly reduce the presence of mosaic-like artifacts in the vicinity of the focal plane, a prevalent occurrence in LFD (<xref ref-type="bibr" rid="B44">Prevedel et al., 2014</xref>), and can accurately recover the signal even when the SNR of the raw image is low (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>; <xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>; <xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>; <xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>). However, this approach has some drawbacks because the network must simultaneously improve the resolution and reconstruct the volume from the spatial-angular information of the light field, which may increase the training workload and lead to missing structures and image artifacts. To mitigate this problem, it is necessary to improve the adaptability of the network structure and loss function to achieve satisfactory prediction results.</p>
</sec>
<sec id="s3-2">
<title>3.2 Type II: Deep learning enhanced raw light field image with numerical inversion volumetric reconstruction</title>
<p>The scanning LFM (sLFM) system (<xref ref-type="bibr" rid="B39">Morozov et al., 2002</xref>; <xref ref-type="bibr" rid="B68">Wu et al., 2021</xref>) improves the raw light field image quality through physical scanning to collect the 4D spatial-angular light distribution at near-diffraction-limited resolution. However, sLFM usually requires a certain amount of time to scan the sample when acquiring the light field, and this spatial resolution improvement comes at the expense of temporal resolution. To compensate for this deficiency, VsLFM (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>) optimizes the scanning process using a deep learning model based on DAOSLIMIT (<xref ref-type="bibr" rid="B68">Wu et al., 2021</xref>).</p>
<p>VsLFM is a typical method in which reconstruction is performed after the light field&#x2019;s resolution is increased. This process primarily uses deep learning to improve the raw light field image&#x2019;s resolution. The subsequent reconstruction process necessitates the use of physical iterations to convert sample images from various angular views into volumetric images. To simulate the scanning process, VsLFM utilizes a supervised learning network (Vs-Net) to extract, interact, fuse and upsample spatial-angular features. In network training, phase-dependent low-resolution angular data is used to learn physical prior relationships, and the high-resolution angular measurements produced by the sLFM are used as the expected output. To rebuild 3D high-resolution volumes, iterative tomography is finally applied utilizing DAO on several angular views that are acquired by Vs-Net (<xref ref-type="fig" rid="F5">Figure 5A</xref>). It is the improved resolution of the raw light field image that makes the imaging quality of VsLFM higher than that of the fully deep learning-based methods, especially in the localized details of specific depths where the image quality of VsLFM is close to ground truth.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Deep learning enhanced raw light field image with numerical inversion volumetric reconstruction. <bold>(A)</bold> VsLFM schematic diagram. <bold>(B)</bold> Enhanced sections extracted from xy cross-sections at z &#x3d; 1 <inline-formula id="inf37">
<mml:math id="m37">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of a static sample with membrane labeling, captured using LFM, VCD-Net, HyLFM-Net, VsLFM, and sLFM, respectively. Scale bars, 10 <inline-formula id="inf38">
<mml:math id="m38">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. [(A) and (B) modified from (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g005.tif"/>
</fig>
<p>VsLFM outperforms other methods such as LFM, VCD-Net, and HyLFM-Net on maximum intensity projection (MIP) images. VsLFM is able to obtain better resolution and contrast and performs well on both cell membrane-labeled and mitochondria-labeled samples (<xref ref-type="fig" rid="F5">Figure 5B</xref>). In the numerical simulation of the synthesized 3D tubulin structure, the SNR of VsLFM in the spatial-angular domain is improved by about 15&#xa0;dB and the SSIM is improved by 0.12. However, the increase in resolution involves physical iterations, which can be time-consuming and lead to poor reproducibility of the results. To address this issue, VsLFM has improved HyLFM-Net into HyLFM-A-Net to replace the physical iteration process with a deep neural network, which reduces the reconstruction time of the whole process from 1,200&#xa0;s to 11&#xa0;s. The combination of Vs-Net and HyLFM-A-Net results in a deep neural network for mapping LF images to volumetric images. In contrast to the fully deep learning-based method, this approach is equivalent to tuning the network during the training process, so the complexity and redundancy of the model are much higher.</p>
</sec>
<sec id="s3-3">
<title>3.3 Type III: Numerical inversion volumetric reconstruction with deep learning enhanced volumetric data</title>
<p>It is widely acknowledged that conventional numerical iterative algorithms are unable to produce satisfactory reconstructions due to the presence of redundancy in the majority of light field datasets. Consequently, researchers are faced with the challenge of fully utilizing the redundancy (<xref ref-type="bibr" rid="B8">Dansereau et al., 2015</xref>; <xref ref-type="bibr" rid="B19">Jayaweera et al., 2020</xref>; <xref ref-type="bibr" rid="B37">Mihara et al., 2016</xref>). Moreover, real optical physics deviates from the model in many ways, so the corresponding errors that propagate during the reconstruction process can cause image noise, blurring, and artifacts. To obtain higher resolution light field reconstruction, the results are usually further post-processed after reconstruction by conventional iterative methods. It is on this premise that the deep learning-based post-processing networks CARE (<xref ref-type="bibr" rid="B64">Weigert et al., 2018</xref>) (<xref ref-type="fig" rid="F6">Figure 6A</xref>), DFGAN and DFCAN (<xref ref-type="bibr" rid="B45">Qiao et al., 2021</xref>) (<xref ref-type="fig" rid="F6">Figure 6B</xref>), have also been applied to LFM.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Numerical inversion volumetric reconstruction with deep learning enhanced volumetric data. <bold>(A)</bold> CARE architecture (left) and the imaging result (right). Scale bars, 50 <inline-formula id="inf39">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. [modified from (<xref ref-type="bibr" rid="B64">Weigert et al., 2018</xref>)] <bold>(B)</bold> DFCAN and DFGAN architecture (left), and the imaging result (right). Scale bars, 3 <inline-formula id="inf40">
<mml:math id="m40">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (upper row), 1 <inline-formula id="inf41">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (lower row). [modified from (<xref ref-type="bibr" rid="B45">Qiao et al., 2021</xref>)].</p>
</caption>
<graphic xlink:href="fbioe-12-1500270-g006.tif"/>
</fig>
<p>CARE (Content-Aware Image Restoration) is a proposed method for LFM that utilizes machine learning techniques to enhance the quality of the acquired images. CARE is built on a residual version of U-Net, and the network is trained with a mean square error (MSE) loss function. The CARE network can significantly improve the accuracy of cell nucleus segmentation with a reduced illumination dose, improving the segmentation accuracy (SEG) score from 0.47 in the original image to 0.65 in the CARE-restored image. By leveraging machine-learned image computation, CARE networks can significantly improve image quality, making it easier to analyze biological samples.</p>
<p>After CARE was proposed, networks for single-image super-resolution (SISR) have also been proposed, and the most representative of these networks are DFCAN and DFGAN. DFCAN is a deep network that consists of convolutional layers, residual groups, Fourier channel attention blocks, skip connections, and activation functions such as GELU. The DFGAN network is derived from a conditional generative adversarial network (cGAN (<xref ref-type="bibr" rid="B38">Mirza, 2014</xref>)) framework applied to the DFCAN network. The generative model G of DFGAN is DFCAN, which mainly learns data distribution and image transformation. The discriminative model D is constructed based on the conventional CNN architecture, which consists of a convolutional layer activated by LeakyReLU and a fully connected layer activated by a sigmoid activation function. DFCAN and DFGAN were validated on the structures of clathrin-coated pits (CCPs), microtubules (MTs), and F-actin, and achieved good super-resolution reconstruction performance. Among them, in the case of 3-fold magnification, the quality of the reconstructed images is very close to the real super-resolution images, with the normalized root-mean-square error (NRMSE) below 0.1 on average. In addition, for the endoplasmic reticulum (ER) structure, due to the obvious aggregation caused by chemical fixation, the authors adopted real-time imaging and also obtained satisfactory reconstruction results.</p>
<p>This type of network can also be applied to other fluorescence microscopy modalities, but it has certain drawbacks. For example, its performance could be compromised when handling samples with extremely complicated structures. In addition, widespread application in practical experiments may be limited by the fidelity of the super-resolution information, especially when the network is applied to samples that contain structures absent from the training set. Moreover, this network is not ideal for intensity-based quantification, such as fluorescent substance counting, and cannot be used for all current image restoration challenges due to its nonlinear neural network prediction nature.</p>
<p>To better compare all the mentioned networks, we have summarized their structures and performance in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Comparison of different networks.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Category</th>
<th align="center">Network</th>
<th align="center">Architecture</th>
<th align="center">Metric</th>
<th align="center">Running time</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="center">Fully deep learning-based method</td>
<td align="center">LFMNet (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>)</td>
<td align="center">Conv4d (<xref ref-type="bibr" rid="B7">Choy et al., 2019</xref>)<break/>And U-Net (<xref ref-type="bibr" rid="B5">Ounkomol et al., 2018</xref>)</td>
<td align="left">Error with Confocal: 0.086&#xa0;&#x3bc;m (test on vessels)</td>
<td align="center">50&#xa0;ms</td>
</tr>
<tr>
<td align="center">VCD-Net (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>)</td>
<td align="center">PixelShuffle (<xref ref-type="bibr" rid="B51">Shi et al., 2016</xref>)<break/>And U-Net (<xref ref-type="bibr" rid="B5">Ounkomol et al., 2018</xref>)</td>
<td align="left">uniform average resolutions<break/>1.1&#xa0;&#x3bc;m (x,y)<break/>3.0&#xa0;&#x3bc;m (z)<break/>(test on isovolumetric subdiffraction fluorescent beads distributed in a hydrogel)</td>
<td align="center">5&#xa0;ms (test on <italic>Caenorhabditis elegans</italic> and the blood flow of beating zebrafish hearts)</td>
</tr>
<tr>
<td align="center">F-VCD (<xref ref-type="bibr" rid="B70">Yi et al., 2023</xref>)</td>
<td align="center">RCAN (<xref ref-type="bibr" rid="B6">Chen et al., 2021</xref>)<break/>And VCD-Net (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>)</td>
<td align="left">Axial resolution than VCD-Net: 2-fold<break/>Lateral resolution: 1.5-fold</td>
<td align="center">20&#xa0;ms (test on mitochondria)</td>
</tr>
<tr>
<td align="center">HyLFM-Net (<xref ref-type="bibr" rid="B59">Wagner et al., 2021</xref>)</td>
<td align="center">2D residual blocks (<xref ref-type="bibr" rid="B22">Kaiming et al., 2016</xref>)<break/>And 3D residual blocks (<xref ref-type="bibr" rid="B22">Kaiming et al., 2016</xref>)</td>
<td align="left">MS-SSIM<break/>0.982 &#xb1; 0.002 (test on beads)<break/>0.91 &#xb1; 0.02 (test on static)<break/>0.78 &#xb1; 0.04 (test on dynamic)<break/>0.90 &#xb1; 0.02 (test on brain)</td>
<td align="center">10&#xa0;ms (test on medaka heart dynamics and zebrafish neural activity)</td>
</tr>
<tr>
<td align="center">Deep learning enhanced raw light field image with numerical inversion volumetric reconstruction</td>
<td align="center">VsLFM (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>)</td>
<td align="center">convolutional layers<break/>LeakyReLU<break/>And PixelShuffle (<xref ref-type="bibr" rid="B51">Shi et al., 2016</xref>)</td>
<td align="left">spatial-angular domain<break/>SNR &#x223c;30&#xa0;dB<break/>SSIM &#x223c;0.95<break/>Reconstructed<break/>SNR &#x223c;2.5&#xa0;dB<break/>SSIM &#x223c;0.6 (test on 3D tubulins structure)</td>
<td align="center">1,200&#xa0;s</td>
</tr>
<tr>
<td rowspan="2" align="center">Numerical inversion volumetric reconstruction with deep learning enhanced volumetric data</td>
<td align="center">CARE (<xref ref-type="bibr" rid="B64">Weigert et al., 2018</xref>)</td>
<td align="left">Residual version of U-Net (<xref ref-type="bibr" rid="B22">Kaiming et al., 2016</xref>; <xref ref-type="bibr" rid="B5">Ounkomol et al., 2018</xref>)</td>
<td align="left">SEG score: from 0.47 to 0.65 (test on <italic>Tribolium castaneum</italic>)</td>
<td align="left"/>
</tr>
<tr>
<td align="center">DFCAN and DFGAN (<xref ref-type="bibr" rid="B45">Qiao et al., 2021</xref>)</td>
<td align="center">Fourier channel attention (<xref ref-type="bibr" rid="B6">Chen et al., 2021</xref>), convolutional layers<break/>And PixelShuffle (<xref ref-type="bibr" rid="B51">Shi et al., 2016</xref>) (DFCAN and DFGAN)<break/>And cGAN (<xref ref-type="bibr" rid="B38">Mirza, 2014</xref>) (DFGAN)</td>
<td align="left">NRMSE: 0.0593<break/>MS-SSIM: 0.8665<break/>Resolution: 139&#xa0;nm (test on low-fluorescence average photon count 120 DFCAN)<break/>NRMSE:0.0586<break/>MS-SSIM:0.8680<break/>Resolution:97&#xa0;nm (test on low-fluorescence average photon count 120 DFGAN)</td>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4">
<title>4 Challenges and opportunities</title>
<p>The existing reconstruction methods for LFM using deep learning are facing multiple challenges. Firstly, the current approaches are centered on predicting single frames, necessitating a refinement in ensuring accurate predictions for consecutive frames. Nonetheless, enhancing the forecast precision for continuous frames inevitably translates to an increase in computational burden, thereby escalating the requisites for advanced computational resources. Secondly, the scarcity of available LFM datasets poses a hindrance to fully harnessing these resources for achieving optimal outcomes. Lastly, the enigmatic nature of deep learning models presents a hurdle in enhancing the intelligibility of these intricate models.</p>
<sec id="s4-1">
<title>4.1 Accuracy in predicting temporal information</title>
<p>Utilizing cutting-edge techniques in real-time data forecasting, researchers can accurately monitor the dynamic behaviors of numerous cells with precision in both the spatial and temporal domains, thereby enhancing comprehension of neuronal population activities (<xref ref-type="bibr" rid="B44">Prevedel et al., 2014</xref>; <xref ref-type="bibr" rid="B17">Hornik et al., 1989</xref>). Nonetheless, when applying deep learning methodologies to LFM, the conventional practice involves individual frame prediction, leading to potential inconsistencies in temporal coherence and the presence of artifacts over time intervals. To address this challenge effectively, it is crucial to explicitly account for temporal dynamics during data reconstruction by incorporating time-resolved data. While a straightforward approach involves treating time as an extra dimension within CNNs, such a method may not be viable for extensive networks managing prolonged correlations. An alternative and more efficient solution is to merge CNNs with advanced recurrent neural networks like convLSTM (<xref ref-type="bibr" rid="B75">Zhang and Zhang, 2024</xref>) and convGRU (<xref ref-type="bibr" rid="B25">Lagemann et al., 2021</xref>) architectures, which are specifically tailored for sequence prediction tasks. However, this integrated approach may demand more sophisticated hardware resources to ensure streamlined execution.</p>
</sec>
<sec id="s4-2">
<title>4.2 Hardware requirement</title>
<p>In the realm of deep learning for applications like LFM, the necessity for customized software frameworks to facilitate the manipulation and analysis of intricate neural networks is evident. A pivotal consideration in these advancements pertains to the evolving hardware prerequisites. As is well known, the use of specialized graphics processing units (GPUs) for training deep learning models underscores the criticality of hardware in expediting computational processes. The migration towards GPU utilization over conventional central processing units (CPUs) is essential for substantial gains in training speed, significantly reducing the training duration. This transition not only accelerates the pace of model refinement but also addresses the cost constraints associated with sophisticated hardware requirements. Looking ahead, the symbiotic relationship between software innovation and hardware optimization remains fundamental in shaping the trajectory of deep learning applications, paving the way for enhanced efficiencies and broader accessibility across research domains.</p>
</sec>
<sec id="s4-3">
<title>4.3 Better network structures and training strategies can reduce the need for datasets</title>
<p>Deep learning&#x2019;s effectiveness is vitally dependent on the availability of training data. Inadequate training data will result in poor performance. However, a prevalent misperception is that deep learning requires an enormous number of training samples. For example, VsLFM (<xref ref-type="bibr" rid="B35">Lu et al., 2023</xref>) training typically uses 5,000 paired spatial-angular patches, and VCD-Net (<xref ref-type="bibr" rid="B63">Wang et al., 2021</xref>) trains using 4,580 pairs of image patches, each with a light field image (176 &#xd7; 176 pixels) and a volume (176 &#xd7; 176 &#xd7; 51 pixels). However, LFMNet (<xref ref-type="bibr" rid="B57">Vizcaino et al., 2021a</xref>) required 362 high-resolution images (1,287 &#xd7; 1,287 pixels), whereas a network based on the U-Net architecture (<xref ref-type="bibr" rid="B5">Ounkomol et al., 2018</xref>) used only 40 images (1,500 &#xd7; 1,500 pixels). It can be observed that different network architectures and training strategies can significantly reduce the size of the training dataset. The quality of the data and its relevance to the situation are likely more crucial. In order to advance further, LFM necessitates innovative experimental and computational approaches for the production of an increased quantity and quality of training data.</p>
</sec>
<sec id="s4-4">
<title>4.4 Strategies for obtaining a light field dataset and leveraging existing training data to enhance the dataset</title>
<p>Various strategies can be explored to acquire a comprehensive light field dataset. One avenue involves conducting specialized experiments tailored to capture the requisite images for training purposes. For instance, employing confocal microscopy and light field microscopy in tandem to capture pairs of high-quality volumetric images along with corresponding light field data from a stationary cell location can help validate LFM image reconstruction algorithms. Additionally, leveraging an in-depth understanding of the underlying physics governing light field propagation enables the utilization of forward model simulations to generate authentic images (<xref ref-type="bibr" rid="B64">Weigert et al., 2018</xref>; <xref ref-type="bibr" rid="B40">Nehme et al., 2018</xref>). Furthermore, the integration of neural networks presents a promising approach to dataset creation. Recent endeavors have focused on the development of cell generation models through generative adversarial techniques (<xref ref-type="bibr" rid="B43">Osokin et al., 2017</xref>; <xref ref-type="bibr" rid="B15">Goldsborough et al., 2017</xref>; <xref ref-type="bibr" rid="B72">Yuan et al., 2019</xref>), leading to the generation of synthetic images that can subsequently contribute to training reconstruction algorithms. Moreover, conventional approaches, such as data augmentation, present a feasible tactic for enriching datasets by creating diverse variations of existing images. This process involves employing methodologies like rotation, scaling, and manipulation of lighting conditions to expand the range of training samples available for model learning. An alternative efficacious approach involves the utilization of transfer learning (<xref ref-type="bibr" rid="B73">Zeiler and Fergus, 2014</xref>). 
By transferring knowledge from pre-trained models to new tasks, transfer learning enables the efficient utilization of learned features and representations, thereby enhancing the generalization capability and performance of the neural network on specific tasks. These techniques, rooted in the diversification of data and the strategic reuse of network knowledge, play pivotal roles in advancing the efficacy and adaptability of deep learning models across various domains and applications. By pretraining networks on extensive datasets sourced from different domains, transfer learning expedites convergence and enhances the generalization capabilities of the models (<xref ref-type="bibr" rid="B65">Weiss et al., 2016</xref>). This multifaceted approach holds significant promise for enriching the light field dataset and maximizing its efficacy in a research context.</p>
</sec>
<sec id="s4-5">
<title>4.5 Explaining and interpreting the deep neural network</title>
<p>The challenge of model interpretability emerges as a critical issue given the inherently opaque and enigmatic nature of deep neural networks (<xref ref-type="bibr" rid="B76">Zhang et al., 2018</xref>). To establish deep learning as a reliable component within LFM-based processes, it is important to explore the integration of conceptual frameworks and interactive graphical tools to elucidate the underlying rationale behind generating specific outcomes. Encouragingly, the field of computer vision has witnessed advancements in enhancing the interpretability of deep learning through various methodologies. These include delving into the essential components of input images for accurate predictions, scrutinizing the function of intermediate layers, analyzing the contributions of different modules through ablation studies, constructing hierarchical explanatory graphs spanning across layers, and designing network architectures that prioritize interpretability. The adaptation of these techniques to the domain of light field imaging is deemed essential, calling for the development of specialized tools tailored to facilitate the interpretation of results. One potential avenue involves the creation of tools explicitly designed to explicate the rationale behind predicted outcomes, thereby fostering transparency and comprehension in the intricate mechanisms governing deep neural networks in LFM contexts.</p>
</sec>
<sec id="s4-6">
<title>4.6 Outlook for deep learning in microscale light field image reconstruction</title>
<p>Deep learning-based LFM is still in its nascent stage, but significant advancements have been achieved in leveraging deep learning techniques for this purpose. As we look forward, the future of deep learning-based LFM reconstruction may further highlight the utilization of expansive and high-quality big data sets to facilitate various forms of learning paradigms such as supervised, weakly supervised, self-supervised, or unsupervised learning. To promote broader adoption and enhancement of existing tools, as well as the development of novel ones, it is critical to create extensive datasets to meet the image analysis requirements of the broader life sciences data. These datasets should be publicly accessible, assisting skilled machine learning researchers to tackle biological challenges. It is evident that there are numerous unexplored applications awaiting discovery in this domain. It is hence advisable to simultaneously push forward the tool development and biological prediction processes, given that deep learning fundamentally thrives on data analysis.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="s5">
<title>Author contributions</title>
<p>BL: Conceptualization, Formal Analysis, Methodology, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. YT: Writing&#x2013;original draft, Writing&#x2013;review and editing. YZ: Writing&#x2013;original draft, Writing&#x2013;review and editing. ZZ: Funding acquisition, Project administration, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. DW: Conceptualization, Funding acquisition, Project administration, Supervision, Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s6">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was funded by the Zhejiang Provincial Medical and Health Technology Project (Grant No. 2024KY246), the Fundamental Research Funds for the Central Universities (Grant No. NS2023007), and the Key Laboratory of Intake and Exhaust Technology, Ministry of Education (CEPE2024015).</p>
</sec>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alain</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aenchbacher</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Smolic</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Interactive light field tilt-shift refocus with generalized shift-and-sum</article-title>. <source>arXiv preprint</source>. <comment>arXiv:1910.04699</comment>.</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alhashim</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>High quality monocular depth estimation via transfer learning</article-title>. <source>arXiv preprint</source>. <comment>arXiv:1812.11941</comment>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Broxton</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Grosenick</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cohen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Andalman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Deisseroth</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Wave optics theory and 3-D deconvolution for the light field microscope</article-title>. <source>Opt. Exp.</source> <volume>21</volume> (<issue>21</issue>), <fpage>25418</fpage>&#x2013;<lpage>25439</lpage>. <pub-id pub-id-type="doi">10.1364/OE.21.025418</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sasaki</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Three-dimensional residual channel attention networks denoise and sharpen fluorescence microscopy image volumes</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>6</issue>), <fpage>678</fpage>&#x2013;<lpage>687</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Choy</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gwak</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Savarese</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>4d spatio-temporal convnets: Minkowski convolutional neural networks</article-title>. <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>3075</fpage>&#x2013;<lpage>3084</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dansereau</surname>
<given-names>D. G.</given-names>
</name>
<name>
<surname>Pizarro</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>S. B.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Linear volumetric focus for light field cameras</article-title>. <source>ACM Trans. Graph.</source> <volume>34</volume> (<issue>2</issue>), <fpage>15-1</fpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dansereau</surname>
<given-names>D. G.</given-names>
</name>
<name>
<surname>Schuster</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ford</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wetzstein</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A wide-field-of-view monocentric light field camera</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>5048</fpage>&#x2013;<lpage>5057</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dosovitskiy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An image is worth 16x16 words: Transformers for image recognition at scale</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2010.11929</comment>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Drozdova</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kinakh</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Bait</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Taran</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Lastufka</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Dessauges-Zavadsky</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Radio-astronomical image reconstruction with a conditional denoising diffusion model</article-title>. <source>Astronomy and Astrophysics</source> <volume>683</volume>, <fpage>A105</fpage>.</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Erisen</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>SERNet-former: semantic segmentation by efficient residual network with attention-boosting gates and attention-fusion networks</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2401.15741</comment>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Trilobite-inspired neural nanophotonic light-field camera with extreme depth-of-field</article-title>. <source>Nat. Comm.</source>, <volume>13</volume> (<issue>1</issue>), <fpage>2130</fpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Foret</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kleiner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mobahi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Neyshabur</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Sharpness-aware minimization for efficiently improving generalization</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2010.01412</comment>.</citation>
</ref>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gershun</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1939</year>). <article-title>The light field</article-title>. <source>J. Math. Phys.</source>, <volume>18</volume> (<issue>1&#x2013;4</issue>), <fpage>51</fpage>&#x2013;<lpage>151</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Godard</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mac Aodha</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Brostow</surname>
<given-names>G. J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Unsupervised monocular depth estimation with left-right consistency</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>270</fpage>&#x2013;<lpage>279</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goldsborough</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Pawlowski</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Caicedo</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Carpenter</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>CytoGAN: generative modeling of cell images</article-title>. <source>BioRxiv</source>, <fpage>227645</fpage>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Cascade cost volume for high-resolution multi-view stereo and stereo matching</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>2495</fpage>&#x2013;<lpage>2504</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hornik</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Stinchcombe</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>White</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Multilayer feedforward networks are universal approximators</article-title>. <source>Neural Netw.</source> <volume>2</volume> (<issue>5</issue>), <fpage>359</fpage>&#x2013;<lpage>366</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Spectral clustering super-resolution imaging based on multispectral camera array</article-title>. <source>IEEE Transactions on Image Processing</source> <volume>32</volume>, <fpage>1257</fpage>&#x2013;<lpage>1271</lpage>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jayaweera</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Edussooriya</surname>
<given-names>C. U.</given-names>
</name>
<name>
<surname>Wijenayake</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Agathoklis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bruton</surname>
<given-names>L. T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multi-volumetric refocusing of light fields</article-title>. <source>IEEE Signal Processing Letters</source> <volume>28</volume>, <fpage>31</fpage>&#x2013;<lpage>35</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alahi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fei-Fei</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Perceptual losses for real-time style transfer and super-resolution</article-title>,&#x201d; in <conf-name>Computer vision&#x2013;ECCV 2016: 14th European conference</conf-name>, <conf-loc>Amsterdam, Netherlands</conf-loc>, <conf-date>October 11&#x2013;14, 2016</conf-date> (<publisher-name>Springer</publisher-name>), <fpage>694</fpage>&#x2013;<lpage>711</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaiming</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xiangyu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shaoqing</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>. <volume>Vol. 34</volume>, <fpage>770</fpage>&#x2013;<lpage>778</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Murata</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Takida</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Uesaka</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Consistency trajectory models: learning probability flow ode trajectory of diffusion</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2310.02279</comment>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jeong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>F-number matching method in light field microscopy using an elastic micro lens array</article-title>. <source>Opt. Lett.</source>, <volume>41</volume> (<issue>12</issue>), <fpage>2751</fpage>&#x2013;<lpage>2754</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kinga</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Adam</surname>
<given-names>J. B.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A method for stochastic optimization</article-title>. <source>International conference on learning representations (ICLR)</source> <volume>5</volume>, <fpage>6</fpage>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lagemann</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lagemann</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep recurrent optical flow learning for particle image velocimetry data</article-title>. <source>Nat. Mach. Intell.</source> <volume>3</volume> (<issue>7</issue>), <fpage>641</fpage>&#x2013;<lpage>651</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-021-00369-0</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume> (<issue>7553</issue>), <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ledig</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Theis</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Husz&#xe1;r</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Caballero</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cunningham</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Acosta</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>Photo-realistic single image super-resolution using a generative adversarial network</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>4681</fpage>&#x2013;<lpage>4690</lpage>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Levoy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hanrahan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Light field rendering</article-title>,&#x201d; in <source>Seminal Graphics Papers: Pushing the Boundaries</source>. <volume>Vol.2</volume>, <fpage>441</fpage>&#x2013;<lpage>452</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Levoy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Adams</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Footer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Horowitz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2006</year>). &#x201c;<article-title>Light field microscopy</article-title>,&#x201d; in <source>ACM SIGGRAPH 2006 papers</source>, <fpage>924</fpage>&#x2013;<lpage>934</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kaneko</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Fukushima</surname>
<given-names>E. F.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Proposal of terrain mapping under extreme light conditions using direct stereo matching methods</article-title>,&#x201d; in <source>2014 IEEE/SICE International Symposium on System Integration</source>, <fpage>153</fpage>&#x2013;<lpage>158</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>M. H.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Universal style transfer via feature transforms</article-title>. <source>Advances in neural information processing systems</source> <volume>30</volume>.</citation>
</ref>
<ref id="B32">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Van Gool</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Timofte</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>SwinIR: image restoration using Swin transformer</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF international conference on computer vision</source>, <fpage>1833</fpage>&#x2013;<lpage>1844</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Camera array based light field microscopy</article-title>. <source>Biomed. Opt. Exp.</source> <volume>6</volume> (<issue>9</issue>), <fpage>3179</fpage>&#x2013;<lpage>3189</lpage>. <pub-id pub-id-type="doi">10.1364/boe.6.003179</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y. W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Continuous optical zoom microscope with extended depth of field and 3D reconstruction</article-title>. <source>PhotoniX</source>, <volume>3</volume> (<issue>1</issue>), <fpage>20</fpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Virtual-scanning light-field microscopy for robust snapshot high-resolution volumetric imaging</article-title>. <source>Nat. Methods</source> <volume>20</volume> (<issue>5</issue>), <fpage>735</fpage>&#x2013;<lpage>746</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mignard-Debise</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ihrke</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Light-field microscopy with a consumer light-field camera</article-title>,&#x201d; in <source>2015 International Conference on 3D Vision</source>, <fpage>335</fpage>&#x2013;<lpage>343</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mihara</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Funatomi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tanaka</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kubo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mukaigawa</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Nagahara</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>4D light field segmentation with spatial and angular consistencies</article-title>,&#x201d; in <source>2016 IEEE International Conference on Computational Photography (ICCP)</source>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirza</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Conditional generative adversarial nets</article-title>. <source>arXiv preprint</source>. <comment>arXiv:1411.1784</comment>.</citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Morozov</surname>
<given-names>O. G.</given-names>
</name>
<name>
<surname>Ovchinnikov</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Akhtiamov</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Zalyalov</surname>
<given-names>R. G.</given-names>
</name>
<name>
<surname>Il&#x2019;in</surname>
<given-names>G. L.</given-names>
</name>
<name>
<surname>Pol&#x2019;ski</surname>
<given-names>Y. E.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). &#x201c;<article-title>Two-frequency scanning LFM lidars: theory and applications</article-title>,&#x201d; in <source>Remote sensing of clouds and the atmosphere VI</source> <volume>Vol. 4539</volume> (<publisher-name>SPIE</publisher-name>), <fpage>158</fpage>&#x2013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1117/12.454435</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nehme</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Weiss</surname>
<given-names>L. E.</given-names>
</name>
<name>
<surname>Michaeli</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Shechtman</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep-STORM: super-resolution single-molecule microscopy by deep learning</article-title>. <source>Optica</source>, <volume>5</volume>, <fpage>458</fpage>&#x2013;<lpage>464</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Levoy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Br&#xe9;dif</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Duval</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Horowitz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hanrahan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Light field photography with a hand-held plenoptic camera</article-title>. <publisher-name>Stanford University</publisher-name>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Orth</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Crozier</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Microscopy with microlens arrays: high throughput, high resolution and light-field imaging</article-title>. <source>Opt. Express</source> <volume>20</volume> (<issue>12</issue>), <fpage>13522</fpage>&#x2013;<lpage>13531</lpage>. <pub-id pub-id-type="doi">10.1364/OE.20.013522</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Osokin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chessel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Carazo Salas</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Vaggi</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>GANs for biological image synthesis</article-title>,&#x201d; in <source>Proceedings of the IEEE international conference on computer vision</source>, <fpage>2233</fpage>&#x2013;<lpage>2242</lpage>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ounkomol</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Seshamani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Maleckar</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Collman</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>G. R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Label-free prediction of three-dimensional fluorescence images from transmitted-light microscopy</article-title>. <source>Nat. Methods</source> <volume>15</volume> (<issue>11</issue>), <fpage>917</fpage>&#x2013;<lpage>920</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prevedel</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>Y. G.</given-names>
</name>
<name>
<surname>Hoffmann</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pak</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wetzstein</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kato</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Simultaneous whole-animal 3D imaging of neuronal activity using light-field microscopy</article-title>. <source>Nat. Methods</source> <volume>11</volume> (<issue>7</issue>), <fpage>727</fpage>&#x2013;<lpage>730</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.2964</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Evaluation and development of deep neural networks for image super-resolution in optical microscopy</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>2</issue>), <fpage>194</fpage>&#x2013;<lpage>202</lpage>.</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Homotopic gradients of generative density priors for MR image reconstruction</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>40</volume> (<issue>12</issue>), <fpage>3265</fpage>&#x2013;<lpage>3278</lpage>.</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richardson</surname>
<given-names>W. H.</given-names>
</name>
</person-group> (<year>1972</year>). <article-title>Bayesian-based iterative method of image restoration</article-title>. <source>J. Opt. Soc. Am.</source> <volume>62</volume> (<issue>1</issue>), <fpage>55</fpage>&#x2013;<lpage>59</lpage>.</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rumelhart</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Learning representations by back-propagating errors</article-title>. <source>Nature</source> <volume>323</volume> (<issue>6088</issue>), <fpage>533</fpage>&#x2013;<lpage>536</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sadat</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Buhmann</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bradley</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hilliges</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Weber</surname>
<given-names>R. M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>CADS: Unleashing the diversity of diffusion models through condition-annealed sampling</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2310.17347</comment>.</citation>
</ref>
<ref id="B50">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Schlemper</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Caballero</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hajnal</surname>
<given-names>J. V.</given-names>
</name>
<name>
<surname>Price</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rueckert</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A deep cascade of convolutional neural networks for MR image reconstruction</article-title>,&#x201d; in <conf-name>Information Processing in Medical Imaging: 25th International Conference, IPMI 2017</conf-name>, <conf-loc>Boone, NC</conf-loc>, <conf-date>June 25&#x2013;30, 2017</conf-date>, <fpage>647</fpage>&#x2013;<lpage>658</lpage>.</citation>
</ref>
<ref id="B51">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Caballero</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Husz&#xe1;r</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Totz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Aitken</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Bishop</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>1874</fpage>&#x2013;<lpage>1883</lpage>.</citation>
</ref>
<ref id="B53">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Srivastava</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Omnivec: Learning robust representations with cross modal sharing</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF winter conference on applications of computer vision</source>, <fpage>1236</fpage>&#x2013;<lpage>1248</lpage>.</citation>
</ref>
<ref id="B54">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Taguchi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ramalingam</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Veeraraghavan</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Axial light field for curved mirrors: Reflect your perspective, widen your view</article-title>,&#x201d; in <source>2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition</source>, <fpage>499</fpage>&#x2013;<lpage>506</lpage>.</citation>
</ref>
<ref id="B56">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tateno</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Navab</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tombari</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Distortion-aware convolutional filters for dense prediction in panoramic images</article-title>,&#x201d; in <source>Proceedings of the European conference on computer vision</source> (<publisher-loc>Munich, Germany</publisher-loc>: <publisher-name>ECCV</publisher-name>), <fpage>707</fpage>&#x2013;<lpage>722</lpage>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vizcaino</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Saltarin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Belyaev</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lyck</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lasser</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Favaro</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Learning to reconstruct confocal microscopy stacks from single light field images</article-title>. <source>IEEE Transactions on Computational Imaging</source> <volume>7</volume>, <fpage>775</fpage>&#x2013;<lpage>788</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Vizcaino</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Symvoulidis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Favaro</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Guner-Ataman</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lasser</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2021b</year>). &#x201c;<article-title>Real-time light field 3D microscopy via sparsity-driven learned deconvolution</article-title>,&#x201d; in <source>2021 IEEE International Conference on Computational Photography (ICCP)</source>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>.</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wagner</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Beuttenmueller</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Norlin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Gierten</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Boffi</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Wittbrodt</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Deep learning-enhanced light-field imaging with continuous validation</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>5</issue>), <fpage>557</fpage>&#x2013;<lpage>563</lpage>.</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walton</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hassani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Stylenat: giving each head a new perspective</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2211.05770</comment>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Old photo restoration via deep latent space translation</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source> <volume>45</volume> (<issue>2</issue>), <fpage>2071</fpage>&#x2013;<lpage>2087</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>One-peace: Exploring one general representation model toward unlimited modalities</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2305.11172</comment>.</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Real-time volumetric reconstruction of biological dynamics with light-field microscopy and deep learning</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>5</issue>), <fpage>551</fpage>&#x2013;<lpage>556</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-021-01058-x</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weigert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Boothe</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>M&#xfc;ller</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dibrov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jain</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Content-aware image restoration: pushing the limits of fluorescence microscopy</article-title>. <source>Nat. Methods</source> <volume>15</volume> (<issue>12</issue>), <fpage>1090</fpage>&#x2013;<lpage>1097</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0216-7</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weiss</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Khoshgoftaar</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A survey of transfer learning</article-title>. <source>J. Big Data</source> <volume>3</volume>, <fpage>1</fpage>&#x2013;<lpage>40</lpage>.</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yuste</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title><italic>In vivo</italic> imaging of neural activity</article-title>. <source>Nat. Methods</source> <volume>14</volume> (<issue>4</issue>), <fpage>349</fpage>&#x2013;<lpage>359</lpage>.</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Masia</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Jarabo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Light field image processing: An overview</article-title>. <source>IEEE Journal of Selected Topics in Signal Processing</source> <volume>11</volume> (<issue>7</issue>), <fpage>926</fpage>&#x2013;<lpage>954</lpage>.</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Iterative tomography with digital adaptive optics permits hour-long intravital observation of 3D subcellular dynamics at millisecond scale</article-title>. <source>Cell</source> <volume>184</volume> (<issue>12</issue>), <fpage>3318</fpage>&#x2013;<lpage>3332</lpage>.</citation>
</ref>
<ref id="B69">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Aanet: Adaptive aggregation network for efficient stereo matching</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>1959</fpage>&#x2013;<lpage>1968</lpage>.</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Video-rate 3D imaging of living cells using Fourier view-channel-depth light field microscopy</article-title>. <source>Commun. Biol.</source> <volume>6</volume> (<issue>1</issue>), <fpage>1259</fpage>.</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Vasudevan</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Yeung</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Seyedhosseini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Contrastive captioners are image-text foundation models</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2205.01917</comment>.</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Computational modeling of cellular structures using conditional deep generative networks</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>12</issue>), <fpage>2141</fpage>&#x2013;<lpage>2149</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty923</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zeiler</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Visualizing and understanding convolutional networks</article-title>,&#x201d; in <source>European conference on computer vision</source>. <comment>arXiv:1311.2901</comment>.</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>TC-DiffRecon: texture coordination MRI reconstruction method based on diffusion model and modified MF-UNet method</article-title>. <source>arXiv preprint</source>. <comment>arXiv:2402.11274</comment>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Application of GWO-attention-ConvLSTM model in customer churn prediction and satisfaction analysis in customer relationship management</article-title>. <source>Heliyon</source> <volume>10</volume> (<issue>7</issue>), <fpage>e37229</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e37229</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y. N.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.-C.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Interpretable convolutional neural networks</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>8827</fpage>&#x2013;<lpage>8836</lpage>.</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep-learning super-resolution light-sheet add-on microscopy (Deep-SLAM) for easy isotropic volumetric imaging of large biological specimens</article-title>. <source>Biomed. Opt. Express</source> <volume>11</volume> (<issue>12</issue>), <fpage>7273</fpage>&#x2013;<lpage>7285</lpage>.</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>When awgn-based denoiser meets real noises</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name>. <volume>Vol. 34</volume> (<issue>07</issue>), <fpage>13074</fpage>&#x2013;<lpage>13081</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v34i07.7009</pub-id>
</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhuang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Acdnet: Adaptively combined dilated convolution for monocular panorama depth estimation</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name>. <volume>Vol. 36</volume> (<issue>3</issue>), <fpage>3653</fpage>&#x2013;<lpage>3661</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v36i3.20278</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>