<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2025.1463233</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>FLA-UNet: feature-location attention U-Net for foveal avascular zone segmentation in OCTA images</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Cao</surname> <given-names>Li</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2401752/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Deng</surname> <given-names>He</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2411389/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Electrical and Electronic Engineering, Wuhan Polytechnic University</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Computer Science and Technology, Wuhan University of Science and Technology</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: Rizwan Qureshi, Hamad bin Khalifa University, Qatar</p>
</fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: Moiz Khan Sherwani, University of Copenhagen, Denmark</p>
<p>Sharmeen Tole, Institute of Chemical Technology, India</p>
<p>Muhammad Ammar Khawer, Hong Kong Metropolitan University, Hong Kong SAR, China</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Li Cao, <email>12591@whpu.edu.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>07</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>8</volume>
<elocation-id>1463233</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>07</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Li, Cao and Deng.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Li, Cao and Deng</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Since optical coherence tomography angiography (OCTA) is non-invasive and non-contact, it is widely used in the study of retinal disease detection. As a key indicator for retinal disease detection, accurate segmentation of foveal avascular zone (FAZ) has an important impact on clinical application. Although the U-Net and its existing improvement methods have achieved good performance on FAZ segmentation, their generalization ability and segmentation accuracy can be further improved by exploring more effective improvement strategies.</p>
</sec>
<sec>
<title>Methods</title>
<p>We propose a novel improved method named Feature-location Attention U-Net (FLA-UNet) by introducing newly designed feature-location attention blocks (FLABs) into U-Net and using a joint loss function. The FLAB consists of feature-aware blocks and location-aware blocks in parallel, and is embedded into each decoder of U-Net to integrate more marginal information of FAZ and strengthen the connection between target region and boundary information. The joint loss function is composed of the cross-entropy loss (CE loss) function and the Dice coefficient loss (Dice loss) function, and by adjusting their weights, the performance of the network on boundary and internal segmentation can be comprehensively considered to improve its accuracy and robustness for FAZ segmentation.</p>
</sec>
<sec>
<title>Results</title>
<p>The qualitative and quantitative comparative experiments on the three datasets of OCTAGON, FAZID and OCTA-500 show that our proposed FLA-UNet achieves better segmentation quality, and is superior to other existing state-of-the-art methods in terms of the MIoU, ACC and Dice coefficient.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The proposed FLA-UNet can effectively improve the accuracy and robustness of FAZ segmentation in OCTA images by introducing feature-location attention blocks into U-Net and using a joint loss function. This has laid a solid theoretical foundation for its application in auxiliary diagnosis of fundus diseases.</p>
</sec>
</abstract>
<kwd-group>
<kwd>optical coherence tomography angiography (OCTA)</kwd>
<kwd>foveal avascular zone (FAZ) segmentation</kwd>
<kwd>feature-location attention</kwd>
<kwd>joint loss function</kwd>
<kwd>U-Net</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="4"/>
<equation-count count="9"/>
<ref-count count="33"/>
<page-count count="10"/>
<word-count count="6767"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Pattern Recognition</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>With the rapid development and popularization of medical imaging equipment, the imaging technology has been widely used in clinical practice, and become an indispensable auxiliary means to carry out disease diagnosis, surgical planning, prognosis assessment and so on. Optical coherence tomography angiography (OCTA) (<xref ref-type="bibr" rid="ref17">Kashani et al., 2017</xref>) is a new non-invasive fundus imaging technology, which uses light interference to obtain vascular structure and blood flow information, and provide high resolution vascular imaging. In recent years, OCTA has been widely used in clinical diagnosis of various eye diseases, such as macular region disease, diabetic retinopathy, and retinal vascular obstruction. These eye diseases are related to the size and morphological changes of foveal avascular zone (FAZ) (<xref ref-type="bibr" rid="ref6">Chui et al., 2012</xref>), which is surrounded by continuous capillary plexus of the retina, and does not have any capillary structure itself. It is an important area for the formation of fine visual function. The changes in its shape and surrounding capillary density reflect the degree of ischemia of the macula, and are closely related to retinal vascular diseases, such as diabetic retinopathy and retinal venous obstruction. For the three eye-related conditions, namely normal, diabetes and myopia (<xref ref-type="bibr" rid="ref3">Balaji et al., 2020</xref>), diabetic eyes have a statistically significant increase in FAZ area compared to normal eyes. Similarly, the FAZ area increases and the blood vessel diameter decreases in myopia, especially in high myopia. Therefore, the changes in the area and morphology of FAZ can provide an important basis for clinical diagnosis of diabetes and myopia. The accurate segmentation of FAZ in OCTA images is crucial for diagnosis of fundus diseases.</p>
<p>In early days, many classical methods are proposed for FAZ segmentation. For example, the methods based on threshold segmentation (<xref ref-type="bibr" rid="ref24">Liu et al., 2022</xref>), region growth (<xref ref-type="bibr" rid="ref9">Ghassemi and Mirzadeh, 2007</xref>) and morphological operation (<xref ref-type="bibr" rid="ref32">Yang et al., 2016</xref>) can be used to segment FAZ, by setting appropriate thresholds or using local features of images. However, these methods may have some limitations when dealing with complex image conditions. To further improve the segmentation performance, some methods based on traditional machine learning algorithms are proposed, such as Markov Random Fields (MRF) (<xref ref-type="bibr" rid="ref4">Bourennane, 2010</xref>) and Support Vector Machine (SVM) (<xref ref-type="bibr" rid="ref2">Alam et al., 2019</xref>), where hand-crafted features and traditional classifiers are used for segmentation. Nevertheless, the segmentation accuracy is usually limited by the selection of features and the capability of classifiers.</p>
<p>In recent years, with the development of deep learning technology, fundus image segmentation methods based on deep learning have achieved great success. A typical example is the segmentation method using U-Net (<xref ref-type="bibr" rid="ref26">Ronneberger et al., 2015</xref>; <xref ref-type="bibr" rid="ref27">Sherwani and Gopalakrishnan, 2024</xref>), which is a kind of fully convolutional network with simple structure and beneficial effect. As this method processes the whole image in the same way and cannot give different attention to different areas, various improvement methods based on U-Net are proposed later. The introduction of attention mechanism (<xref ref-type="bibr" rid="ref5">Chen et al., 2022</xref>) in network models is one of the most effective ways, which can improve the accuracy and stability of segmentation by focusing on FAZ. These methods are often implemented by adding attention branches to models and adjusting the weights of features in channel and spatial dimensions, respectively. For instance, the channel attention branch can globally model the channels on feature maps and adjust the importance of each channel according to task requirements, to better express attention on FAZ. The spatial attention branch can consider the position relationship between pixels in spatial dimension to adjust the weight of each pixel in global feature maps, so as to accurately segment FAZ.</p>
<p>In addition to the improvement of the network structure, another improvement point is adopting a more appropriate loss function to optimize the model parameters, so as to improve the performance of the network model. For example, a hybrid loss function is used in DT-Net to improve the accuracy of retinal vessel segmentation (<xref ref-type="bibr" rid="ref16">Jia et al., 2023</xref>), and a joint loss function is used in a multi-task segmentation framework for thyroid tumor segmentation (<xref ref-type="bibr" rid="ref31">Yang et al., 2023</xref>).</p>
<p>Inspired by these strategies, it is very promising to obtain a novel method, by incorporating more effective attention mechanisms into U-Net, and using a more appropriate loss function that can further improve the accuracy of FAZ segmentation.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>Related works</title>
<p>The methods for FAZ segmentation of OCTA images are mainly divided into classical methods, traditional machine learning methods and deep learning-based methods.</p>
<p>Among classical methods, the threshold segmentation (<xref ref-type="bibr" rid="ref24">Liu et al., 2022</xref>) is a simple and commonly used method to segment FAZ based on pixel threshold. Each pixel is compared with a pre-defined threshold, and once the pixel value is greater than the threshold, it is marked as belonging to FAZ. Its segmentation result can be further optimized by subsequent morphological operations. The segmentation method based on region-growing (<xref ref-type="bibr" rid="ref9">Ghassemi and Mirzadeh, 2007</xref>) utilizes the similarity between seed points and adjacent pixels, where a seed point is first selected, and then the FAZ is gradually expanded by comparing the similarity of adjacent pixels to the seed point. This method requires appropriate similarity measurement and seed point selection. The method based on morphological operation (<xref ref-type="bibr" rid="ref32">Yang et al., 2016</xref>), such as erosion, dilation, opening and closing operations, processes images to extract structures of interest, which achieves a good segmentation effect for objects with obvious morphological features. The frequency domain analysis method (<xref ref-type="bibr" rid="ref22">Liu and Li, 2019</xref>) segments FAZ based on frequency-domain analysis techniques such as the Fourier transform or wavelet transform, which can distinguish between vascular and non-vascular areas by extracting frequency information. The two-stage image processing method proposed by <xref ref-type="bibr" rid="ref7">D&#x00ED;az et al. (2019a)</xref> is based on FAZ positioning and contour extraction, which can handle detailed information well. Although these classical methods have made some progress, there are still limitations, such as inaccurate boundary due to poor image quality, confusion between FAZ and non-perfusion region, segmentation error when there is wrong layer projection, and an inability to adapt well to complex image scenes and shapes.</p>
<p>In terms of traditional machine learning methods, the method proposed by <xref ref-type="bibr" rid="ref29">Sim&#x00F3; and Ves (2001)</xref> uses a statistical Bayesian segmentation for FAZ detection in digital retinal angiograms, which provides a global segmentation, i.e., veins, arteries and fovea are obtained simultaneously. The method proposed by <xref ref-type="bibr" rid="ref2">Alam et al. (2019)</xref> employs an AI system containing an SVM classifier model and utilizes a hierarchical backward elimination technique to identify optimal-feature-combination for the best diagnostic accuracy and most efficient classification performance. Another method proposed by <xref ref-type="bibr" rid="ref4">Bourennane (2010)</xref> first uses singular value decomposition (SVD) to improve signal to noise ratio, then applies MRF for FAZ segmentation, which achieves an encouraging result as a first approach for location and evolution of FAZ in retinal images. These machine learning-based methods on FAZ segmentation usually rely on hand-crafted features and prior knowledge, which are difficult to adapt to the complexity and diversity of FAZ, especially in the segmentation of low-quality images or diseased areas, and are prone to missegmentation or missing segmentation.</p>
<p>Among deep learning-based methods, U-Net (<xref ref-type="bibr" rid="ref26">Ronneberger et al., 2015</xref>; <xref ref-type="bibr" rid="ref27">Sherwani and Gopalakrishnan, 2024</xref>) is a landmark network structure for medical image segmentation, which is formed by concatenating feature maps of its encoder branch with feature maps of its decoder branch via skip connections. Subsequently, a variety of improved networks based on this structure are proposed. MED-Net proposed by <xref ref-type="bibr" rid="ref11">Guo et al. (2018)</xref> is the first deep neural network used for avascular zone detection in OCTA images, which consists of encoders and decoders with multi-scale blocks to capture features at different scales. An automatic superficial FAZ segmentation and quantification method is proposed by <xref ref-type="bibr" rid="ref13">Guo et al. (2019)</xref> to classify each pixel into the superficial FAZ or non-superficial FAZ class. Subsequently, largest connected-region extraction and hole-filling are applied to fine-tune the automatic segmentation results. Another customized encoder-decoder network, which incorporates a boundary alignment strategy with boundary supervision, is proposed by <xref ref-type="bibr" rid="ref12">Guo et al. (2021)</xref> to automatically segment the superficial FAZ. BSDA-Net proposed by <xref ref-type="bibr" rid="ref21">Lin et al. (2021)</xref> uses boundary regression and distance graph reconstruction of two auxiliary branches to improve the performance of the main branch. A lightweight U-Net proposed by <xref ref-type="bibr" rid="ref19">Li et al. (2020)</xref> is used to perform fast and robust FAZ segmentation. A segmentation network leveraging optical density and disease features, ODDF-Net, is proposed by <xref ref-type="bibr" rid="ref30">Yang et al. (2024)</xref> for the simultaneous 2D segmentation of RC, RA, RV, and FAZ in 3D OCTA, which can learn the relationship between retinal diseases and the disrupted vascular structures, facilitating multi-object structure extraction. A multistage dual-branch image projection network (DIPN) is proposed by <xref ref-type="bibr" rid="ref23">Liu et al. (2025)</xref> to learn feature information in B-scan images to assist geographic atrophy segmentation and FAZ segmentation. At present, these deep learning-based methods on FAZ segmentation still face the problems of insufficient segmentation accuracy and limited generalization ability, and still need to be further improved.</p>
<p>In order to further improve the accuracy of FAZ segmentation while maintaining good generalization ability, we propose a novel improved method named FLA-UNet by incorporating feature attention and location attention into U-Net and using a joint loss function. The main contributions of this paper are as follows:<list list-type="order">
<list-item>
<p>An innovative feature-location attention block (FLAB) is designed by using a feature-aware block and a location-aware block in parallel for each feature map, where the feature-aware block can be used to adjust the weight of each feature map and enhance the expression ability of network, while the location-aware block can obtain the global statistics of each feature map and better retain texture features and background information of FAZ.</p>
</list-item>
<list-item>
<p>A novel improved method based on U-Net for FAZ segmentation is proposed by embedding a FLAB into each decoder of U-Net to integrate more marginal information of FAZ and strengthen the connection between target region and boundary information, and using a joint loss function consisting of the cross-entropy loss (CE loss) function and the Dice coefficient loss (Dice loss) function to realize the optimization of the whole continuity of image and the boundary recovery.</p>
</list-item>
<list-item>
<p>A series of qualitative and quantitative comparative experiments on the three datasets of OCTAGON, FAZID and OCTA-500 are implemented to show the superiority of our method over other existing state-of-the-art methods in terms of visual segmentation effect and the MIoU, ACC and Dice coefficients.</p>
</list-item>
</list></p>
</sec>
<sec id="sec3">
<label>3</label>
<title>Proposed method</title>
<p>Typically, for a basic U-Net structure used for object segmentation, the encoded low-level feature maps are concatenated with the corresponding high-level feature maps from the decoder branch, so the beneficial semantic information and redundant information are simultaneously input to its next layer, which may affect the segmentation accuracy of network. This problem can be solved by adding appropriate attention blocks into the main network. Besides, since the CE loss function used in U-Net is only concerned with the prediction result at pixel level, the generated segmentation boundary may be discontinuous or jagged. This problem can be solved by combining it with the Dice loss function, which is used to measure overlap in segmentation tasks and tends to produce smoother segmentation boundaries, to form a compound loss function to optimize the network model parameters for FAZ segmentation.</p>
<sec id="sec4">
<label>3.1</label>
<title>Improved network structure</title>
<p>The novel improved network for FAZ segmentation is designed by embedding an innovative FLAB into each decoder of U-Net, as shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>. In the encoder branch, five encoders are used to extract features of the input image. Each encoder contains two identical convolution blocks, each of which consists of a 3&#x202F;&#x00D7;&#x202F;3 convolution layer, a batch normalization (BN) layer, and a ReLU activation layer. Between every two encoders, a max pooling operation is used to implement downsampling, and eventually, the spatial dimension is halved by setting the value of stride length to 2 and the number of channels doubles by setting the number of output channels to twice the number of input channels. In the decoder branch, four decoders use the feature maps of encoders to progressively obtain the segmentation result. Each decoder contains a skip connection block, a FLAB, and two identical convolution blocks, each of which is the same as in its corresponding encoder. Between every two decoders (or the last encoder and the first decoder), the bilinear interpolation is used to implement upsampling, where low-resolution feature maps are upsampled to the same resolution as the encoder stage for feature fusion, which can accelerate the training speed of model and make the marginal contour clearer. In each skip connection block, the upsampled feature maps from the previous decoder (or the last encoder) are concatenated with the encoded feature maps from the encoder at current layer, and the concatenated feature maps are input into the corresponding FLAB. In each FLAB, the concatenated feature maps are processed to obtain more detailed features. Finally, the segmentation result is obtained by performing a 1&#x202F;&#x00D7;&#x202F;1 convolution operation on the output of the last level decoder, followed by using a Softmax function.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Schematic representation of our improved network structure, which is formed by embedding a FLAB into each decoder of U-Net.</p>
</caption>
<graphic xlink:href="frai-08-1463233-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a convolutional neural network architecture for image processing. It includes layers for convolution, downsampling, upsampling, and feature-location attention blocks (FLAB). The input is a 480x480 image, processed through multiple stages that reduce its size to 30x30. Arrows indicate data flow, with skip connections and integration of convolution operations.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec5">
<label>3.2</label>
<title>Feature-location attention block</title>
<p>For the concatenated feature maps <italic>F</italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;<italic>C</italic></sup>, a FLAB contains <italic>C</italic> attention modules to process <italic>C</italic> feature maps separately; each attention module consists of a feature-aware block and a location-aware block in parallel, as shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>. For channel <italic>i</italic> (<italic>i</italic> &#x2208;1, &#x2026;, <italic>C</italic>), firstly, the feature weight <italic>W<sub>Fi</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> and location weight <italic>W<sub>Li</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> are calculated simultaneously to represent, respectively, the important features in the channel and at different spatial positions. Then, <italic>W<sub>Fi</sub></italic> and <italic>W<sub>Li</sub></italic> are fused together through a simple addition operation to ensure information interaction. Finally, the fused feature map is activated by a Sigmoid function and multiplied with the concatenated feature map in channel <italic>i</italic> to obtain an updated feature map with enhanced feature and location information. The updated feature maps from <italic>C</italic> channels are processed by performing a 3&#x202F;&#x00D7;&#x202F;3 convolution operation and halving the number of channels to serve as input of the subsequent convolution block.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Schematic representation of a FLAB, which mainly consists of a feature-aware block and a location-aware block in parallel.</p>
</caption>
<graphic xlink:href="frai-08-1463233-g002.tif">
<alt-text content-type="machine-generated">Flowchart of a neural network operation, starting with a multi-channel input of size H by W by C. It splits into feature-aware and location-aware blocks, both producing outputs of size H by W by 1. These outputs are added and processed through a sigmoid function, then multiplied with the original input. A convolution operation reduces the output size to H by W by C over two. Symbols represent addition, sigmoid, multiplication, and convolution.</alt-text>
</graphic>
</fig>
<p>For the concatenated feature map <italic>F<sub>i</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> in channel <italic>i</italic>, firstly, an average pooling and max pooling operation is separately performed in the feature-aware block to produce the feature maps <italic>F</italic><sub>Avg</sub>&#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> and <italic>F</italic><sub>Max</sub>&#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup>. Then, <italic>F</italic><sub>Avg</sub> and <italic>F</italic><sub>Max</sub> are concatenated to preserve the texture and marginal features of the image. Finally, the feature map sequentially passes through a 1&#x202F;&#x00D7;&#x202F;1 convolution layer, a ReLU activation layer and a 1&#x202F;&#x00D7;&#x202F;1 convolution layer to generate the feature weight <italic>W<sub>Fi</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup>. Its form is shown in <xref ref-type="disp-formula" rid="EQ1">Equation 1</xref>.<disp-formula id="EQ1">
<label>(1)</label>
<mml:math id="M1">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi mathvariant="italic">Fi</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mtext>conv</mml:mtext>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mtext>conv</mml:mtext>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext>concat</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>Avg</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi mathvariant="italic">Max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula></p>
<p>In the location-aware block, an average pooling and max pooling operation is separately performed on the concatenated feature map <italic>F<sub>i</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> in channel <italic>i</italic>, and the corresponding feature maps <italic>F</italic><sub>Avg</sub>&#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> and <italic>F</italic><sub>Max</sub>&#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup> are concatenated similarly. Then, the feature map passes through a 7&#x202F;&#x00D7;&#x202F;7 convolution layer to capture the contextual information on location. The output of the convolution layer is the location weight <italic>W<sub>Li</sub></italic> &#x2208;&#x211D;<sup><italic>H</italic>&#x202F;&#x00D7;&#x202F;<italic>W</italic>&#x202F;&#x00D7;&#x202F;1</sup>. Its form is shown in <xref ref-type="disp-formula" rid="EQ2">Equation 2</xref>.<disp-formula id="EQ2">
<label>(2)</label>
<mml:math id="M2">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi mathvariant="italic">Li</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mtext>conv</mml:mtext>
<mml:mrow>
<mml:mn>7</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext>concat</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>Avg</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi mathvariant="italic">Max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula><disp-formula id="EQ3">
<label>(3)</label>
<mml:math id="M3">
<mml:mi>W</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi mathvariant="italic">Fi</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi mathvariant="italic">Li</mml:mi>
</mml:msub>
</mml:math>
</disp-formula></p>
<p>Finally, <italic>W<sub>Fi</sub></italic> and <italic>W<sub>Li</sub></italic> are fused together through a simple addition operation to ensure information interaction, as shown in <xref ref-type="disp-formula" rid="EQ3">Equation 3</xref>.</p>
</sec>
<sec id="sec6">
<label>3.3</label>
<title>Model optimization and implementation details</title>
<p>In order to optimize the parameters of improved network model for FAZ segmentation, the joint loss function <italic>L</italic><sub>Jloss</sub> (<xref ref-type="bibr" rid="ref16">Jia et al., 2023</xref>; <xref ref-type="bibr" rid="ref31">Yang et al., 2023</xref>) is adopted, which includes the CE loss function <italic>L</italic><sub>CE</sub> and the Dice loss function <italic>L</italic><sub>Dice</sub>, as shown in <xref ref-type="disp-formula" rid="EQ4">Equation 4</xref>.<disp-formula id="EQ4">
<label>(4)</label>
<mml:math id="M4">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>Jloss</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>Dice</mml:mtext>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>CE</mml:mi>
</mml:msub>
</mml:math>
</disp-formula></p>
<p>Where, <italic>w</italic><sub>1</sub> and <italic>w</italic><sub>2</sub> are the weight coefficients, <italic>L</italic><sub>CE</sub> is used to promote the improved model to learn more accurate classification information and improve its generalization ability, while <italic>L</italic><sub>Dice</sub> is used to help it learn more accurate boundary segmentation information, so as to improve the segmentation accuracy. By combining these two loss functions, the robustness of the model and the accuracy of segmentation can be enhanced.</p>
<p>According to the experimental results, <italic>w</italic><sub>1</sub> is set to 0.8 and <italic>w</italic><sub>2</sub> is set to 0.2. <italic>L</italic><sub>CE</sub> and <italic>L</italic><sub>Dice</sub> can be expressed by the following <xref ref-type="disp-formula" rid="EQ5">Equations 5</xref> and <xref ref-type="disp-formula" rid="EQ6">6</xref>.<disp-formula id="EQ5">
<label>(5)</label>
<mml:math id="M5">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>CE</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>log</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo>+</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>log</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">]</mml:mo>
</mml:math>
</disp-formula><disp-formula id="EQ6">
<label>(6)</label>
<mml:math id="M6">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>Dice</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2223;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2223;</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula></p>
<p>Where, <italic>y</italic> is the true label, representing the category of the sample, <italic>p</italic> is the prediction probability that the sample belongs to the positive class; <italic>X</italic> represents the positive pixel set in the prediction segmentation image and <italic>Y</italic> represents the positive pixel set in the real segmentation image, <italic>|X|</italic> and <italic>|Y|</italic> respectively indicate the size of the pixel set, while <italic>|X &#x2229; Y|</italic> represents the intersection size of two-pixel sets.</p>
<p>The proposed FLA-UNet is implemented with the PyTorch framework using the NVIDIA A40 on Ubuntu, which has 48&#x202F;GB memory and 19.5 TFLOPs. The Adam optimizer is used with a learning rate of 0.01, and the model is trained for 200 epochs with a batch size of 8. Each original image is cropped to a size of 480&#x202F;&#x00D7;&#x202F;480 for model training. The ratio between the training set and testing set is 7:3. Each dataset is trained three times, and the final model is determined to be the model that has the optimal value of the selected performance metrics on the testing set.</p>
</sec>
</sec>
<sec id="sec7">
<label>4</label>
<title>Experiments and results</title>
<sec id="sec8">
<label>4.1</label>
<title>Datasets and evaluation metrics</title>
<p>In order to verify the performance of the proposed FLA-UNet, three public datasets OCTAGON (<xref ref-type="bibr" rid="ref8">D&#x00ED;az et al., 2019b</xref>), FAZID (<xref ref-type="bibr" rid="ref1">Agarwal et al., 2020</xref>) and OCTA-500 (<xref ref-type="bibr" rid="ref19">Li et al., 2020</xref>) with high image quality are selected, and their details are listed in <xref ref-type="table" rid="tab1">Table 1</xref>. OCTAGON contains 213 OCTA images with a resolution of 320&#x202F;&#x00D7;&#x202F;320, 144 of which are <italic>normal</italic> with a field of view (FOV) size of 6&#x202F;&#x00D7;&#x202F;6&#x202F;mm<sup>2</sup>, and 69 of which are <italic>diabetic</italic> with a FOV size of 3&#x202F;&#x00D7;&#x202F;3&#x202F;mm<sup>2</sup>. FAZID consists of 304 OCTA images with a resolution of 420&#x202F;&#x00D7;&#x202F;420, 88 of which are <italic>normal</italic>, 109 of which are <italic>myopic</italic> and 107 of which are <italic>diabetic</italic>. All of these OCTA images in FAZID have a FOV size of 6&#x202F;&#x00D7;&#x202F;6&#x202F;mm<sup>2</sup>. For OCTA-500, only three states of images are selected, which are <italic>normal</italic>, <italic>myopic</italic> and <italic>diabetic</italic>. These images are divided into two sub-datasets based on different resolutions and FOV sizes. In the sub-dataset with a resolution of 400&#x202F;&#x00D7;&#x202F;400 and a FOV size of 6&#x202F;&#x00D7;&#x202F;6&#x202F;mm<sup>2</sup>, there are 169 OCTA images, 91 of which are <italic>normal</italic>, 43 of which are <italic>myopic</italic> and 35 of which are <italic>diabetic</italic>. While in the sub-dataset with a resolution of 304&#x202F;&#x00D7;&#x202F;304 and a FOV size of 3&#x202F;&#x00D7;&#x202F;3&#x202F;mm<sup>2</sup>, there are 195 OCTA images, 160 of which are <italic>normal</italic>, only 6 of which are <italic>myopic</italic> and 29 of which are <italic>diabetic</italic>. 
The corresponding sample images are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>The details of three selected datasets.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dataset</th>
<th align="center" valign="top">FOV [mm<sup>2</sup>]</th>
<th align="left" valign="top">State</th>
<th align="center" valign="top" colspan="2">Number</th>
<th align="center" valign="top">Resolution</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">OCTAGON</td>
<td align="center" valign="middle">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="left" valign="middle">Normal</td>
<td align="center" valign="middle">144</td>
<td align="center" valign="middle" rowspan="2">213</td>
<td align="center" valign="middle" rowspan="2">320&#x202F;&#x00D7;&#x202F;320</td>
</tr>
<tr>
<td align="center" valign="middle">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="left" valign="middle">Diabetic</td>
<td align="center" valign="middle">69</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">FAZID</td>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="left" valign="middle">Normal</td>
<td align="center" valign="middle">88</td>
<td align="center" valign="middle" rowspan="3">304</td>
<td align="center" valign="middle" rowspan="3">420&#x202F;&#x00D7;&#x202F;420</td>
</tr>
<tr>
<td align="left" valign="middle">Myopic</td>
<td align="center" valign="middle">109</td>
</tr>
<tr>
<td align="left" valign="middle">Diabetic</td>
<td align="center" valign="middle">107</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">OCTA-500</td>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="left" valign="middle">Normal</td>
<td align="center" valign="middle">91</td>
<td align="center" valign="middle" rowspan="3">169</td>
<td align="center" valign="middle" rowspan="3">400&#x202F;&#x00D7;&#x202F;400</td>
</tr>
<tr>
<td align="left" valign="middle">Myopic</td>
<td align="center" valign="middle">43</td>
</tr>
<tr>
<td align="left" valign="middle">Diabetic</td>
<td align="center" valign="middle">35</td>
</tr>
<tr>
<td align="center" valign="middle" rowspan="3">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="left" valign="middle">Normal</td>
<td align="center" valign="middle">160</td>
<td align="center" valign="middle" rowspan="3">195</td>
<td align="center" valign="middle" rowspan="3">304&#x202F;&#x00D7;&#x202F;304</td>
</tr>
<tr>
<td align="left" valign="middle">Myopic</td>
<td align="center" valign="middle">6</td>
</tr>
<tr>
<td align="left" valign="middle">Diabetic</td>
<td align="center" valign="middle">29</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Some sample images from three selected datasets. The FAZ state is listed at the bottom of each column. On the left side of each row of images, the dataset where the picture is located and the field of view are marked.</p>
</caption>
<graphic xlink:href="frai-08-1463233-g003.tif">
<alt-text content-type="machine-generated">Comparison of retinal images using OCTA technology, categorized by scanning methods: OCTAGON and OCTA-500, and scan sizes, 3x3mm&#x00B2; and 6x6mm&#x00B2;. Images display retinal vasculature for normal, diabetic, and myopic conditions.</alt-text>
</graphic>
</fig>
<p>The quantitative evaluation metrics used for FAZ segmentation are Mean Intersection over Union (MIoU), Accuracy (ACC) and Dice coefficient (Dice), which are defined in <xref ref-type="disp-formula" rid="EQ7">Equations 7</xref>&#x2013;<xref ref-type="disp-formula" rid="EQ9">9</xref>.<disp-formula id="EQ7">
<label>(7)</label>
<mml:math id="M7">
<mml:mtext>MIoU</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:munderover>
<mml:mfrac>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula><disp-formula id="EQ8">
<label>(8)</label>
<mml:math id="M8">
<mml:mi>ACC</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula><disp-formula id="EQ9">
<label>(9)</label>
<mml:math id="M9">
<mml:mtext>Dice</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">TP</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula></p>
<p>Where <italic>TP</italic>, <italic>TN</italic>, <italic>FP</italic> and <italic>FN</italic> represent the numbers of true positive, true negative, false positive and false negative pixels respectively, <italic>k</italic> represents the number of segmentation categories, which is set to 1.</p>
</sec>
<sec id="sec9">
<label>4.2</label>
<title>Qualitative comparison results</title>
<p>The comparative experiments with the existing state-of-the-art methods (<xref ref-type="bibr" rid="ref26">Ronneberger et al., 2015</xref>; <xref ref-type="bibr" rid="ref10">Gu et al., 2019</xref>; <xref ref-type="bibr" rid="ref25">Mou et al., 2021</xref>; <xref ref-type="bibr" rid="ref33">Zhou et al., 2020</xref>; <xref ref-type="bibr" rid="ref15">Huang et al., 2020</xref>; <xref ref-type="bibr" rid="ref18">Li et al., 2020</xref>; <xref ref-type="bibr" rid="ref14">Hu et al., 2022</xref>; <xref ref-type="bibr" rid="ref20">Li et al., 2022</xref>) are carried out to prove the superiority of our proposed method.</p>
<p>The segmentation results of some examples using the existing representative methods [including the U-Net (<xref ref-type="bibr" rid="ref26">Ronneberger et al., 2015</xref>), CE-Net (<xref ref-type="bibr" rid="ref10">Gu et al., 2019</xref>), CS<sup>2</sup>-Net (<xref ref-type="bibr" rid="ref25">Mou et al., 2021</xref>), U-Net++ (<xref ref-type="bibr" rid="ref33">Zhou et al., 2020</xref>) and U-Net3&#x202F;+&#x202F;(<xref ref-type="bibr" rid="ref15">Huang et al., 2020</xref>)] and our method are shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>. We can see that U-Net just roughly segments the outline of the FAZ; it is difficult for the basic U-Net to accurately segment the FAZ with irregular contours for myopic and diabetic patients. Despite the good segmentation result on the sample image from OCTAGON, where some sharp regions are also well segmented, CE-Net cannot segment the outline of the FAZ well on the sample images from OCTA-500. A similar segmentation effect appears in CS<sup>2</sup>-Net. For U-Net++ and U-Net3+, the outlines of the FAZ are affected by blood vessels, resulting in imprecise segmentation. Compared with the above segmentation results, our results have clearer outlines or margins, and are more similar to GT.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Qualitative comparison of segmentation results using different methods. The left-most and right-most columns, respectively, correspond to the input images and their given GT images. The name at the bottom of each column (except for the left-most and right-most columns) refers to the used segmentation method. On the left side of each row of images, the dataset where the picture is located, the field of view and the FAZ state are marked.</p>
</caption>
<graphic xlink:href="frai-08-1463233-g004.tif">
<alt-text content-type="machine-generated">OCT and FAZID retinal images with segmented regions showing input and corresponding outputs from six neural networks: U-Net, CE-Net, CS&#x00B2;-Net, U-Net++, U-Net3+, and FLAU-Net, compared to ground truth (GT). Each row represents a different dataset condition such as normal, diabetic, or myopic, with varying scan dimensions, showing variations in segmentation across methods.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec10">
<label>4.3</label>
<title>Quantitative comparison results</title>
<p>The quantitative comparison results with the existing representative methods on OCTAGON, FAZID and OCTA-500 are shown in <xref ref-type="table" rid="tab2">Table 2</xref>. We can see from <xref ref-type="table" rid="tab2">Table 2</xref> that our method achieves the best segmentation performance on the first two datasets. The MIoU, ACC and Dice of our method on OCTAGON (3&#x202F;&#x00D7;&#x202F;3) are, respectively, 1.23, 1.51 and 0.8% higher than the results of the suboptimal method. Similarly, the MIoU, ACC and Dice of our method on FAZID are, respectively, 0.55, 2.93 and 1.19% higher than the results of the suboptimal method. Although the ACC of our method on OCTAGON (6&#x202F;&#x00D7;&#x202F;6) is 1.56% lower than that of U-Net3+, its MIoU and Dice are, respectively, 2.63 and 1.54% higher. On OCTA-500 (3&#x202F;&#x00D7;&#x202F;3), although both the MIoU and Dice of our method are lower than those of U-Net3+, the differences are small and the ACC of our method is still the highest. On OCTA-500 (6&#x202F;&#x00D7;&#x202F;6), in spite of the slightly lower MIoU than the result of U-Net++, our method still achieves the highest ACC, and its Dice is 4.17% higher than that of the suboptimal method.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Quantitative comparisons with the existing representative methods for FAZ segmentation.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dataset</th>
<th align="center" valign="top">FOV</th>
<th align="center" valign="top">Metrics</th>
<th align="center" valign="top">U-Net (<xref ref-type="bibr" rid="ref26">Ronneberger et al., 2015</xref>)</th>
<th align="center" valign="top">CE-Net (<xref ref-type="bibr" rid="ref10">Gu et al., 2019</xref>)</th>
<th align="center" valign="top">CS<sup>2</sup>-Net (<xref ref-type="bibr" rid="ref25">Mou et al., 2021</xref>)</th>
<th align="center" valign="top">U-Net++ (<xref ref-type="bibr" rid="ref33">Zhou et al., 2020</xref>)</th>
<th align="center" valign="top">U-Net3&#x202F;+&#x202F;(<xref ref-type="bibr" rid="ref15">Huang et al., 2020</xref>)</th>
<th align="center" valign="top">FLA-UNet</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="6">OCTAGON</td>
<td align="center" valign="middle" rowspan="3">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">80.29</td>
<td align="center" valign="middle">74.59</td>
<td align="center" valign="middle">75.31</td>
<td align="center" valign="middle">82.56</td>
<td align="center" valign="middle">83.05</td>
<td align="center" valign="middle"><bold>84.28</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">95.31</td>
<td align="center" valign="middle">87.57</td>
<td align="center" valign="middle">91.38</td>
<td align="center" valign="middle">95.03</td>
<td align="center" valign="middle">96.14</td>
<td align="center" valign="middle"><bold>97.65</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">86.16</td>
<td align="center" valign="middle">84.24</td>
<td align="center" valign="middle">84.57</td>
<td align="center" valign="middle">86.52</td>
<td align="center" valign="middle">87.43</td>
<td align="center" valign="middle"><bold>88.23</bold></td>
</tr>
<tr>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">81.04</td>
<td align="center" valign="middle">77.49</td>
<td align="center" valign="middle">73.65</td>
<td align="center" valign="middle">80.92</td>
<td align="center" valign="middle">83.83</td>
<td align="center" valign="middle"><bold>86.46</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">94.91</td>
<td align="center" valign="middle">84.37</td>
<td align="center" valign="middle">92.16</td>
<td align="center" valign="middle">93.05</td>
<td align="center" valign="middle"><bold>96.29</bold></td>
<td align="center" valign="middle">94.73</td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">86.13</td>
<td align="center" valign="middle">85.23</td>
<td align="center" valign="middle">82.64</td>
<td align="center" valign="middle">86.60</td>
<td align="center" valign="middle">87.27</td>
<td align="center" valign="middle"><bold>88.81</bold></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">FAZID</td>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">77.23</td>
<td align="center" valign="middle">78.79</td>
<td align="center" valign="middle">79.14</td>
<td align="center" valign="middle">77.80</td>
<td align="center" valign="middle">79.61</td>
<td align="center" valign="middle"><bold>80.16</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">87.66</td>
<td align="center" valign="middle">87.39</td>
<td align="center" valign="middle">89.74</td>
<td align="center" valign="middle">88.36</td>
<td align="center" valign="middle">90.15</td>
<td align="center" valign="middle"><bold>93.08</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">87.22</td>
<td align="center" valign="middle">84.68</td>
<td align="center" valign="middle">83.69</td>
<td align="center" valign="middle">86.55</td>
<td align="center" valign="middle">87.37</td>
<td align="center" valign="middle"><bold>88.56</bold></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">OCTA-500</td>
<td align="center" valign="middle" rowspan="3">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">79.21</td>
<td align="center" valign="middle">80.22</td>
<td align="center" valign="middle">80.83</td>
<td align="center" valign="middle">81.31</td>
<td align="center" valign="middle"><bold>83.70</bold></td>
<td align="center" valign="middle">83.11</td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">87.51</td>
<td align="center" valign="middle">84.35</td>
<td align="center" valign="middle">83.86</td>
<td align="center" valign="middle">87.76</td>
<td align="center" valign="middle">88.16</td>
<td align="center" valign="middle"><bold>89.31</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">89.56</td>
<td align="center" valign="middle">87.36</td>
<td align="center" valign="middle">89.12</td>
<td align="center" valign="middle">91.98</td>
<td align="center" valign="middle"><bold>95.22</bold></td>
<td align="center" valign="middle">93.27</td>
</tr>
<tr>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">76.88</td>
<td align="center" valign="middle">80.93</td>
<td align="center" valign="middle">79.31</td>
<td align="center" valign="middle"><bold>81.79</bold></td>
<td align="center" valign="middle">80.16</td>
<td align="center" valign="middle">81.55</td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">85.42</td>
<td align="center" valign="middle">83.39</td>
<td align="center" valign="middle">81.71</td>
<td align="center" valign="middle">86.32</td>
<td align="center" valign="middle">87.50</td>
<td align="center" valign="middle"><bold>88.64</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">87.70</td>
<td align="center" valign="middle">84.68</td>
<td align="center" valign="middle">83.69</td>
<td align="center" valign="middle">88.58</td>
<td align="center" valign="middle">87.44</td>
<td align="center" valign="middle"><bold>92.75</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The best results are marked in bold.</p>
</table-wrap-foot>
</table-wrap>
<p>In order to further demonstrate the superiority of our proposed method, we select some recent methods [including Automatic segmentation (<xref ref-type="bibr" rid="ref18">Li et al., 2020</xref>), Joint-Seg (<xref ref-type="bibr" rid="ref14">Hu et al., 2022</xref>), and RPS-Net (<xref ref-type="bibr" rid="ref20">Li et al., 2022</xref>)] for comparison. Due to its universality and importance in the medical image segmentation task, the Dice is selected as the indicator for further comparison. The comparison results on OCTAGON, FAZID and OCTA-500 are shown in <xref ref-type="table" rid="tab3">Table 3</xref>. As can be seen from <xref ref-type="table" rid="tab3">Table 3</xref>, our method achieves the highest Dice on all three datasets, which confirms its superiority over the selected recent methods.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>The Dice coefficients of quantitative comparisons with the recent methods for FAZ segmentation.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Method</th>
<th align="center" valign="top" colspan="2">OCTAGON</th>
<th align="center" valign="top">FAZID</th>
<th align="center" valign="top" colspan="2">OCTA-500</th>
</tr>
<tr>
<th align="center" valign="top">3&#x202F;&#x00D7;&#x202F;3</th>
<th align="center" valign="top">6&#x202F;&#x00D7;&#x202F;6</th>
<th align="center" valign="top">6&#x202F;&#x00D7;&#x202F;6</th>
<th align="center" valign="top">3&#x202F;&#x00D7;&#x202F;3</th>
<th align="center" valign="top">6&#x202F;&#x00D7;&#x202F;6</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Automatic segmentation (<xref ref-type="bibr" rid="ref18">Li et al., 2020</xref>)</td>
<td align="center" valign="middle">85.00</td>
<td align="center" valign="middle">83.62</td>
<td align="center" valign="middle">85.15</td>
<td align="center" valign="middle">88.64</td>
<td align="center" valign="middle">85.21</td>
</tr>
<tr>
<td align="left" valign="middle">Joint-Seg (<xref ref-type="bibr" rid="ref14">Hu et al., 2022</xref>)</td>
<td align="center" valign="middle">73.25</td>
<td align="center" valign="middle">75.17</td>
<td align="center" valign="middle">74.29</td>
<td align="center" valign="middle">87.01</td>
<td align="center" valign="middle">90.29</td>
</tr>
<tr>
<td align="left" valign="middle">RPS-Net (<xref ref-type="bibr" rid="ref20">Li et al., 2022</xref>)</td>
<td align="center" valign="middle">87.47</td>
<td align="center" valign="middle">86.61</td>
<td align="center" valign="middle">84.98</td>
<td align="center" valign="middle">84.00</td>
<td align="center" valign="middle">91.68</td>
</tr>
<tr>
<td align="left" valign="middle">FLA-UNet</td>
<td align="center" valign="middle"><bold>88.23</bold></td>
<td align="center" valign="middle"><bold>88.81</bold></td>
<td align="center" valign="middle"><bold>88.56</bold></td>
<td align="center" valign="middle"><bold>89.31</bold></td>
<td align="center" valign="middle"><bold>92.75</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec11">
<label>4.4</label>
<title>Ablation studies</title>
<p>To demonstrate the effectiveness of FLABs and joint loss function used in our proposed method for FAZ segmentation, a series of ablation experiments are conducted. The results of quantitative comparisons for different ablation methods on three datasets are shown in <xref ref-type="table" rid="tab4">Table 4</xref>.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Quantitative results of different ablation methods for FAZ segmentation.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dataset</th>
<th align="center" valign="top">FOV</th>
<th align="center" valign="top">Metrics</th>
<th align="center" valign="top">U-Net</th>
<th align="center" valign="top">U-Net&#x202F;+&#x202F;FLABs</th>
<th align="center" valign="top">FLA-UNet</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="6">OCTAGON</td>
<td align="center" valign="middle" rowspan="3">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">80.29</td>
<td align="center" valign="middle">81.24</td>
<td align="center" valign="middle"><bold>84.28</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">95.31</td>
<td align="center" valign="middle"><bold>98.29</bold></td>
<td align="center" valign="middle">97.65</td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">86.16</td>
<td align="center" valign="middle">87.38</td>
<td align="center" valign="middle"><bold>88.23</bold></td>
</tr>
<tr>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">81.04</td>
<td align="center" valign="middle">83.47</td>
<td align="center" valign="middle"><bold>86.46</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">94.91</td>
<td align="center" valign="middle"><bold>95.75</bold></td>
<td align="center" valign="middle">94.73</td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">86.13</td>
<td align="center" valign="middle">87.62</td>
<td align="center" valign="middle"><bold>88.81</bold></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">FAZID</td>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">77.23</td>
<td align="center" valign="middle">79.65</td>
<td align="center" valign="middle"><bold>80.16</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">87.66</td>
<td align="center" valign="middle">92.11</td>
<td align="center" valign="middle"><bold>93.08</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">87.22</td>
<td align="center" valign="middle">88.31</td>
<td align="center" valign="middle"><bold>88.56</bold></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">OCTA-500</td>
<td align="center" valign="middle" rowspan="3">3&#x202F;&#x00D7;&#x202F;3</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">79.21</td>
<td align="center" valign="middle">80.26</td>
<td align="center" valign="middle"><bold>83.11</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">87.51</td>
<td align="center" valign="middle">87.41</td>
<td align="center" valign="middle"><bold>89.31</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">89.56</td>
<td align="center" valign="middle">91.46</td>
<td align="center" valign="middle"><bold>93.27</bold></td>
</tr>
<tr>
<td align="center" valign="middle" rowspan="3">6&#x202F;&#x00D7;&#x202F;6</td>
<td align="center" valign="middle">MIoU</td>
<td align="center" valign="middle">76.88</td>
<td align="center" valign="middle">80.52</td>
<td align="center" valign="middle"><bold>81.55</bold></td>
</tr>
<tr>
<td align="center" valign="middle">ACC</td>
<td align="center" valign="middle">85.42</td>
<td align="center" valign="middle">86.37</td>
<td align="center" valign="middle"><bold>88.64</bold></td>
</tr>
<tr>
<td align="center" valign="middle">Dice</td>
<td align="center" valign="middle">87.70</td>
<td align="center" valign="middle">91.77</td>
<td align="center" valign="middle"><bold>92.75</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As can be seen from <xref ref-type="table" rid="tab4">Table 4</xref>, when FLABs are added in U-Net, the MIoU, ACC and Dice are improved in most cases. This proves that the strategy of introducing FLABs into U-Net is effective. When we further use the joint loss function to adjust the influence of the CE loss function and the Dice loss function, the MIoU, ACC and Dice are further improved in comparison with U-Net&#x202F;+&#x202F;FLABs in most cases. In terms of MIoU and Dice, our proposed FLA-UNet achieves the best performance on three datasets. Although the ACC of our proposed FLA-UNet is not the best on OCTAGON, it achieves the highest values on other two datasets. This proves that the strategy of using the joint loss function also helps to improve the segmentation accuracy.</p>
<p>Based on the above analysis, we can confirm that FLABs and joint loss function are effective, and without them, the model&#x2019;s segmentation accuracy will deteriorate.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec12">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, a novel improved method named FLA-UNet is proposed for FAZ segmentation in OCTA images. On the basis of U-Net, by embedding an innovative FLAB into each decoder, the FAZ boundaries are accurately predicted; and by using the joint loss function, the optimization of the whole continuity of an image and its boundary recovery are realized. The effectiveness of FLABs and joint loss function used in FLA-UNet is verified by a series of ablation experiments conducted on OCTAGON, FAZID and OCTA-500. The quantitative comparisons with the existing representative methods on the three datasets show that our proposed FLA-UNet is superior to other methods, in most cases in terms of the MIoU, ACC and Dice coefficient. Accordingly, their qualitative comparison results also confirm this point. In addition, further quantitative comparisons with some recent methods also demonstrate the superiority of our proposed FLA-UNet. It is worth noting that since the OCTA images may be affected by eye movement, improper device parameter setting or ocular lesions of patients, which leads to blur, motion artifacts and occlusion in the images, the input images may have poor quality, as shown in the left-most column in <xref ref-type="fig" rid="fig5">Figure 5</xref>. Although our proposed FLA-UNet can segment FAZ to a certain extent, there is still a significant difference between the segmentation results and their GT values, which will lead to some problems in clinical application. In further work, we will try to perform data preprocessing on the input image to enhance the edge contrast between FAZ and its background, to improve the accuracy and reliability of segmentation. Furthermore, the optimization and adjustment of the loss function will also be attempted, such as introducing different train losses commonly used for non-medical applications (<xref ref-type="bibr" rid="ref28">Sherwani et al., 2020</xref>), to enhance the robustness and generalization of the model. 
It is believed that the optimization and application of our proposed FLA-UNet for FAZ segmentation will improve the accuracy of auxiliary diagnosis of fundus diseases.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Qualitative results using our proposed FLA-UNet. The left-most column from top to bottom corresponds to the input images, respectively, under the conditions of blur, motion artifacts and occlusion. The middle column refers to the segmentation results using our proposed FLA-UNet. The right-most column refers to their given GT images from OCTAGON.</p>
</caption>
<graphic xlink:href="frai-08-1463233-g005.tif">
<alt-text content-type="machine-generated">Three rows show different artifacts affecting images. The first column contains original images with labels: blurring, motion artifacts, and occlusion. The second column shows segmentation images, and the third column presents ground truth (GT) data. Each row illustrates how these issues impact segmentation accuracy.</alt-text>
</graphic>
</fig>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec13">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found at: <ext-link xlink:href="https://ieee-dataport.org/open-access/octa-500" ext-link-type="uri">https://ieee-dataport.org/open-access/octa-500</ext-link>. The source code is available at: <ext-link xlink:href="https://github.com/LiCao-WHPU/FLA-UNet" ext-link-type="uri">https://github.com/LiCao-WHPU/FLA-UNet</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="sec14">
<title>Author contributions</title>
<p>WL: Conceptualization, Data curation, Methodology, Software, Writing &#x2013; original draft. LC: Conceptualization, Funding acquisition, Methodology, Writing &#x2013; review &#x0026; editing. HD: Conceptualization, Funding acquisition, Methodology, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec15">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was funded by Research Funding of Wuhan Polytechnic University No. 2023RZ036.</p>
</sec>
<sec sec-type="COI-statement" id="sec16">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="sec17">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Agarwal</surname> <given-names>A.</given-names></name> <name><surname>Balaji</surname> <given-names>J. J.</given-names></name> <name><surname>Raman</surname> <given-names>R.</given-names></name> <name><surname>Lakshminarayanan</surname> <given-names>V.</given-names></name></person-group> (<year>2020</year>). <article-title>The foveal avascular zone image database (FAZID)</article-title>. <conf-name>Conference on applications of digital image processing</conf-name>.</citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alam</surname> <given-names>M.</given-names></name> <name><surname>Le</surname> <given-names>D.</given-names></name> <name><surname>Lim</surname> <given-names>J. I.</given-names></name></person-group> (<year>2019</year>). <article-title>Supervised machine learning based multi-task artificial intelligence classification of retinopathies</article-title>. <source>J. Clin. Med.</source> <volume>8</volume>:<fpage>872</fpage>. doi: <pub-id pub-id-type="doi">10.3390/jcm8060872</pub-id>, PMID: <pub-id pub-id-type="pmid">31216768</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Balaji</surname> <given-names>J. J.</given-names></name> <name><surname>Agarwal</surname> <given-names>A.</given-names></name> <name><surname>Raman</surname> <given-names>R.</given-names></name> <name><surname>Lakshminarayanan</surname> <given-names>V.</given-names></name></person-group> (<year>2020</year>). <article-title>Comparison of foveal avascular zone in diabetic retinopathy, high myopia, and normal fundus images</article-title>. <conf-name>Ophthalmic Technologies XXX</conf-name>.</citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bourennane</surname> <given-names>A. H. A. R. C.</given-names></name></person-group> (<year>2010</year>). <article-title>Detection of the foveal avascular zone on retinal angiograms using Markov random fields</article-title>. <source>Digit. Signal Process.</source> <volume>20</volume>, <fpage>149</fpage>&#x2013;<lpage>154</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.dsp.2009.06.005</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Lan</surname> <given-names>H.</given-names></name> <name><surname>Meng</surname> <given-names>Y.</given-names></name></person-group>. (<year>2022</year>). <article-title>FAZ-BV: a diabetic macular ischemia grading framework combining Faz attention network and blood vessel enhancement filters</article-title>. <conf-name>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>.</citation></ref>
<ref id="ref6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chui</surname> <given-names>T. Y. P.</given-names></name> <name><surname>Zhong</surname> <given-names>Z.</given-names></name> <name><surname>Song</surname> <given-names>H.</given-names></name></person-group> (<year>2012</year>). <article-title>Foveal avascular zone and its relationship to foveal pit shape</article-title>. <source>Optom. Vis. Sci.</source> <volume>89</volume>, <fpage>602</fpage>&#x2013;<lpage>610</lpage>. doi: <pub-id pub-id-type="doi">10.1097/OPX.0b013e3182504227</pub-id>, PMID: <pub-id pub-id-type="pmid">22426172</pub-id></citation></ref>
<ref id="ref7"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>D&#x00ED;az</surname> <given-names>M.</given-names></name> <name><surname>De</surname> <given-names>M. J.</given-names></name> <name><surname>Novo</surname> <given-names>J.</given-names></name> <name><surname>Ortega</surname> <given-names>M.</given-names></name></person-group> (<year>2019a</year>). <article-title>Automatic wide field registration and mosaicking of OCTA images using vascularity information</article-title>. <conf-name>23rd international conference on knowledge-based and intelligent information &#x0026; engineering systems</conf-name>, <volume>159</volume>, <fpage>505</fpage>&#x2013;<lpage>513</lpage>.</citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x00ED;az</surname> <given-names>M.</given-names></name> <name><surname>Novo</surname> <given-names>J.</given-names></name> <name><surname>Cutr&#x00ED;n</surname> <given-names>P.</given-names></name> <name><surname>G&#x00F3;mez-Ulla</surname> <given-names>F.</given-names></name> <name><surname>Penedo</surname> <given-names>M. G.</given-names></name> <name><surname>Ortega</surname> <given-names>M.</given-names></name></person-group> (<year>2019b</year>). <article-title>Automatic segmentation of the foveal avascular zone in ophthalmological OCT-A images</article-title>. <source>PLoS One</source> <volume>14</volume>:<fpage>e0212364</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0212364</pub-id>, PMID: <pub-id pub-id-type="pmid">30794594</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghassemi</surname> <given-names>F.</given-names></name> <name><surname>Mirzadeh</surname> <given-names>M.</given-names></name></person-group> (<year>2007</year>). <article-title>Automated segmentation of the foveal avascular zone in angiograms using region growing</article-title>. <source>Comput. Biol. Med.</source> <volume>37</volume>, <fpage>70</fpage>&#x2013;<lpage>75</lpage>.</citation></ref>
<ref id="ref10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gu</surname> <given-names>Z. A.</given-names></name> <name><surname>Cheng</surname> <given-names>J.</given-names></name> <name><surname>Fu</surname> <given-names>H. Z.</given-names></name></person-group> (<year>2019</year>). <article-title>CE-net: context encoder network for 2D medical image segmentation</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>38</volume>, <fpage>2281</fpage>&#x2013;<lpage>2292</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TMI.2019.2903562</pub-id>, PMID: <pub-id pub-id-type="pmid">30843824</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Jia</surname> <given-names>Y.</given-names></name> <name><surname>Huang</surname> <given-names>D.</given-names></name> <name><surname>Camino</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>Med-net, a neural network for automated detection of avascular area in OCT angiography</article-title>. <source>Biomed. Opt. Express</source> <volume>9</volume>, <fpage>5147</fpage>&#x2013;<lpage>5158</lpage>. doi: <pub-id pub-id-type="doi">10.1364/BOE.9.005147</pub-id>, PMID: <pub-id pub-id-type="pmid">30460119</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>M.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Cheong</surname> <given-names>A. M.</given-names></name> <name><surname>Corvi</surname> <given-names>F.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Can deep learning improve the automatic segmentation of deep foveal avascular zone in optical coherence tomography angiography?</article-title> <source>Biomed. Signal Process. Control</source> <volume>66</volume>:<fpage>102456</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bspc.2021.102456</pub-id></citation></ref>
<ref id="ref13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>M.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Cheong</surname> <given-names>A. M.</given-names></name> <name><surname>Dai</surname> <given-names>H.</given-names></name> <name><surname>Lam</surname> <given-names>A. K.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>Automatic quantification of superficial foveal avascular zone in optical coherence tomography angiography implemented with deep learning</article-title>. <source>Vis. Comput. Ind. Biomed. Art</source> <volume>2</volume>:<fpage>9</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s42492-019-0031-8</pub-id></citation></ref>
<ref id="ref14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>K.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Joint-seg: treat foveal avascular zone and retinal vessel segmentation in OCTA images as a joint task</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>71</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIM.2022.3193188</pub-id></citation></ref>
<ref id="ref15"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>H.</given-names></name> <name><surname>Lin</surname> <given-names>L</given-names></name> <name><surname>Tong</surname> <given-names>R.</given-names></name> <name><surname>Hu</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>U-net 3+: a full-scale connected U-net for medical image segmentation</article-title>. <conf-name>International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>.</citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jia</surname> <given-names>W.</given-names></name> <name><surname>Ma</surname> <given-names>S.</given-names></name> <name><surname>Geng</surname> <given-names>P.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>DT-net: joint dual-input transformer and CNN for retinal vessel segmentation</article-title>. <source>Comput. Mater. Contin.</source> <volume>76</volume>, <fpage>3393</fpage>&#x2013;<lpage>3411</lpage>. doi: <pub-id pub-id-type="doi">10.32604/cmc.2023.040091</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kashani</surname> <given-names>A. H.</given-names></name> <name><surname>Chen</surname> <given-names>C. L.</given-names></name> <name><surname>Gahm</surname> <given-names>J. K.</given-names></name></person-group> (<year>2017</year>). <article-title>Optical coherence tomography angiography: a comprehensive review of current methods and clinical applications</article-title>. <source>Prog. Retin. Eye Res.</source> <volume>60</volume>, <fpage>66</fpage>&#x2013;<lpage>100</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.preteyeres.2017.07.002</pub-id>, PMID: <pub-id pub-id-type="pmid">28760677</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Ji</surname> <given-names>Z.</given-names></name> <name><surname>Xie</surname> <given-names>K.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>Image projection network: 3D to 2D image segmentation in octa images</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>39</volume>, <fpage>3343</fpage>&#x2013;<lpage>3354</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TMI.2020.2992244</pub-id>, PMID: <pub-id pub-id-type="pmid">32365023</pub-id></citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <name><surname>Xu</surname> <given-names>Q.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Ji</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>OCTA-500: a retinal dataset for optical coherence tomography angiography study</article-title>. <source>Med. Image Anal.</source> <volume>93</volume>:<fpage>103092</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.media.2024.103092</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>W. S.</given-names></name> <name><surname>Zhang</surname> <given-names>H. C.</given-names></name> <name><surname>Li</surname> <given-names>F. Y.</given-names></name></person-group> (<year>2022</year>). <article-title>RPS-net: an effective retinal image projection segmentation network for retinal vessels and foveal avascular zone based on OCTA data</article-title>. <source>Med. Phys.</source> <volume>49</volume>, <fpage>3830</fpage>&#x2013;<lpage>3844</lpage>. doi: <pub-id pub-id-type="doi">10.1002/mp.15608</pub-id>, PMID: <pub-id pub-id-type="pmid">35297061</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Lyu</surname> <given-names>J.</given-names></name> <name><surname>Cheng</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>BSDA-Net: a boundary shape and distance aware joint learning framework for segmenting and classifying OCTA images</article-title>. In <conf-name>The 24th International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI)</conf-name>, <comment>Strasbourg, France, 27th September to 1st October</comment>.</citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Automated segmentation of foveal avascular zone in optical coherence tomography angiography images using sparse deep learning</article-title>. <source>J. Biomed. Opt.</source> <volume>24</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>.</citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Yao</surname> <given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>Dual-branch image projection network for geographic atrophy segmentation in retinal OCT images</article-title>. <source>Sci. Rep.</source> <volume>15</volume>:<fpage>6535</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-025-90709-6</pub-id>, PMID: <pub-id pub-id-type="pmid">39994280</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Yan</surname> <given-names>S.</given-names></name> <name><surname>Lu</surname> <given-names>N.</given-names></name> <name><surname>Yang</surname> <given-names>D.</given-names></name> <name><surname>Fan</surname> <given-names>C.</given-names></name> <name><surname>Lv</surname> <given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Automatic segmentation of foveal avascular zone based on adaptive watershed algorithm in retinal optical coherence tomography angiography images</article-title>. <source>J. Innov. Opt. Health Sci.</source> <volume>15</volume>:<fpage>13</fpage>. doi: <pub-id pub-id-type="doi">10.1142/S1793545822420019</pub-id></citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mou</surname> <given-names>L.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Fu</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Cheng</surname> <given-names>J.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>CS<sup>2</sup>-Net: deep learning segmentation of curvilinear structures in medical imaging</article-title>. <source>Med. Image Anal.</source> <volume>67</volume>:<fpage>101874</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.media.2020.101874</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Ronneberger</surname> <given-names>O.</given-names></name> <name><surname>Fischer</surname> <given-names>P.</given-names></name> <name><surname>Brox</surname> <given-names>T.</given-names></name></person-group> (<year>2015</year>). <article-title>U-net: Convolutional networks for biomedical image segmentation</article-title>. <conf-name>Medical Image Computing and Computer Assisted Intervention</conf-name>, Springer, Cham <volume>9351</volume>, <fpage>234</fpage>&#x2013;<lpage>241</lpage>.</citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sherwani</surname> <given-names>M. K.</given-names></name> <name><surname>Gopalakrishnan</surname> <given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>A systematic literature review: deep learning techniques for synthetic medical image generation and their applications in radiotherapy</article-title>. <source>Front. Radiol.</source> <volume>4</volume>:<fpage>1385742</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fradi.2024.1385742</pub-id>, PMID: <pub-id pub-id-type="pmid">38601888</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Sherwani</surname> <given-names>M.K.</given-names></name> <name><surname>Zaffino</surname> <given-names>P.</given-names></name> <name><surname>Bruno</surname> <given-names>P.</given-names></name> <name><surname>Spadea</surname> <given-names>M.F.</given-names></name> <name><surname>Calimeri</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>Evaluating the impact of training loss on MR to synthetic CT conversion</article-title>. <conf-name>International conference on machine learning, optimization, and data science</conf-name>. <publisher-name>LOD</publisher-name>. <comment>Lecture Notes in Computer Science</comment>, Springer, Cham <volume>12565</volume>.</citation></ref>
<ref id="ref29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sim&#x00F3;</surname> <given-names>A.</given-names></name> <name><surname>Ves</surname> <given-names>E. D.</given-names></name></person-group> (<year>2001</year>). <article-title>Segmentation of macular fluorescein angiographies. A statistical approach</article-title>. <source>Pattern Recogn.</source> <volume>34</volume>, <fpage>795</fpage>&#x2013;<lpage>809</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0031-3203(00)00032-7</pub-id></citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Fan</surname> <given-names>J.</given-names></name> <name><surname>Bai</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Xiao</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>ODDF-net: multi-object segmentation in 3D retinal OCTA using optical density and disease features</article-title>. <source>Knowl.-Based Syst.</source> <volume>306</volume>:<fpage>112704</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.knosys.2024.112704</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>D.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Multi-task thyroid tumor segmentation based on the joint loss function</article-title>. <source>Biomed. Signal process. Control</source> <volume>79</volume>:<fpage>104249</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bspc.2022.104249</pub-id>, PMID: <pub-id pub-id-type="pmid">40575459</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z. K.</given-names></name> <name><surname>Qu</surname></name> <name><surname>Yu</surname> <given-names>W. H.</given-names></name> <name><surname>Xiao</surname></name> <name><surname>Zhang</surname></name> <name><surname>Dong</surname> <given-names>F.</given-names></name></person-group> (<year>2016</year>). <article-title>A morphological study of the foveal avascular zone in patients with diabetes mellitus using optical coherence tomography angiography</article-title>. <source>Graefes Arch. Clin. Exp. Ophthalmol.</source> <volume>254</volume>, <fpage>873</fpage>&#x2013;<lpage>879</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00417-015-3143-7</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Z.</given-names></name> <name><surname>Siddiquee</surname> <given-names>M. R.</given-names></name> <name><surname>Tajbakhsh</surname> <given-names>N.</given-names></name> <name><surname>Liang</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>U-net++: redesigning skip connections to exploit multiscale features in image segmentation</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>39</volume>, <fpage>1856</fpage>&#x2013;<lpage>1867</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TMI.2019.2959609</pub-id>, PMID: <pub-id pub-id-type="pmid">31841402</pub-id></citation></ref>
</ref-list>
</back>
</article>