<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Signal Process.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Signal Processing</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Signal Process.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-8198</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1527975</article-id>
<article-id pub-id-type="doi">10.3389/frsip.2025.1527975</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>MSWAFFNet: improved segmentation of nucleus using feature fusion of multi scale wavelet attention</article-title>
<alt-title alt-title-type="left-running-head">Zhang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frsip.2025.1527975">10.3389/frsip.2025.1527975</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hu</surname>
<given-names>Yangsheng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2890698"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>An</surname>
<given-names>Zhenzhou</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Yuxi Normal University</institution>, <city>Yuxi</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Kunming University of Science and Technology</institution>, <city>Kunming</city>, <country country="CN">China</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Honghe University</institution>, <city>Honghe</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Zhenzhou An, <email xlink:href="an@yxnu.edu.cn">an@yxnu.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-07">
<day>07</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>5</volume>
<elocation-id>1527975</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>11</month>
<year>2024</year>
</date>
<date date-type="rev-recd">
<day>03</day>
<month>09</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Zhang, Hu and An.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zhang, Hu and An</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-07">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Nucleus segmentation plays an essential role in digital pathology, particularly in cancer diagnosis and the evaluation of treatment efficacy. Accurate nucleus segmentation provides critical guidance for pathologists. However, due to the wide variability in structure, color, and morphology of nuclei in histopathological images, automated segmentation remains highly challenging. Previous neural networks employing wavelet-guided, boundary-aware attention mechanisms have demonstrated certain advantages in delineating nuclear boundaries. However, their feature fusion strategies have been suboptimal, limiting overall segmentation accuracy.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, we propose a novel architecture&#x2014;the Multi-Scale Wavelet Attention Feature Fusion Network (MSWAFFNet)&#x2014;which incorporates an Attention Feature Fusion (AFF) mechanism to effectively integrate high-frequency features extracted via 2D Discrete Wavelet Transform (DWT) from different U-Net scales. This approach enhances boundary perception and improves segmentation performance. To address the variation across datasets, we apply a series of preprocessing steps to normalize the color distribution and statistical characteristics, thereby ensuring training consistency.</p>
</sec>
<sec>
<title>Results and Discussion</title>
<p>The proposed method is evaluated on three public histopathology datasets (DSB, TNBC, CoNIC), achieving Dice coefficients of 91.33%, 80.56%, and 91.03%, respectively&#x2014;demonstrating superior segmentation performance across diverse scenarios.</p>
</sec>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>image segmentation</kwd>
<kwd>nucleus segmentation</kwd>
<kwd>attention fusion</kwd>
<kwd>discrete wavelet transform</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. The authors gratefully acknowledge the financial support from the Project of Ding Zhiming Academician Expert Workstation (No: 202305AF150036). This work was also sponsored by the Opening Foundation of Yunnan Key Laboratory of Smart City in Cyberspace Security (No. 202105AG070010).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="4"/>
<ref-count count="33"/>
<page-count count="9"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Image Processing</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>In recent years, deep learning has achieved remarkable success and significantly enhanced medical imaging performance. Beyond improving image quality, deep learning has also enabled novel capabilities such as image classification, segmentation, and cross-modality image translation. Numerous studies have leveraged automated approaches to assist in diagnosis and address specific challenges across various medical imaging modalities. The rapid advancement of image processing techniques based on convolutional neural networks (CNNs) has revolutionized both medical diagnostics and treatment planning. From identifying complex patterns in clinical images to accurately segmenting lesion areas, neural network&#x2013;based methods have become indispensable tools in modern healthcare. Histopathology, as a critical component of medical diagnostics, has also benefited substantially from deep learning advancements. Nevertheless, existing models still require further optimization to improve generalization and ensure robust performance across diverse clinical scenarios.</p>
<p>Nucleus segmentation in histopathological images is a crucial step in the analysis of microscope-acquired data. The quality of these images and the effectiveness of their processing significantly impact medical decision-making, enabling earlier diagnoses and potentially reducing the cost of subsequent treatments (<xref ref-type="bibr" rid="B16">Krupinski, 2000</xref>; <xref ref-type="bibr" rid="B7">Gali&#x107; et al., 2023</xref>). Due to the inherent complexity of this task, automating the segmentation process remains challenging. In this context, deep learning frameworks have gained increasing popularity (<xref ref-type="bibr" rid="B30">Xu et al., 2023</xref>). However, nucleus segmentation must address several difficulties, including variations in image quality across different microscopes, diverse staining protocols, blurred cell boundaries, intensity heterogeneity across cancer subtypes, and the close proximity or overlapping of nuclei in histopathological images (<xref ref-type="bibr" rid="B21">Mouelhi et al., 2018</xref>). Among deep learning&#x2013;based segmentation approaches, U-Net architectures remain the most widely adopted (<xref ref-type="bibr" rid="B1">Al Qurri and Almekkawy, 2023</xref>). For instance, U-Net achieves high accuracy on classical benchmark datasets (<xref ref-type="bibr" rid="B4">Castro et al., 2024</xref>), but its performance degrades considerably on more complex or varied datasets (<xref ref-type="bibr" rid="B2">Azad et al., 2024</xref>). To improve performance, a Fast U-Net (FU-Net) was proposed in (<xref ref-type="bibr" rid="B24">Olimov et al., 2021</xref>), which redesigned the encoder of the traditional U-Net by introducing bottleneck convolutional layers into both encoder and decoder branches, improving computational efficiency and segmentation accuracy. 
Nevertheless, accurate nucleus segmentation remains difficult, particularly in separating clustered or overlapping nuclei in microscopic images (<xref ref-type="bibr" rid="B8">Gehlot et al., 2020</xref>). One major limitation of U-Net&#x2013;based networks arises from the downsampling process, where operations such as max or average pooling often violate the Nyquist sampling theorem. This can result in the loss of high-frequency detail and distortion of structural information in the low-frequency domain (<xref ref-type="bibr" rid="B28">Wang et al., 2024</xref>). To preserve image details, several studies have explored using Discrete Wavelet Transform (DWT) as a replacement for traditional pooling layers (<xref ref-type="bibr" rid="B29">Williams and Li, 2018</xref>). More recent architectural advancements include HanNet (<xref ref-type="bibr" rid="B12">H et al., 2021</xref>), a hybrid attention nested U-Net incorporating dense connections for improved feature representation. In (<xref ref-type="bibr" rid="B27">Vahadane et al., 2021</xref>), a dual-encoder attention U-Net was proposed, introducing a secondary encoder to better capture attention-relevant features. A multitask U-Net variant was introduced in (<xref ref-type="bibr" rid="B32">Zhao et al., 2021</xref>), where a context encoding layer was applied after each encoder and its output was fused with decoder features using attention mechanisms. Another enhancement was proposed in (<xref ref-type="bibr" rid="B17">Lal et al., 2021</xref>), where residual blocks were added to extract high-level semantic features, coupled with attention mechanisms to improve decoding. Building upon these developments, <xref ref-type="bibr" rid="B14">Imtiaz et al. (2023)</xref> proposed a boundary-aware, wavelet-guided network that combines encoder and decoder information via attention while generating explicit boundary cues. 
This approach helped preserve fine structural details and small nuclei, and the incorporation of wavelet features led to improved segmentation performance over prior methods.</p>
<p>While feature fusion is widely adopted in deep learning&#x2013;based segmentation models, it is not universally suitable across all scenarios (<xref ref-type="bibr" rid="B5">Dai et al., 2021</xref>). Simple fusion strategies such as direct addition or concatenation often lack adaptability to spatial variation and semantic heterogeneity, particularly in histopathological images where nuclei are densely packed or overlapping. These fixed strategies may dilute critical high-frequency boundary information or amplify irrelevant noise, ultimately compromising segmentation accuracy. Previous studies such as (<xref ref-type="bibr" rid="B18">Li et al., 2019</xref>; <xref ref-type="bibr" rid="B31">Zhang et al., 2022</xref>) have focused on soft feature selection within single layers, leaving cross-layer fusion&#x2014;especially via skip connections&#x2014;largely unaddressed. This limitation also extends to U-Net variants that incorporate discrete wavelet feature extraction (<xref ref-type="bibr" rid="B14">Imtiaz et al., 2023</xref>). Moreover, in attention-based modules, the success of feature fusion heavily relies on accurately learning fusion weights across multi-scale representations. Although wavelet transforms help preserve high-frequency boundary features, the subsequent fusion and utilization of these features remain suboptimal in many existing frameworks.</p>
<p>To address the limitations of existing segmentation methods, we propose a novel architecture called the Multi-Scale Wavelet Attention Feature Fusion Network (MSWAFFNet). Prior to feeding data into the network, we apply a comprehensive preprocessing pipeline designed to mitigate data diversity issues and normalize color distribution across different image modalities. In contrast to traditional skip connection strategies and conventional feature aggregation units, our approach performs separate fusion of wavelet-based features and boundary-aware features at multiple scales. This design enhances the model&#x2019;s ability to capture fine-grained structural information. To validate the effectiveness and generalizability of our method, we conducted extensive experiments on three publicly available datasets. These datasets encompass a large number of histopathological images from various organs and disease types, thereby ensuring a robust assessment of cross-dataset generalization performance.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<label>2</label>
<title>Materials and methods</title>
<p>In this section, the proposed method is described in detail, including the preprocessing steps, the overall network architecture, the boundary wavelet attention module, and the feature fusion module. The model proposed in this paper uses U-Net as the backbone network and incorporates a boundary wavelet-aware attention module and a multi-scale attention fusion module to extract and integrate boundary information into the U-Net through an attention mechanism. The overall structure is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The structure of MSWAFFNet. The basic U-Net includes three downsampling layers and three upsampling layers. In each downsampling step, DWT is used to extract wavelet boundary information, which is then fused into boundary information through a fusion module. Subsequently, all different wavelet boundary information is upsampled and fused through an attention fusion mechanism. Finally, the ultimate output is obtained by combining all the outputs.</p>
</caption>
<graphic xlink:href="frsip-05-1527975-g001.tif">
<alt-text content-type="machine-generated">Flowchart of a convolutional neural network architecture for image processing. It includes layers such as Conv BN ReLU, DWT, Boundary Attention, and Boundary Fusion. Arrows indicate data flow, with processes like max pooling and up sampling highlighted.</alt-text>
</graphic>
</fig>
<sec id="s2-1">
<label>2.1</label>
<title>Preprocessing</title>
<p>Since we use several very different datasets, it is necessary to normalize them using appropriate methods. Effective preprocessing can significantly improve the prediction results. Due to the differences between the datasets, it is essential to process them to have similar characteristics for training.</p>
<p>In the experiments, three main preprocessing steps were primarily used. First, basic image augmentation techniques were employed to increase the amount of training data for the supervised deep neural network (<xref ref-type="bibr" rid="B19">Maharana et al., 2022</xref>). The most common forms of augmentation include flipping, rotation, adding noise, and random cropping. By representing a broader range of potential data points, the augmented data narrows the gap between the training set, validation set, and any upcoming test sets, thereby enhancing the performance of the neural network.</p>
<p>The second preprocessing step is color intensity level transformation, which can make datasets from different sources more uniform, providing users with a comparable view of data from different studies or modalities (<xref ref-type="bibr" rid="B22">Nan et al., 2022</xref>). By transforming the intensity levels to a similar visual appearance, it reduces the side effects that might be introduced by different modalities in the model.</p>
<p>Finally, a combination of contrast enhancement and color inversion techniques was used (<xref ref-type="bibr" rid="B25">Reza, 2004</xref>) to produce the final images for the training set to be fed into the network. Contrast enhancement increases the contrast between the darkest and brightest regions of the image, thereby enhancing visibility and the ability to see fine details. On the other hand, color inversion describes the reversal of brightness values in the color transitions within the image. In <xref ref-type="fig" rid="F2">Figure 2</xref>, we provide an example of the effect of image preprocessing, where different images are transformed into more consistent black-and-white images, thus reducing the difficulty of model training.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Example of image preprocessing.</p>
</caption>
<graphic xlink:href="frsip-05-1527975-g002.tif">
<alt-text content-type="machine-generated">Three rows of medical images labeled TNBC, DSB, and CoNIC, each with four columns labeled Origin, LAB, CLAHE, and REVERSE. The TNBC row shows histological tissue samples with varying contrast methods. The DSB row displays bright spots on a dark background, each image enhancing contrast differently. The CoNIC row shows a colored tissue sample transitioning to grayscale with different contrast enhancements. Each technique offers distinct visualization of the cellular structures.</alt-text>
</graphic>
</fig>
<sec id="s2-1-1">
<label>2.1.1</label>
<title>Intensity level transformation</title>
<p>One of the key issues in the nucleus segmentation task is the generalization performance of the model to different image modalities. Due to their variations in visual appearance and intensity levels, it is challenging to train a universal model that performs equally well on both modalities. Therefore, we adopted color intensity transformation to normalize the datasets first, thereby reducing the difficulty of training the network. To achieve this, we used the LAB color space transformation scheme (<xref ref-type="bibr" rid="B9">Gonzales and Wintz, 1987</xref>). The LAB color space, defined by the International Commission on Illumination, represents colors as three values: L for perceived lightness, and A and B for the four unique colors in human vision: red, green, blue, and yellow. Converting all three-channel images to the LAB color space helps preserve the original structure and maintains similar brightness and color statistical levels by leveraging the uniformity of data characteristics.</p>
</sec>
<sec id="s2-1-2">
<label>2.1.2</label>
<title>Contrast enhancement and inversion</title>
<p>Typically, nuclei in cellular space have small regions that may be overlooked by algorithms. Therefore, contrast enhancement is a crucial step to improve the visibility of small nucleus regions. In this method, Contrast-Limited Adaptive Histogram Equalization (CLAHE) is applied to enhance the contrast of histopathological images (<xref ref-type="bibr" rid="B25">Reza, 2004</xref>). By limiting the contrast, it provides better equalization and reduces the problem of noise amplification. Generally, in both modalities, brightfield images are primarily used in clinical settings. Thus, a color inversion operation is performed on all images to shift the intensity levels of fluorescence histopathological images, as their overall intensity levels are much higher than those of brightfield histopathological images. The inversion process <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for a random pixel <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in an image, using the mean brightness level <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, is represented in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>.<disp-formula id="e1">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>255</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="1em"/>
<mml:mspace width="1em"/>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>127</mml:mn>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="1em"/>
<mml:mspace width="1em"/>
<mml:mspace width="1em"/>
<mml:mspace width="1em"/>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>127</mml:mn>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Boundary wavelet-aware attention</title>
<p>Boundary Wavelet-Aware Attention (BWA) is divided into two parts: Wavelet Guided Attention Unit (WGAU) and Boundary Aware Unit (BAU), as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. This mechanism first extracts image information at different frequencies using Discrete Wavelet Transform (DWT). The 2D-DWT uses Multi-Resolution Analysis (MRA) to transform a 2D signal into a series of wavelet coefficients at various scales and orientations (<xref ref-type="bibr" rid="B20">Mallat, 1989</xref>). Each level of decomposition receives a set of coefficients as a result of applying MRA to the rows and columns of the 2D signal. The low-pass filtered signal is subtracted from the original signal, leaving only the high-pass filtered signal to produce the wavelet coefficient <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Second, <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is used as a gating vector to guide the network to utilize the salient regions of the given image. It also includes contextual information that can cut off lower-level feature responses in natural image classification tasks. First, a linear transformation is applied to the gating vector <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the input tensor <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using 1x1 convolutional layers, and they are then added together. The result is passed through a ReLU function <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, followed by another 1x1 convolution to obtain the gating coefficient <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">att,c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Additional attention is used to obtain this gating coefficient as described in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.<disp-formula id="e2">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">att,c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi>g</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure of boundary wavelet-aware attention module.</p>
</caption>
<graphic xlink:href="frsip-05-1527975-g003.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network architecture with two main sections. The first section processes three types of inputs (DWT output, skip outputs, former output) through convolutional (1x1 Conv) and upsampling (2x2 UpSample) layers, and attention mechanisms. The second section refines the data with additional convolutional layers (1x1 and 3x3 Conv) and outputs a Segmentation Image and a Boundary Image.</alt-text>
</graphic>
</fig>
<p>where <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are linear transformations, and <inline-formula id="inf13">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the ReLU activation operator. Then, the attention coefficient is obtained by applying a Sigmoid activation function <inline-formula id="inf14">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">att,c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Finally, the output of the wavelet-guided attention block <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is found by performing element-wise multiplication between the attention coefficient and the input tensor <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The final output is shown in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.<disp-formula id="e3">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">att,c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>The second part, BAU, adds a side branch to the output that has been processed through summation and attention mechanisms, to additionally output a boundary information map. Since aggregation modules are added at different levels of the U-Net, each level outputs a boundary map at a different scale. Therefore, upsampling is used to fuse these maps and obtain the final boundary-aware map.</p>
<p>However, when the BAU block fuses the multi-scale boundary-aware maps, it uses a very simple summation method. Although this simple attention-based approach can achieve improved perception of multi-scale features and better results after feature fusion, it still has several drawbacks.</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Multi-Scale Boundary Fusion Module</title>
<p>To address the limitations of traditional fusion strategies&#x2014;such as direct addition or concatenation&#x2014;which often fail to adaptively integrate multi-scale features, <xref ref-type="bibr" rid="B5">Dai et al. (2021)</xref> proposed the Attention Feature Fusion (AFF) mechanism. AFF has since shown strong performance in various vision tasks by extending attention-based fusion from same-level to cross-level scenarios, including both short and long skip connections (<xref ref-type="bibr" rid="B6">Fu et al., 2022</xref>). Motivated by these advances and the need to better preserve boundary information, we adopt and further enhance the AFF framework in our model. Specifically, after extracting high-frequency boundary cues through the Boundary-Aware Wavelet (BAW) module, we apply AFF to adaptively fuse multi-scale features rather than relying on fixed operations such as addition or concatenation. In our implementation, global channel attention is obtained via global average pooling, while local channel attention is captured using point-wise convolutions. These two attention maps are then combined to generate adaptive fusion weights that guide the integration of boundary-aware and wavelet-enhanced features. This design enables our model to better emphasize discriminative regions, particularly around cell contours. The complete structure of the AFF-based boundary fusion module is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Structure of multi-scale boundary fusion module.</p>
</caption>
<graphic xlink:href="frsip-05-1527975-g004.tif">
<alt-text content-type="machine-generated">Flowchart depicting a neural network architecture with labeled components. Inputs B1, B2, and B3 feed into summation nodes. Processes include global pooling, point-wise convolution, and ReLU activation. Outputs undergo sigmoid activation, with equations involving output and operations like multiplication and addition. The layout highlights sequential and parallel operations within a bounded area, marked by orange lines and arrows indicating data flow.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<label>3</label>
<title>Results and discussion</title>
<p>In this section, we introduce the datasets used in the experiments and their sources, and then present the experimental setup conditions and results.</p>
<sec id="s3-1">
<label>3.1</label>
<title>Dataset</title>
<p>To validate the effectiveness of the proposed algorithm, three publicly available datasets were used. The first is the Data Science Bowl (DSB-2018) dataset, released by Kaggle for competition purposes (<xref ref-type="bibr" rid="B3">Caicedo et al., 2019</xref>). This dataset contains over 37,000 manually annotated nuclei from more than 30 experiments across different samples, cell lines, microscopy instruments, imaging conditions, operators, research facilities and staining protocols. The annotations were manually made by a team of expert biologists. It is one of the earlier and well-annotated datasets with a significant amount of data, and many networks have been tested on this dataset, often achieving good results. The training set of this dataset includes 670 images, with 546 being fluorescence and the rest brightfield. The test set contains 65 images.</p>
<p>The second dataset is the Triple-Negative Breast Cancer (TNBC) dataset (<xref ref-type="bibr" rid="B23">Naylor et al., 2018</xref>), which consists of 50 images with a total of 4022 annotated cells, including normal epithelial and myoepithelial breast cells, invasive cancer cells, fibroblasts, endothelial cells, adipocytes, macrophages, and inflammatory cells. The image size is 500<inline-formula id="inf18">
<mml:math id="m21">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 500. Due to the limited amount of data, we extensively used random cropping to augment the dataset (into 256<inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 256 size). This was possible because of the larger image size, and in the cropping process, we used a stride (at least 50 pixels) to ensure full coverage of each image.</p>
<p>The third dataset is the CoNIC (Lizard) dataset (<xref ref-type="bibr" rid="B11">Graham et al., 2021</xref>), which comes from the CoNIC challenge. It includes histological image regions of colon tissue from 6 different dataset sources, with complete segmentation annotations for different types of nuclei. They provided 4,981 patches of size 256<inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 256 extracted from the original Lizard dataset. It is currently the largest publicly available nucleus-level dataset, containing approximately 500,000 labeled nuclei across six different types of cells.</p>
<p>For all datasets, we divided the datasets into training, validation, and testing sets in an 8:1:1 ratio. The DSB dataset provided additional test data, which is also evaluated. Data augmentation techniques, such as rotation, flipping, translation, and cropping, were applied to all datasets. The data used for each dataset is listed in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>The number of training sets, validation sets, and test sets for each dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset</th>
<th align="center">Total</th>
<th align="center">Augmentation</th>
<th align="center">Training</th>
<th align="center">Validation</th>
<th align="center">Testing</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">DSB</td>
<td align="center">650</td>
<td align="center">3900</td>
<td align="center">3510</td>
<td align="center">390</td>
<td align="center">65</td>
</tr>
<tr>
<td align="center">TNBC</td>
<td align="center">50</td>
<td align="center">750</td>
<td align="center">600</td>
<td align="center">75</td>
<td align="center">75</td>
</tr>
<tr>
<td align="center">CoNIC</td>
<td align="center">4981</td>
<td align="center">4981</td>
<td align="center">3985</td>
<td align="center">498</td>
<td align="center">498</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Experimental setup</title>
<p>For the DSB dataset, where the image sizes are not uniform, all images were first resized to 256<inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 256. For the TNBC dataset, the original image size is 500<inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 500, and we crop them into 256<inline-formula id="inf23">
<mml:math id="m26">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 256. For the CoNIC dataset, the original image size is already standardized at 256<inline-formula id="inf24">
<mml:math id="m27">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 256. For all images, we applied data augmentation techniques, including rotating the original images by 30 and 60&#xb0;, horizontal flipping, mirroring, and cropping. Additionally, due to inconsistencies in color spaces across the datasets, we normalized the color of the images to make them more consistent when fed into the network.</p>
<p>To enable our proposed model to perform the nucleus segmentation task more effectively, different hyperparameters were selected based on empirical analysis. Our model was implemented in TensorFlow 2.15 (Python 3.9) on a Windows system with an NVIDIA GeForce RTX 4090 GPU. The core code can be obtained from <ext-link ext-link-type="uri" xlink:href="https://gitee.com/hu_yang_sheng/mswaffnet.git">https://gitee.com/hu_yang_sheng/mswaffnet.git</ext-link>. To reduce the network&#x2019;s loss, a learning rate of 0.01 was chosen, along with an SGD optimizer with weight decay values of <inline-formula id="inf25">
<mml:math id="m28">
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf26">
<mml:math id="m29">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and a momentum of 0.9. The training was conducted for 1000 epochs on each dataset. Additionally, a composite loss function was used, incorporating Dice, BCE Dice, and Dice loss functions at the stages of wavelet boundary attention feature extraction, boundary fusion, and final output, respectively. These loss functions were combined to form the overall loss function for evaluating the training results. Finally, several standard evaluation metrics were used to assess the performance of the proposed model. These metrics are well-known and widely used in biomedical image analysis (<xref ref-type="bibr" rid="B33">Zhao et al., 2022</xref>; <xref ref-type="bibr" rid="B15">Kha et al., 2022</xref>), and are described in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>.<disp-formula id="e4">
<mml:math id="m30">
<mml:mrow>
<mml:mtable class="gathered">
<mml:mtr>
<mml:mtd>
<mml:mtext>Dice</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtext>IOU</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtext>Recall</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Here, TP, TN, FP, and FN represent true positive, true negative, false positive, and false negative predictions, respectively. We trained and tested our model on the three datasets and compared it with state-of-the-art segmentation networks.</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Segmentation and analysis</title>
<sec id="s3-3-1">
<label>3.3.1</label>
<title>Ablation study</title>
<p>In the proposed network, the traditional U-Net architecture is used as the backbone network. Additionally, two modules were incorporated, and their individual and combined performances were compared. In <xref ref-type="table" rid="T2">Table 2</xref>, we evaluate using the Dice metric. After analyzing the table, it was found that there is an improvement across all three datasets. The preprocessing steps allow for the reduction of intensity variations between cell subtypes and enable the model to distinguish between nucleus and non-nucleus features. When using Boundary Wavelet-Aware Attention, the attention mechanism guides spatial features from the frequency domain information of DWT, which helps in effectively representing features by combining spatial and frequency level information, providing feature maps with fine-scale details. Secondly, the wavelet boundary information is fused using AFF.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Ablation study result of proposed network.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Prepr.</th>
<th align="center">BWAA</th>
<th align="center">AFF</th>
<th align="center">DSB</th>
<th align="center">TNBC</th>
<th align="center">CoNIC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf29">
<mml:math id="m33">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">81.63%</td>
<td align="center">72.58%</td>
<td align="center">86.59%</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf30">
<mml:math id="m34">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf31">
<mml:math id="m35">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf32">
<mml:math id="m36">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">85.25%</td>
<td align="center">76.49%</td>
<td align="center">87.74%</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf33">
<mml:math id="m37">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf34">
<mml:math id="m38">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf35">
<mml:math id="m39">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">90.05%</td>
<td align="center">77.66%</td>
<td align="center">88.45%</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf36">
<mml:math id="m40">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf37">
<mml:math id="m41">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf38">
<mml:math id="m42">
<mml:mrow>
<mml:mi>&#x2713;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">91.33%</td>
<td align="center">80.56%</td>
<td align="center">91.03%</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-3-2">
<label>3.3.2</label>
<title>Quantitative analysis</title>
<p>To demonstrate the effectiveness of our proposed method, comparative analyses for the TNBC, DSB, and CoNIC datasets are presented in <xref ref-type="table" rid="T3">Tables 3</xref>&#x2013;<xref ref-type="table" rid="T5">5</xref>, respectively. Compared to existing networks, our network exhibits better performance in the nucleus segmentation task. The robustness and effectiveness of the proposed network are confirmed by high scores in other metrics that can verify the accuracy of nucleus region determination. The proposed network&#x2019;s capability is enhanced by the additional wavelet domain information provided by wavelet, which is fused using the attention mechanism in attention fusion. Furthermore, with the help of the boundary-aware unit, it can effectively capture small nucleus regions, thereby improving precision and recall. We report the mean and 95% confidence interval of Dice scores based on 5 independent runs. In addition, paired t-tests between our method and baselines show statistically significant improvements (p &#x3c; 0.01).</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Comparison with existing networks on TNBC.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Network</th>
<th align="center">Dice (%)</th>
<th align="center">IOU (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Accuracy (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">U-Net (<xref ref-type="bibr" rid="B26">Ronneberger et al., 2015</xref>)</td>
<td align="center">68.61</td>
<td align="center">52.92</td>
<td align="center">65.94</td>
<td align="center">72.54</td>
<td align="center">74.41</td>
</tr>
<tr>
<td align="center">HoverNet (<xref ref-type="bibr" rid="B10">Graham et al., 2019</xref>)</td>
<td align="center">74.32</td>
<td align="center">57.13</td>
<td align="center">68.91</td>
<td align="center">80.61</td>
<td align="center">78.97</td>
</tr>
<tr>
<td align="center">BAWGNet (<xref ref-type="bibr" rid="B14">Imtiaz et al., 2023</xref>)</td>
<td align="center">78.57</td>
<td align="center">61.90</td>
<td align="center">73.45</td>
<td align="center">81.90</td>
<td align="center">82.94</td>
</tr>
<tr>
<td align="center">Ours</td>
<td align="center">80.56</td>
<td align="center">75.47</td>
<td align="center">80.71</td>
<td align="center">82.46</td>
<td align="center">83.16</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Comparison with existing networks on DSB.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Network</th>
<th align="center">Dice (%)</th>
<th align="center">IOU (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Accuracy (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">U-Net (<xref ref-type="bibr" rid="B26">Ronneberger et al., 2015</xref>)</td>
<td align="center">86.75</td>
<td align="center">76.97</td>
<td align="center">85.55</td>
<td align="center">87.98</td>
<td align="center">92.58</td>
</tr>
<tr>
<td align="center">HoverNet (<xref ref-type="bibr" rid="B10">Graham et al., 2019</xref>)</td>
<td align="center">89.42</td>
<td align="center">80.14</td>
<td align="center">87.92</td>
<td align="center">90.97</td>
<td align="center">95.86</td>
</tr>
<tr>
<td align="center">BAWGNet (<xref ref-type="bibr" rid="B14">Imtiaz et al., 2023</xref>)</td>
<td align="center">90.82</td>
<td align="center">82.43</td>
<td align="center">88.56</td>
<td align="center">98.65</td>
<td align="center">82.94</td>
</tr>
<tr>
<td align="center">Ours</td>
<td align="center">91.33</td>
<td align="center">82.86</td>
<td align="center">87.89</td>
<td align="center">98.08</td>
<td align="center">98.45</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Comparison with existing networks on CoNIC.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Network</th>
<th align="center">Dice (%)</th>
<th align="center">IOU (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Accuracy (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">U-Net (<xref ref-type="bibr" rid="B26">Ronneberger et al., 2015</xref>)</td>
<td align="center">80.75</td>
<td align="center">75.17</td>
<td align="center">82.55</td>
<td align="center">84.98</td>
<td align="center">90.08</td>
</tr>
<tr>
<td align="center">HoverNet (<xref ref-type="bibr" rid="B10">Graham et al., 2019</xref>)</td>
<td align="center">88.43</td>
<td align="center">86.64</td>
<td align="center">89.52</td>
<td align="center">88.66</td>
<td align="center">93.89</td>
</tr>
<tr>
<td align="center">BAWGNet (<xref ref-type="bibr" rid="B14">Imtiaz et al., 2023</xref>)</td>
<td align="center">89.52</td>
<td align="center">87.43</td>
<td align="center">85.73</td>
<td align="center">90.63</td>
<td align="center">95.65</td>
</tr>
<tr>
<td align="center">Ours</td>
<td align="center">91.03</td>
<td align="center">91.22</td>
<td align="center">90.65</td>
<td align="center">91.32</td>
<td align="center">96.77</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Quantitative analysis alone does not always determine the effectiveness and superiority of a method. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the segmentation performance of different networks, including our proposed network, in some challenging cases. It is evident from the segmentation performance of other networks that all of them struggle with these issues. On the other hand, our proposed method, through its effective utilization of spatial and frequency level information along with boundary information, significantly addresses these challenges and demonstrates its performance notably in the challenging nucleus segmentation task.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The proposed network visually compares the nucleus segmentation performance of each network in three datasets.</p>
</caption>
<graphic xlink:href="frsip-05-1527975-g005.tif">
<alt-text content-type="machine-generated">Comparison of cell segmentation results for TNBC, DSB, and CoNIC datasets using different methods. Each row shows the original image, ground truth (GT), and results from BAWNet, HoverNet, Unet, and a new method. Segmentation accuracy is indicated by false positive areas in green and false negative areas in red.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-3-3">
<label>3.3.3</label>
<title>Limitations and future work</title>
<p>Although our model demonstrates superior performance compared to previous methods across the evaluated datasets, it is important to note that these datasets were carefully curated and annotated by expert pathologists. In real-world clinical scenarios, issues such as noise contamination, low-contrast images, and out-of-focus artifacts may still occur. While such conditions may be excluded during model development, they represent inevitable challenges for clinical deployment. Although we employed preprocessing techniques to normalize image appearance&#x2014;primarily to address differences between fluorescence and H&#x26;E stained images&#x2014;these methods were not specifically designed to handle more complex visual degradations. Another emerging trend is the widespread application of large-scale models (<xref ref-type="bibr" rid="B13">H&#xf6;rst et al., 2024</xref>). Recent studies have begun to explore the use of such models in histopathological image analysis, showing promising progress. In our future work, we plan to build upon these advances and further improve the performance of large models in this domain. For clinical work, we plan to actively collaborate with hospitals and further investigate advanced preprocessing strategies to enhance the model&#x2019;s robustness and practicality in real-world clinical environments. In the future, we plan to strengthen collaboration with hospitals, and our follow-up work will focus on two main directions:<list list-type="order">
<list-item>
<p>Refining and improving model performance using real clinical data. Data from different hospitals often exhibit domain-specific variations, which may differ significantly from public datasets. Incorporating such data will help enhance the robustness and generalizability of the model.</p>
</list-item>
<list-item>
<p>Engineering integration into real-world clinical workflows. We aim to design a practical deployment pipeline that allows the model to be embedded into pathologists&#x2019; routine diagnostic processes, making it easier for clinicians to evaluate and validate the quality of the segmentation results in real time.</p>
</list-item>
</list>
</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<label>4</label>
<title>Conclusion</title>
<p>Accurate nucleus segmentation can provide valuable reference information for pathologists. Although this challenging task has been addressed through various techniques, neural networks based on wavelet-guided boundary-aware attention have shown certain advantages in identifying nucleus boundaries, but their feature fusion performance has not been ideal, limiting the accuracy of segmentation. In this study, we propose a Multi-Scale Wavelet Fusion Attention Network that effectively fuses high-frequency 2D Discrete Wavelet Transform features using an Attention Feature Fusion mechanism to achieve more precise identification of nucleus regions. Additionally, considering the differences between different datasets, we ensured the consistency of training data by transforming them to have similar color statistics. Through experiments conducted on three publicly available pathological datasets, the main performance metrics demonstrate the superiority of our method in accurately segmenting nuclei in cellular microscopic images compared to existing architectures.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="ethics-statement" id="s6">
<title>Ethics statement</title>
<p>The manuscript presents research on animals that do not require ethical approval for their study.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JZ: Methodology, Software, Writing &#x2013; original draft, Writing &#x2013; review and editing. YH: Validation, Writing &#x2013; review and editing. ZA: Project administration, Validation, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1120289/overview">Frederic Dufaux</ext-link>, L2S, Universit&#xe9; Paris-Saclay, CNRS, CentraleSup&#xe9;lec, France</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/711875/overview">Nguyen Quoc Khanh Le</ext-link>, Taipei Medical University, Taiwan</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2853479/overview">Giovanni Scribano</ext-link>, University of Ferrara, Italy</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al Qurri</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Almekkawy</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Improved UNet with attention for medical image segmentation</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>8589</fpage>. <pub-id pub-id-type="doi">10.3390/s23208589</pub-id>
<pub-id pub-id-type="pmid">37896682</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Azad</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Aghdam</surname>
<given-names>E. K.</given-names>
</name>
<name>
<surname>Rauland</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Avval</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Bozorgpour</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Medical image segmentation review: the success of U-Net</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>46</volume>, <fpage>10076</fpage>&#x2013;<lpage>10095</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2024.3435571</pub-id>
<pub-id pub-id-type="pmid">39167505</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caicedo</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Goodman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Karhohs</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Cimini</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Ackerman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Haghighi</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Nucleus segmentation across imaging experiments: the 2018 data science bowl</article-title>. <source>Nat. methods</source> <volume>16</volume>, <fpage>1247</fpage>&#x2013;<lpage>1253</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0612-7</pub-id>
<pub-id pub-id-type="pmid">31636459</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castro</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pereira</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Improved segmentation of cellular nuclei using UNET architectures for enhanced pathology imaging</article-title>. <source>Electronics</source> <volume>13</volume>, <fpage>3335</fpage>. <pub-id pub-id-type="doi">10.3390/electronics13163335</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gieseke</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Oehmcke</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Barnard</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Attentional feature fusion</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF winter conference on applications of computer vision</source>, <fpage>3560</fpage>&#x2013;<lpage>3569</lpage>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>StoHisNet: a hybrid multi-classification model with CNN and transformer for gastric pathology images</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>221</volume>, <fpage>106924</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2022.106924</pub-id>
<pub-id pub-id-type="pmid">35671603</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gali&#x107;</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Habijan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Leventi&#x107;</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Romi&#x107;</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Machine learning empowering personalized medicine: a comprehensive review of medical image analysis methods</article-title>. <source>Electronics</source> <volume>12</volume>, <fpage>4411</fpage>. <pub-id pub-id-type="doi">10.3390/electronics12214411</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Gehlot</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Ednfc-net: convolutional neural network with nested feature concatenation for nuclei-instance segmentation</article-title>,&#x201d; in <source>ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP)</source>, <fpage>1389</fpage>&#x2013;<lpage>1393</lpage>.</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Gonzales</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Wintz</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1987</year>). <source>Digital image processing; Addison-Wesley Longman Publishing Co., Inc.</source>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Graham</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vu</surname>
<given-names>Q. D.</given-names>
</name>
<name>
<surname>Raza</surname>
<given-names>S. E. A.</given-names>
</name>
<name>
<surname>Azam</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tsang</surname>
<given-names>Y. W.</given-names>
</name>
<name>
<surname>Kwak</surname>
<given-names>J. T.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Hover-net: simultaneous segmentation and classification of nuclei in multi-tissue histology images</article-title>. <source>Med. image Anal.</source> <volume>58</volume>, <fpage>101563</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2019.101563</pub-id>
<pub-id pub-id-type="pmid">31561183</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Graham</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jahanifar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Vu</surname>
<given-names>Q. D.</given-names>
</name>
<name>
<surname>Hadjigeorghiou</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Leech</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Snead</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>CoNIC: colon nuclei identification and counting challenge 2022</article-title>. <source>arXiv Prepr. arXiv:2111.14485</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2111.14485</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A hybrid-attention nested UNet for nuclear segmentation in histopathological images</article-title>. <source>Front. Mol. Biosci.</source> <volume>8</volume>, <fpage>614174</fpage>. <pub-id pub-id-type="doi">10.3389/fmolb.2021.614174</pub-id>
<pub-id pub-id-type="pmid">33681291</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>H&#xf6;rst</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rempe</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Heine</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Seibold</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Keyl</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Baldini</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>CellViT: vision transformers for precise cell segmentation and classification</article-title>. <source>Med. Image Anal.</source> <volume>94</volume>, <fpage>103143</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2024.103143</pub-id>
<pub-id pub-id-type="pmid">38507894</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Imtiaz</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Fattah</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Kung</surname>
<given-names>S.-Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>BAWGNet: boundary aware wavelet guided network for the nuclei segmentation in histopathology images</article-title>. <source>Comput. Biol. Med.</source> <volume>165</volume>, <fpage>107378</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.107378</pub-id>
<pub-id pub-id-type="pmid">37678139</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kha</surname>
<given-names>Q.-H.</given-names>
</name>
<name>
<surname>Tran</surname>
<given-names>T.-O.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>V.-N.</given-names>
</name>
<name>
<surname>Than</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>N. Q. K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An interpretable deep learning model for classifying adaptor protein complexes from sequence information</article-title>. <source>Methods</source> <volume>207</volume>, <fpage>90</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymeth.2022.09.007</pub-id>
<pub-id pub-id-type="pmid">36174933</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krupinski</surname>
<given-names>E. A.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>The importance of perception research in medical imaging</article-title>. <source>Radiat. Med.</source> <volume>18</volume>, <fpage>329</fpage>&#x2013;<lpage>334</lpage>.<pub-id pub-id-type="pmid">11153684</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Alabhya</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kanfade</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kini</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>NucleiSegNet: robust deep learning architecture for the nuclei segmentation of liver cancer histopathology images</article-title>. <source>Comput. Biol. Med.</source> <volume>128</volume>, <fpage>104075</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.104075</pub-id>
<pub-id pub-id-type="pmid">33190012</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Selective kernel networks</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>510</fpage>&#x2013;<lpage>519</lpage>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maharana</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mondal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nemade</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A review: data pre-processing and data augmentation techniques</article-title>. <source>Glob. Transitions Proc.</source> <volume>3</volume>, <fpage>91</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.1016/j.gltp.2022.04.020</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallat</surname>
<given-names>S. G.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>A theory for multiresolution signal decomposition: the wavelet representation</article-title>. <source>IEEE Trans. pattern analysis Mach. Intell.</source> <volume>11</volume>, <fpage>674</fpage>&#x2013;<lpage>693</lpage>. <pub-id pub-id-type="doi">10.1109/34.192463</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mouelhi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rmili</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Sayadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Doghri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mrad</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Fast unsupervised nuclear segmentation and classification scheme for automatic allred cancer scoring in immunohistochemical breast tissue images</article-title>. <source>Comput. methods programs Biomed.</source> <volume>165</volume>, <fpage>37</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2018.08.005</pub-id>
<pub-id pub-id-type="pmid">30337080</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Del Ser</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Walsh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sch&#xf6;nlieb</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Roberts</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Selby</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Data harmonisation for information fusion in digital healthcare: a state-of-the-art systematic review, meta-analysis and future research directions</article-title>. <source>Inf. Fusion</source> <volume>82</volume>, <fpage>99</fpage>&#x2013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2022.01.001</pub-id>
<pub-id pub-id-type="pmid">35664012</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Naylor</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>La&#xe9;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reyal</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Walter</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Segmentation of nuclei in histopathology images by deep regression of the distance map</article-title>. <source>IEEE Trans. Med. imaging</source> <volume>38</volume>, <fpage>448</fpage>&#x2013;<lpage>459</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2018.2865709</pub-id>
<pub-id pub-id-type="pmid">30716022</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Olimov</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sanjar</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Din</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Paul</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>FU-Net: fast biomedical image segmentation model based on bottleneck convolution layers</article-title>. <source>Multimedia Syst.</source>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>.</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reza</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Realization of the contrast limited adaptive histogram equalization (CLAHE) for real-time image enhancement</article-title>. <source>J. VLSI signal Process. Syst. signal, image video Technol.</source> <volume>38</volume>, <fpage>35</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1023/b:vlsi.0000028532.53893.82</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>U-net: convolutional networks for biomedical image segmentation</article-title>,&#x201d; in <source>Medical image computing and computer-assisted intervention&#x2013;MICCAI 2015: 18Th international conference, Munich, Germany, October 5-9, 2015, proceedings, part III</source>, <volume>18</volume>, <fpage>234</fpage>&#x2013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-24574-4_28</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Vahadane</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Atheeth</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Majumdar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Dual encoder attention u-net for nuclei segmentation</article-title>,&#x201d; in <source>2021 43rd annual international conference of the IEEE engineering in medicine and biology society</source> (<publisher-name>EMBC</publisher-name>), <fpage>3205</fpage>&#x2013;<lpage>3208</lpage>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <source>IEEE sensors journal</source>.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Williams</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Wavelet pooling for convolutional neural networks</article-title>,&#x201d; in <source>International conference on learning representations</source>.</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cong</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Vision transformers for computational histopathology</article-title>. <source>IEEE Rev. Biomed. Eng.</source> <volume>17</volume>, <fpage>63</fpage>&#x2013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1109/rbme.2023.3297604</pub-id>
<pub-id pub-id-type="pmid">37478035</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). &#x201c;<article-title>Resnest: split-attention networks</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>2736</fpage>&#x2013;<lpage>2746</lpage>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>S.-Q.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.-J.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>W.-M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>AL-Net: Attention learning network based on multi-task learning for cervical nucleus segmentation</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>26</volume>, <fpage>2693</fpage>&#x2013;<lpage>2702</lpage>. <pub-id pub-id-type="doi">10.1109/jbhi.2021.3136568</pub-id>
<pub-id pub-id-type="pmid">34928808</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gui</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>N. Q. K.</given-names>
</name>
<name>
<surname>Chua</surname>
<given-names>M. C. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improved prediction model of protein and peptide toxicity by integrating channel attention into a convolutional neural network and gated recurrent units</article-title>. <source>ACS omega</source> <volume>7</volume>, <fpage>40569</fpage>&#x2013;<lpage>40577</lpage>. <pub-id pub-id-type="doi">10.1021/acsomega.2c05881</pub-id>
<pub-id pub-id-type="pmid">36385847</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>