<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Astron. Space Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Astronomy and Space Sciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Astron. Space Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-987X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1782465</article-id>
<article-id pub-id-type="doi">10.3389/fspas.2026.1782465</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>ATD-DL: a deep learning framework for faint astronomical target detection</article-title>
<alt-title alt-title-type="left-running-head">He et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fspas.2026.1782465">10.3389/fspas.2026.1782465</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>He</surname>
<given-names>Junyao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3328062"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Luo</surname>
<given-names>Hao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3357827"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiao</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3393871/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Shuwei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3393719/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Qi</surname>
<given-names>Zhaoxiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/274044"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Shanghai Astronomical Observatory, Chinese Academy of Sciences</institution>, <city>Shanghai</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>School of Astronomy and Space Science, University of Chinese Academy of Sciences</institution>, <city>Beijing</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Hao Luo, <email xlink:href="mailto:luoh@shao.ac.cn">luoh@shao.ac.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-23">
<day>23</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>13</volume>
<elocation-id>1782465</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>06</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 He, Luo, Xiao, Liu and Qi.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>He, Luo, Xiao, Liu and Qi</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-23">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Astronomical imaging data frequently exhibit low signal-to-noise ratios (SNRs), especially for observations obtained from small-aperture, wide-field survey instruments, in which the detected signals are inherently faint and dominated by noise. Such characteristics pose a substantial technical challenge for subsequent target detection and quantitative measurement tasks. This challenge is particularly pronounced for faint astronomical targets with SNRs ranging from 1 to 10. When the SNR decreases below approximately 5, the useful signal approaches or falls beneath the detection threshold imposed by background noise, leading to a pronounced degradation in the performance of traditional threshold-based detection algorithms, such as SExtractor. Furthermore, astronomical imaging data are typically characterized by a high 16-bit dynamic range. This wide dynamic range results in the intensities of faint targets being compressed into a narrow interval of low pixel values. Standard global normalization strategies employed in deep learning models further compress this narrow intensity band, thereby suppressing and obscuring discriminative target features. To address these challenges, we propose ATD-DL, a deep learning&#x2013;based framework specifically designed for faint astronomical target detection. The core of the proposed framework is an enhanced U-Net&#x2013;based segmentation architecture. This architecture is integrated with a multi-stage image preprocessing pipeline, target separation, and centroid extraction modules to enable efficient and robust detection of astronomical objects. Experimental results demonstrate that the proposed method achieves excellent performance in detecting extremely faint targets with SNRs in the range 2 &#x2264; SNR &#x003C; 5. Compared with traditional approaches, including SExtractor and DAOPHOT, the proposed framework exhibits a markedly superior detection capability under low-SNR conditions near the detection limit. In future applications, ATD-DL may be extended to space object detection tasks, where it has the potential to substantially improve the identification of extremely faint targets.</p>
</abstract>
<kwd-group>
<kwd>astronomical image processing</kwd>
<kwd>DAOPHOT</kwd>
<kwd>deep learning</kwd>
<kwd>faint astronomical target detection</kwd>
<kwd>method comparison</kwd>
<kwd>SExtractor</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="15"/>
<table-count count="8"/>
<equation-count count="18"/>
<ref-count count="27"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Astrostatistics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>As a passive detection technique, ground-based photographic astrometry plays an indispensable role in the monitoring of moving targets, such as near-Earth objects and space debris, owing to its high precision, low operational cost, and ease of deployment (<xref ref-type="bibr" rid="B18">Schildknecht, 2007</xref>). High-precision monitoring of these targets is essential not only for ensuring human safety and daily life but also for supporting the increasingly frequent and complex activities in space (<xref ref-type="bibr" rid="B3">Borgefors, 1986</xref>; <xref ref-type="bibr" rid="B5">Flury and Contant, 2001</xref>; <xref ref-type="bibr" rid="B1">Bancelin et al., 2012</xref>; <xref ref-type="bibr" rid="B25">Ye, 2018</xref>). In recent years, ground-based photoelectric telescopes, characterized by low cost and wide fields of view, have been widely adopted for such applications (<xref ref-type="bibr" rid="B14">Luo et al., 2019</xref>; <xref ref-type="bibr" rid="B15">Luo et al., 2022</xref>). Their high observational efficiency renders them particularly valuable for the discovery of unknown moving targets and the monitoring of highly maneuverable objects. However, the detection capability of small-aperture, wide-field instruments remains limited, resulting in low signal-to-noise ratios (SNRs) for observed targets and consequently imposing more stringent requirements on signal detection and measurement. Traditional detection methods based on threshold segmentation and morphological separation, such as SExtractor (<xref ref-type="bibr" rid="B2">Bertin and Arnouts, 1996</xref>) and DAOPHOT (<xref ref-type="bibr" rid="B19">Stetson, 1987</xref>), exhibit inherent limitations in target detection performance. Their detection accuracy degrades sharply when applied to faint targets with low SNRs. Recent advances in deep learning for astronomical image processing have opened promising new avenues for addressing this challenge.</p>
<p>Early studies primarily relied on simple thresholding techniques and morphological features, which are prone to false detections and missed sources when applied to low-SNR images. Nevertheless, these conventional methods retain practical value in astronomical applications, particularly in scenarios where computational resources are limited. In recent years, deep learning&#x2013;based frameworks have been increasingly applied to source detection in astronomical images. For instance, a framework based on Faster R-CNN has been employed for astronomical target detection and classification (<xref ref-type="bibr" rid="B7">Jia et al., 2020</xref>). However, this study focused on object-level detection rather than pixel-level segmentation. Moreover, its evaluation relied on SExtractor outputs as the benchmark, without incorporating catalog cross-matching to comprehensively assess the detection capabilities of either SExtractor or the proposed model. Another study employed Mask R-CNN trained on simulated images for source classification and deblending (<xref ref-type="bibr" rid="B22">Tanoglidis et al., 2021</xref>). This objective is fundamentally different from that of the present work, and no SNR-dependent comparison of targets was conducted, with the outputs again limited to detection results rather than segmentation maps. Several studies have adopted a dual-network architecture that combines Swin Transformer and Faster R-CNN to separately detect extended and point-like sources (<xref ref-type="bibr" rid="B8">Jia et al., 2023</xref>), thereby leveraging multi-band information to enhance detection performance. However, this framework depends on multi-band inputs and relatively high-SNR images, and its two-stage processing pipeline is prone to error propagation under low-SNR conditions. Additionally, the LSBG-AD model (<xref ref-type="bibr" rid="B26">Yi et al., 2022</xref>) focuses on the automatic detection of low-surface-brightness galaxies, a task that is inherently distinct from point-source detection. Other studies combined the U-Net segmentation architecture (<xref ref-type="bibr" rid="B16">Ronneberger et al., 2015</xref>) with a Laplacian-of-Gaussian filter, achieving improved localization performance compared with SExtractor on multi-band images (<xref ref-type="bibr" rid="B20">Stoppa et al., 2022</xref>). However, this approach relies on manually designed filter parameters and does not optimize key SExtractor parameters (e.g., FWHM), instead adopting default settings and thus failing to fully exploit SExtractor&#x2019;s detection potential. For the detection of faint, small targets in stellar images, a pixel-level classification method based on a convolutional neural network was proposed (<xref ref-type="bibr" rid="B24">Xue et al., 2020</xref>). Although this method performs well on real images, the labels for its training dataset were generated using a star recognition algorithm (<xref ref-type="bibr" rid="B17">Schiattarella et al., 2017</xref>), which may not accurately reflect detection performance in complete and densely populated stellar fields. The DNA-Net model is built upon a U-shaped architecture and enhances the detection of infrared dim and small targets by incorporating dense nested connections and dual attention mechanisms. 
Such targets often lack distinct and regular shape features, and the study demonstrates that DNA-Net (<xref ref-type="bibr" rid="B11">Li et al., 2023</xref>) achieves superior detection performance compared with several representative deep learning models. Other studies have compared the detection performance of U-Net and its variants&#x2014;such as SqueezeUNet, ELUNet, and MobileUNet&#x2014;with that of traditional threshold-based methods (<xref ref-type="bibr" rid="B27">Zhao et al., 2025</xref>). However, because their simulated images fail to adequately reproduce extremely faint point sources, the comparative detection capability of these methods for faint targets remains unclear. PSDetNet (<xref ref-type="bibr" rid="B12">Long et al., 2025</xref>) is a two-stage target detection framework. It first generates object segmentation maps using a deep learning model with an encoder&#x2013;decoder structure and subsequently constructs a template from aligned samples to filter targets. This approach assumes that targets exhibit stable and approximately Gaussian morphologies, making it well suited to typical point sources with high SNRs and regular shapes. For faint targets under low-SNR conditions, however, their apparent morphology is strongly influenced by noise, making it difficult to construct a stable morphological template. Collectively, these studies demonstrate the considerable potential of deep learning for source detection in astronomical images, while also indicating that further methodological improvements are required under specific observational conditions.</p>
<p>Although existing deep learning methods have achieved substantial progress in detecting sources in astronomical images, their performance remains limited when applied to faint targets. One of the primary challenges arises from signal-to-noise ratio (SNR) limitations: as target brightness approaches the background noise level, both detection precision and recall deteriorate markedly. When processing faint astronomical features, conventional convolutional neural networks are constrained by fixed receptive fields and by the suppression of low-response signals induced by activation functions. As a result, critical information associated with faint targets may be attenuated or lost during forward propagation, ultimately impairing detection performance for low-SNR objects. In addition, many existing approaches rely on large, well-annotated datasets, which are often difficult to obtain in practical observational settings. To address these limitations, we propose ATD-DL, a comprehensive framework for astronomical target detection. The framework integrates a U-Net&#x2013;based architecture with target separation and centroid extraction. Experimental results show that the proposed method provides a clear improvement in detection performance compared with traditional approaches such as SExtractor and DAOPHOT.</p>
<p>The remainder of this paper is organized as follows. <xref ref-type="sec" rid="s2">Section 2</xref> describes the data sources and preprocessing pipeline used in this study, including raw image enhancement, field-of-view division, and the construction of the labeled dataset. These steps are essential for supervised learning, as they provide standardized, high-quality samples and labels for model training. <xref ref-type="sec" rid="s3">Section 3</xref> presents the model architecture, training configuration, and the processing of segmentation outputs, ensuring that the detection results are formatted consistently with those produced by traditional methods and enabling a fair comparison. <xref ref-type="sec" rid="s4">Section 4</xref> discusses the performance differences between the proposed method and traditional approaches, including analyses of overall performance, performance across different faintness intervals, and comparisons at the segmentation level. Finally, <xref ref-type="sec" rid="s5">Section 5</xref> summarizes the main results and discusses the implications of the proposed deep learning model.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Data</title>
<p>High-quality data form the foundation for the effective application of deep learning models. This section presents a systematic description of the data foundation and processing pipeline of this study, including data preprocessing and label set creation. First, raw astronomical observational data undergo rigorous preprocessing&#x2014;including denoising, field-of-view partitioning, and image enhancement&#x2014;to suppress noise, standardize data formats, and improve the target signal-to-noise ratio. Label set creation provides the fundamental basis for subsequent model training; the choice of labeling strategy and threshold settings directly determines the model&#x2019;s ability to learn discriminative features and is among the key factors influencing final detection performance. Accordingly, <xref ref-type="sec" rid="s2-1">Section 2.1</xref> describes the data acquisition instrumentation, characteristics of the raw data, and the preprocessing pipeline, while <xref ref-type="sec" rid="s2-2">Section 2.2</xref> details the label set creation method based on threshold segmentation combined with catalog mapping.</p>
<sec id="s2-1">
<label>2.1</label>
<title>Data preprocessing</title>
<p>The primary instrument used in this study is a wide-field computational imaging array developed by our research group. The system consists of ten identical optical telescope units, each equipped with a QHY4040 camera and a 150 mm F/1.15 (D130) lens, and mounted on a simple alt-azimuth array frame (see <xref ref-type="fig" rid="F1">Figure 1</xref> for photographs of the instrument and observation site). Each unit covers a sky area of approximately 14&#xb0; &#xd7; 14&#xb0;, corresponding to a spatial scale of about 12.3 arcseconds per pixel.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic diagram of the astronomical observation device - <bold>(A)</bold> The computational imaging array, <bold>(B)</bold> Observation site and setup of the computational imaging array.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g001.tif">
<alt-text content-type="machine-generated">Panel A shows a close-up view of several telescopes or scientific instruments with purple-tinted lenses mounted on black supports, surrounded by wiring on an outdoor platform. Panel B provides a wider view of the same platform with mounted instruments on a rooftop facility, bordered by buildings and vegetation under a cloudy sky.</alt-text>
</graphic>
</fig>
<p>Astronomical imaging acquired from ground-based stations is fundamentally limited by atmospheric turbulence, readout noise, and photon noise, often resulting in raw data frames with low signal-to-noise ratios (SNRs). To improve image quality while addressing the inherent trade-off between model complexity (in terms of parameters and memory) and the large size of wide-field images, a preprocessing stage was applied, focusing on single-frame enhancement and field division (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Schematic overview of the image preprocessing pipeline.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating an image processing pipeline for astronomical imaging, starting with a sixteen hundred by sixteen hundred single-frame image, followed by dark frame subtraction, field-of-view partitioning, background subtraction, ZSCALE adjustment, normalization, and resulting in a five hundred twelve by five hundred twelve single field-of-view enhanced image.</alt-text>
</graphic>
</fig>
<p>Initial noise reduction for single-frame images follows standard calibration procedures (<xref ref-type="bibr" rid="B6">Howell, 2006</xref>), including bias and dark-current subtraction and flat-field correction, as well as estimation and removal of the sky background. Pixel-by-pixel subtraction of master dark frames effectively suppresses fixed-pattern noise arising from hot pixels and readout processes. This procedure removes discrete bright artifacts caused by spatial variations in dark current (<xref ref-type="fig" rid="F3">Figure 3</xref>), yielding a statistically uniform background. Such background uniformity is critical for improving the accuracy of subsequent centroid estimation and SNR calculation.</p>
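<p>For illustration, a minimal Python sketch of this dark-frame subtraction step is given below; the file names and the use of astropy.io.fits are assumptions for the example rather than details of the actual pipeline.</p>
<code language="python"><![CDATA[
# Minimal sketch of the dark-frame subtraction step described above.
# File names are placeholders; the real pipeline inputs are not specified here.
import numpy as np
from astropy.io import fits

raw = fits.getdata("science_frame.fits").astype(np.float32)
master_dark = fits.getdata("master_dark.fits").astype(np.float32)

# Pixel-by-pixel subtraction of the master dark suppresses hot pixels
# and fixed-pattern noise from the readout electronics.
calibrated = raw - master_dark
]]></code>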
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Close-up view illustrating noise suppression via dark frame subtraction.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g003.tif">
<alt-text content-type="machine-generated">Astronomical grayscale image showing a star field with a red box highlighting a section. Two zoomed-in panels on the right illustrate the effect of dark subtraction, with the lower panel marking faint points using yellow squares.</alt-text>
</graphic>
</fig>
<p>To mitigate the conflict between data volume and model complexity, the original 1,600 &#xd7; 1,600 pixel FITS images were partitioned into smaller subfields. Using a non-overlapping sliding window, each full-frame image was uniformly divided into nine 512 &#xd7; 512 pixel patches (<xref ref-type="fig" rid="F4">Figure 4</xref>). This strategy serves two purposes: first, the patch size is compatible with the downsampling factors of the U-Net architecture, thereby avoiding interpolation artifacts; second, it substantially reduces the number of parameters required during training, limiting GPU memory usage and reducing overall training time.</p>
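<p>A minimal sketch of this partitioning step is given below. The exact window offsets are not stated in the text, so evenly spaced, non-overlapping start positions are assumed for the example.</p>
<code language="python"><![CDATA[
# Sketch of the field-of-view partitioning described above: nine 512 x 512
# patches taken from a 1600 x 1600 frame. Evenly spaced, non-overlapping
# start offsets are an assumption (a stride of 544 leaves small gaps
# between adjacent patches).
import numpy as np

def partition_frame(frame, patch=512, grid=3):
    h, w = frame.shape
    ys = np.linspace(0, h - patch, grid).astype(int)  # e.g., 0, 544, 1088
    xs = np.linspace(0, w - patch, grid).astype(int)
    return [frame[y:y + patch, x:x + patch] for y in ys for x in xs]

# patches = partition_frame(calibrated)  # nine 512 x 512 subfields
]]></code>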
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Schematic diagram of the wide-field image partitioning strategy.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g004.tif">
<alt-text content-type="machine-generated">Grid of nine textured squares in varied colors, each with a large white number centered. The grid reads left to right, top to bottom: 9, 2, 3, 8, 1, 4, 7, 6, 5.</alt-text>
</graphic>
</fig>
<p>Astronomical raw data typically exhibit a high dynamic range, commonly stored in 16-bit format. However, signals from faint targets are often confined to a narrow range of low-intensity values. Direct global normalization would further compress the contrast of these faint objects. The adopted approach therefore combines background subtraction with an adaptive contrast-stretching algorithm, enhancing faint-signal contrast while simultaneously standardizing the dataset and reducing data complexity.</p>
<p>To enhance the effectiveness of the adaptive stretching algorithm in amplifying faint signals, background subtraction is first applied to the images to mitigate the influence of non-uniform backgrounds. Specifically, a tile-based median background estimation method was applied using the photutils package from the Astropy project (<xref ref-type="bibr" rid="B4">Collaboration et al., 2013</xref>). Each image is first divided into tiles of 50 &#xd7; 50 pixels. A spatially varying background model is then constructed by adopting the median statistic (MedianBackground) as the estimator within each tile. The resulting background model is subsequently subtracted on a pixel-by-pixel basis from the original image, yielding a signal frame with the background effectively suppressed.</p>
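<p>The following sketch illustrates this background estimation step with photutils; the 50 &#xd7; 50 pixel tile size follows the text, while the sigma-clipping settings are assumptions for the example.</p>
<code language="python"><![CDATA[
# Sketch of the tile-based median background estimation and subtraction
# described above, using photutils.
from astropy.stats import SigmaClip
from photutils.background import Background2D, MedianBackground

def subtract_background(image):
    bkg = Background2D(
        image,
        box_size=(50, 50),                 # 50 x 50 pixel tiles (as in the text)
        sigma_clip=SigmaClip(sigma=3.0),   # robust to bright stars (assumed value)
        bkg_estimator=MedianBackground(),  # median statistic per tile
    )
    return image - bkg.background          # background-subtracted signal frame
]]></code>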
<p>The background-subtracted frame then undergoes adaptive contrast stretching. The ZScale algorithm implemented in Astropy (<xref ref-type="bibr" rid="B4">Collaboration et al., 2013</xref>) is used to automatically determine the optimal display intensity range [<inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>] (<xref ref-type="disp-formula" rid="e1">Equation 1</xref>). Briefly, the algorithm performs random sampling of the input data and employs the median and median absolute deviation (MAD) to reduce the influence of outliers, such as bright stars. An iterative linear fitting procedure with outlier rejection is then applied to determine the final display range. Signal values from the background-subtracted frame (<inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) are clipped to this range (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>), where <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes a pixel-value truncation function. This strategy preserves the dynamic range for most pixels while avoiding information loss associated with aggressive clipping. Faint signals are linearly remapped into a wider display range, thereby enhancing their intrinsic contrast features.<disp-formula id="e1">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">Z</mml:mi>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
<disp-formula id="e2">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Finally, the processed signal is normalized and exported as an 8-bit grayscale PNG image (<xref ref-type="disp-formula" rid="e3">Equation 3</xref>).<disp-formula id="e3">
<mml:math id="m7">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn mathvariant="bold">255</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
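<p>A compact sketch of Equations 1&#x2013;3 is shown below; the ZScale limits are obtained with astropy.visualization, and the use of Pillow for the PNG export is an assumption for the example.</p>
<code language="python"><![CDATA[
# Sketch of Equations 1-3: ZScale display limits, clipping, and linear
# remapping of the background-subtracted signal to an 8-bit grayscale PNG.
import numpy as np
from astropy.visualization import ZScaleInterval
from PIL import Image

def enhance_to_png(i_net, out_path="subfield.png"):
    v_min, v_max = ZScaleInterval().get_limits(i_net)                 # Equation 1
    i_clip = np.clip(i_net, v_min, v_max)                             # Equation 2
    png = np.round(255.0 * (i_clip - v_min) / (v_max - v_min))        # Equation 3
    png = png.astype(np.uint8)
    Image.fromarray(png).save(out_path)
    return png
]]></code>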
<p>For the enhancement pipeline described above, analysis of the image grayscale histogram reveals a highly skewed distribution, in which more than 99% of pixel values are concentrated well below the upper bound of the 8-bit dynamic range. This result indicates that the majority of the dynamic range is contributed by a small number of high-value pixels, typically associated with bright stars. Furthermore, <inline-formula id="inf5">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mo>_</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is statistically defined at a level well below the background mean, ensuring that the clipping operation targets only these extreme outliers. Pixels that are highly unlikely to contain genuine astronomical signal are excluded from the stretching process. For valid faint signals located near or above the background level, subtracting <inline-formula id="inf6">
<mml:math id="m9">
<mml:mrow>
<mml:mi mathvariant="bold-italic">V</mml:mi>
<mml:mo>_</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> expands subtle intensity differences into tens of gray levels in the 8-bit output image, thereby accentuating the features of faint targets. Therefore, this preprocessing stage does not simply discard information but instead represents a statistically grounded step for feature selection and enhancement. As a result, it provides subsequent deep learning models with input data characterized by enhanced feature clarity and higher contrast.</p>
<p>To quantitatively evaluate the enhancement results, several metrics are employed. The mean gray value (<xref ref-type="disp-formula" rid="e4">Equation 4</xref>) represents the arithmetic average of all pixel intensities and reflects the overall image brightness. Global contrast (<xref ref-type="disp-formula" rid="e5">Equation 5</xref>) measures the dispersion of the intensity distribution, indicating overall contrast strength. Information entropy (<xref ref-type="disp-formula" rid="e6">Equation 6</xref>) characterizes the complexity of the intensity distribution, where <inline-formula id="inf11">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes a pixel gray value, <inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of pixels, and <inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the probability of gray level <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> occurring in the image.<disp-formula id="e4">
<mml:math id="m18">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m19">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m20">
<mml:mrow>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mn mathvariant="bold">255</mml:mn>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">log</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
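<p>A brief sketch of how these metrics can be computed for an 8-bit image is given below; it mirrors Equations 4&#x2013;6 directly and introduces no parameters beyond those defined in the text.</p>
<code language="python"><![CDATA[
# Sketch of the quality metrics in Equations 4-6 for an 8-bit grayscale image:
# mean gray value, global contrast (standard deviation), and information entropy.
import numpy as np

def quality_metrics(img_u8):
    x = img_u8.astype(np.float64)
    mean_gray = x.mean()                                  # Equation 4
    contrast = x.std()                                    # Equation 5
    hist, _ = np.histogram(img_u8, bins=256, range=(0, 256))
    p = hist / hist.sum()                                 # probability of each gray level
    p = p[p > 0]                                          # empty bins contribute zero
    entropy = -np.sum(p * np.log2(p))                     # Equation 6
    return mean_gray, contrast, entropy
]]></code>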
<p>Applying these metrics to images before and after enhancement yields the comparative results summarized in <xref ref-type="table" rid="T1">Table 1</xref>. The results indicate that applying ZScale stretching following background subtraction yields increases of approximately 23.53-fold, 17.7-fold, and 3.3-fold in the mean gray value, global contrast, and information entropy, respectively, accompanied by a substantial expansion of the dynamic range and a more effective utilization of the gray-level distribution. Furthermore, incorporating non-uniform background subtraction into the stretching enhancement results in improvements of 1.53% and 14.9% in the mean gray value and information entropy, respectively. A slight decrease in global contrast is observed after background subtraction, which can be attributed to a reduction in overall gray-level dispersion following the removal of non-uniform background components. This behavior is expected and does not hinder the effective enhancement of faint-target contrast. Overall, the proposed pipeline effectively exploits the detailed information of faint targets in compressed images and confirms the effectiveness of background subtraction in improving the performance of adaptive stretching.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Quality metrics values for original images, directly enhanced images, and enhanced images after background subtraction.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Metric name</th>
<th align="left">Original image</th>
<th align="left">Enhanced image</th>
<th align="left">Enhanced image (background-subtracted)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Mean intensity</td>
<td align="left">3.448</td>
<td align="left">79.938</td>
<td align="left">81.160</td>
</tr>
<tr>
<td align="left">Global contrast</td>
<td align="left">1.807</td>
<td align="left">32.344</td>
<td align="left">31.991</td>
</tr>
<tr>
<td align="left">Information entropy</td>
<td align="left">2.055</td>
<td align="left">5.902</td>
<td align="left">6.781</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Label set creation</title>
<p>Preparation of the label set is a critical step for providing reliable supervisory signals to the deep learning model. An automated label-generation pipeline was developed for the preprocessed images. The pipeline consists of four main steps: background suppression, connected-component segmentation, signal-to-noise ratio (SNR) filtering, and spatial mask encoding. It takes the original single-frame FITS image and the corresponding processed PNG image as inputs and produces a mask image with target annotations suitable for direct use in model training.</p>
<p>To mitigate the influence of dark current and fixed-pattern noise, candidate targets are initially extracted using a threshold-based segmentation criterion (<xref ref-type="disp-formula" rid="e7">Equation 7</xref>):<disp-formula id="e7">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">B</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0.5</mml:mn>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf15">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the background standard deviation and <inline-formula id="inf16">
<mml:math id="m23">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is an empirical scaling factor. As this step is intended as a preliminary extraction stage designed to retain as many potential targets as possible for subsequent filtering, <inline-formula id="inf17">
<mml:math id="m24">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is conservatively set to 0.5. Connected-component labeling is then applied to the resulting binary image, after which regions containing fewer than two pixels are automatically discarded. This step ensures that only physically plausible source blobs are retained for further analysis.</p>
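<p>A minimal sketch of this preliminary extraction step is shown below; the background map and its standard deviation are assumed to come from the background model of <xref ref-type="sec" rid="s2-1">Section 2.1</xref>, and the connectivity used for labeling is an assumption.</p>
<code language="python"><![CDATA[
# Sketch of the candidate extraction in Equation 7 (threshold T = B + f * sigma_B
# with f = 0.5), followed by connected-component labeling and removal of regions
# smaller than two pixels.
import numpy as np
from scipy import ndimage

def candidate_labels(image, background, sigma_b, f=0.5, min_pixels=2):
    binary = image > background + f * sigma_b        # Equation 7
    labels, n = ndimage.label(binary)                # connected-component labeling
    sizes = ndimage.sum(binary, labels, index=np.arange(1, n + 1))
    valid = np.where(sizes >= min_pixels)[0] + 1     # label IDs to keep
    labels[~np.isin(labels, valid)] = 0              # discard single-pixel blobs
    return labels
]]></code>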
<p>To prevent undetectably faint targets from contaminating the training set, the pipeline applies an explicit SNR-based threshold filter (<xref ref-type="disp-formula" rid="e11">Equation 11</xref>). The SNR calculation strictly follows the aperture photometry framework to maintain consistency with standard astronomical measurement practices. For each source blob identified from the connected components, a bounding circular aperture radius <inline-formula id="inf18">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is first calculated (<xref ref-type="disp-formula" rid="e8">Equation 8</xref>):<disp-formula id="e8">
<mml:math id="m26">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mi mathvariant="bold">max</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:munder>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="&#x2329;" close="&#x232a;" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf19">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the spatial coordinate of the <inline-formula id="inf20">
<mml:math id="m28">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th pixel and <inline-formula id="inf21">
<mml:math id="m29">
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the centroid. One pixel is added to the calculated radius to ensure that the aperture fully encloses the target signal. Using this radius to define a circular aperture, the total net signal in electron units within the aperture is computed (<xref ref-type="disp-formula" rid="e9">Equation 9</xref>):<disp-formula id="e9">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">G</mml:mi>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf22">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the total Analog-to-Digital Unit (ADU) count within the aperture, <inline-formula id="inf23">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the aperture area in pixels, <inline-formula id="inf24">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the mean background level estimated from a concentric sky annulus, and <inline-formula id="inf25">
<mml:math id="m34">
<mml:mrow>
<mml:mi mathvariant="bold-italic">G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the camera gain (e<sup>&#x2212;</sup>/ADU). Background fluctuations are estimated within the same sky annulus, yielding the per-pixel background variance <inline-formula id="inf26">
<mml:math id="m35">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. According to standard error propagation in aperture photometry, the expected background noise in electron units is given by <xref ref-type="disp-formula" rid="e10">Equation 10</xref>:<disp-formula id="e10">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mi mathvariant="bold-italic">g</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">G</mml:mi>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf27">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the standard deviation measured in the background annulus. The target signal-to-noise ratio is then calculated in the standard form used in aperture photometry (<xref ref-type="disp-formula" rid="e11">Equation 11</xref>):<disp-formula id="e11">
<mml:math id="m38">
<mml:mrow>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mi mathvariant="bold-italic">g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>This formulation accounts for both source photon noise and background Poisson noise, consistent with the theoretical models used in mainstream photometry software. Furthermore, to accurately anchor the celestial coordinates of real targets, the Gaia DR3 catalog (<xref ref-type="bibr" rid="B23">Vallenari et al., 2023</xref>) is used as a reference, with its sub-milliarcsecond-precision positions mapped to image pixel coordinates. Because the detection limit of Gaia DR3 (G &#x2248; 20.7 mag) is deeper than that of the observational images, many faint cataloged sources have fluxes below the background noise level and therefore do not form distinct connected components. To construct a reliable training label set, the detectability of each Gaia source is assessed directly in the images. Specifically, fixed 3 &#xd7; 3 pixel aperture photometry is performed at the mapped pixel position of each source, and the corresponding SNR is calculated. This aperture size is approximately twice the full width at half maximum (FWHM) of the point spread function (PSF) measured in the images, ensuring that it fully encloses a point source. Finally, only sources with an SNR &#x2265; 2.0 are retained as valid training labels. This threshold was selected to preserve as many detectable astronomical objects as possible while maintaining a one-to-one correspondence with the training labels (<xref ref-type="fig" rid="F5">Figure 5</xref>).</p>
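<p>The following sketch illustrates the SNR computation of Equations 9&#x2013;11 with photutils aperture photometry; the annulus radii and the gain value are illustrative assumptions, and the aperture radius is taken to follow Equation 8.</p>
<code language="python"><![CDATA[
# Sketch of the aperture-photometry SNR in Equations 9-11.
import numpy as np
from photutils.aperture import (CircularAperture, CircularAnnulus,
                                aperture_photometry)

def blob_snr(image, centroid_xy, radius, gain=1.0):
    aperture = CircularAperture(centroid_xy, r=radius)
    annulus = CircularAnnulus(centroid_xy, r_in=radius + 3.0, r_out=radius + 8.0)

    # Background statistics measured in the concentric sky annulus
    ann_values = annulus.to_mask(method="center").get_values(image)
    mu_b, sigma_b = np.mean(ann_values), np.std(ann_values)

    f_adu = aperture_photometry(image, aperture)["aperture_sum"][0]
    area = aperture.area
    f_e = (f_adu - area * mu_b) * gain     # Equation 9: net signal in electrons
    sigma_bg = area * sigma_b * gain       # Equation 10: background noise in electrons
    return f_e / np.sqrt(f_e + sigma_bg)   # Equation 11
]]></code>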
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Visualization of image labeling (Green: labels from connected components; Blue: labels for targets meeting the SNR threshold but without a connected component; Red: pixel coordinates of Gaia DR3 sources).</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g005.tif">
<alt-text content-type="machine-generated">Astronomical digital image showing a dark, pixelated star field with numerous small green, blue, and red points. Two magnified insets highlight specific regions, revealing clusters of colored dots representing celestial objects.</alt-text>
</graphic>
</fig>
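<p>As an illustrative sketch of the labeling criterion above (not the released pipeline code), the aperture SNR at a projected Gaia position can be computed as follows in Python. The function and variable names, and the assumption that the image has already been background-subtracted and converted to electrons, are illustrative choices rather than details taken from the text.</p>
<preformat>
import numpy as np

def aperture_snr(image_e, x, y, sigma_B, gain=1.0, half=1):
    """Aperture SNR in a fixed (2*half+1) by (2*half+1) pixel box.

    image_e : 2-D background-subtracted image in electrons
    x, y    : integer pixel position of the projected Gaia source
    sigma_B : background standard deviation measured in the annulus (ADU)
    gain    : conversion factor in electrons per ADU
    """
    box = image_e[y - half:y + half + 1, x - half:x + half + 1]
    F_e = box.sum()                          # summed source flux in electrons
    A = (2 * half + 1) ** 2                  # aperture area in pixels
    sigma_bg = A * sigma_B * gain            # background noise term (Eq. 10)
    variance = max(F_e + sigma_bg, 1e-6)     # guard against negative fluxes
    return F_e / np.sqrt(variance)           # Eq. 11

def select_labels(image_e, gaia_xy, sigma_B, gain=1.0, snr_min=2.0):
    """Keep only Gaia sources whose in-image SNR reaches the 2.0 threshold."""
    return [(x, y) for (x, y) in gaia_xy
            if aperture_snr(image_e, x, y, sigma_B, gain) >= snr_min]
</preformat>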
</sec>
</sec>
<sec sec-type="methods" id="s3">
<label>3</label>
<title>Methods</title>
<p>The ATD-DL framework comprises three core components: astronomical image enhancement, target segmentation based on a deep learning model, and result separation with centroid determination. The image enhancement component is described in detail in <xref ref-type="sec" rid="s2">Section 2</xref>. The following sections describe the model architecture, training configuration, and comparative validation experiments. The performance of target segmentation is primarily determined by the model architecture and training configuration, including the choice of loss function, learning rate, and the quality of the labeled dataset. The separation and centroid determination process determines how effectively individual targets are extracted from the segmentation map and how accurately their positions are measured. Taken together, these components define the upper bound of the achievable performance of the ATD-DL framework. The overall workflow of the method is illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Complete workflow diagram of the ATD-DL method.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g006.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a deep learning image analysis pipeline with boxes for training set images, label set images, deep learning model, model parameters, test set images, segmentation map, deblending, centroid extraction, and result set, connected by labeled arrows showing data flow and process sequence.</alt-text>
</graphic>
</fig>
<sec id="s3-1">
<label>3.1</label>
<title>Model introduction</title>
<p>This study develops an astronomical image segmentation model based on the U-Net architecture (<xref ref-type="bibr" rid="B16">Ronneberger et al., 2015</xref>), which is further adapted to meet the specific requirements of faint astronomical target detection (<xref ref-type="fig" rid="F7">Figure 7</xref>).</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Architecture of the proposed deep learning model.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g007.tif">
<alt-text content-type="machine-generated">Neural network architecture diagram with a U-shaped structure featuring downsampling and upsampling paths, skip connections, and a feature enhancement module. Process flows, block operations, and layer sizes are color-coded and referenced in a legend.</alt-text>
</graphic>
</fig>
<p>The conventional U-Net architecture employs paired 3 &#xd7; 3 convolutional operations at each layer, resulting in an effective receptive field of only 5 &#xd7; 5 after stacking. For faint targets, whose informative content is often confined to only a few pixels, this receptive field&#x2014;although sufficient to encompass the target itself&#x2014;provides limited contextual information for discriminating genuine sources from background noise. To address this limitation, we design a multi-branch Feature Enhancement Module (FEM). This module adopts a parallel architecture, incorporating both a standard 3 &#xd7; 3 convolutional branch and a 5 &#xd7; 5 depthwise separable convolutional branch. These branches are designed to capture fine-grained local features and broader contextual information, respectively. The extracted features are subsequently fused using a 1 &#xd7; 1 convolution, enabling effective multi-scale feature integration and enhancing the network&#x2019;s representational capacity for faint targets. Furthermore, the use of depthwise separable convolution enables dual receptive fields while substantially reducing the parameter count associated with the 5 &#xd7; 5 kernel, thereby improving training efficiency and generalization performance.</p>
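<p>A minimal PyTorch sketch of the Feature Enhancement Module described above is given below: a standard 3 &#xd7; 3 convolutional branch in parallel with a 5 &#xd7; 5 depthwise separable branch, fused by a 1 &#xd7; 1 convolution. The normalization layers, activation functions, and channel sizes are assumptions made only for illustration.</p>
<preformat>
import torch
import torch.nn as nn

class FeatureEnhancementModule(nn.Module):
    """Parallel 3x3 conv and 5x5 depthwise-separable conv, fused by a 1x1 conv."""
    def __init__(self, in_ch, out_ch):
        super().__init__()
        # fine-grained local branch
        self.local = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch), nn.ReLU(inplace=True))
        # broader-context branch: depthwise 5x5 followed by pointwise 1x1
        self.context = nn.Sequential(
            nn.Conv2d(in_ch, in_ch, kernel_size=5, padding=2,
                      groups=in_ch, bias=False),
            nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_ch), nn.ReLU(inplace=True))
        # 1x1 fusion of the concatenated branch outputs
        self.fuse = nn.Conv2d(2 * out_ch, out_ch, kernel_size=1)

    def forward(self, x):
        y = torch.cat([self.local(x), self.context(x)], dim=1)
        return self.fuse(y)

# example: single-channel 512x512 input, 64 output channels
fem = FeatureEnhancementModule(1, 64)
out = fem(torch.randn(1, 1, 512, 512))   # shape (1, 64, 512, 512)
</preformat>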
<p>In contrast to the three-channel input used in the original U-Net, the proposed model adopts a single-channel input to better accommodate astronomical grayscale imagery. The encoder follows the canonical U-Net hierarchical downsampling scheme, employing max-pooling operations and double-convolution blocks. Each block consists of two consecutive 3 &#xd7; 3 convolutional layers that serve as the fundamental feature extraction unit. The input image first passes through an initial double-convolution block to extract low-level feature representations. Each subsequent downsampling stage halves the feature-map resolution using a 2 &#xd7; 2 max-pooling operation, followed by a double-convolution block that doubles the number of output channels (64 &#x2192; 128 &#x2192; 256 &#x2192; 512), forming a four-stage encoding path. This design reduces computational cost while preserving critical edge information. Skip connections forward feature maps from each encoder level directly to the corresponding decoder level, providing high-resolution structural priors for the upsampling path.</p>
<p>In the decoder, bilinear interpolation&#x2013;based upsampling, which smoothly enlarges feature maps by computing weighted averages of neighboring pixels, is employed to mitigate checkerboard artifacts. The upsampled features are concatenated channel-wise with the corresponding encoder features through skip connections, and the resulting feature map is subsequently processed by a double-convolution block. This process is repeated at each decoding level, progressively restoring spatial resolution and ultimately producing the final segmentation map.</p>
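<p>The decoding step can be sketched as follows (bilinear upsampling, channel-wise concatenation with the encoder skip feature, and a double-convolution block); the exact channel counts and layer ordering are assumptions for illustration only.</p>
<preformat>
import torch
import torch.nn as nn

class UpBlock(nn.Module):
    """Bilinear upsample, concatenate the encoder skip feature, then double conv."""
    def __init__(self, in_ch, skip_ch, out_ch):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2, mode="bilinear",
                              align_corners=True)
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch + skip_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch), nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch), nn.ReLU(inplace=True))

    def forward(self, x, skip):
        x = self.up(x)                       # restore spatial resolution
        x = torch.cat([x, skip], dim=1)      # skip connection from the encoder
        return self.conv(x)

# example: 512-channel bottleneck feature merged with a 256-channel skip map
up = UpBlock(512, 256, 256)
out = up(torch.randn(1, 512, 64, 64), torch.randn(1, 256, 128, 128))
</preformat>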
<p>Furthermore, the symmetric structure of U-Net, in which the encoder and decoder comprise an equal number of layers, is essential to its performance. This symmetry enables skip connections to merge feature maps at corresponding spatial scales, allowing high-resolution information from early encoder stages to compensate for detail loss incurred during downsampling. As a result, multi-scale feature fusion is achieved, significantly enhancing overall segmentation accuracy.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Training</title>
<p>This study constructs a dedicated dataset for astronomical target recognition using real observational images acquired with the same telescope system. To enable a rigorous assessment of the deep learning model&#x2019;s generalization capability and to minimize overfitting, the dataset is constructed such that the sky regions covered by the training and testing data are strictly isolated. The complete dataset comprises 20 images, which are divided into two mutually exclusive subsets in terms of sky coverage: 8 images for training and validation, and 12 images reserved as an independent test set. The observational parameters of key reference images are summarized in <xref ref-type="table" rid="T2">Table 2</xref> for quantitative analysis. The field center coordinates of the final image in the training phase and the first image in the testing phase (<xref ref-type="fig" rid="F8">Figure 8</xref>) are (RA, Dec) &#x3d; (300.76&#xb0;, &#x2212;5.90&#xb0;) and (308.86&#xb0;, &#x2212;5.91&#xb0;), respectively. These values indicate that, following completion of the training observations, the telescope pointing was deliberately adjusted for the test set, resulting in a substantial right ascension offset of approximately 8.09&#xb0;. Based on the system optical parameters&#x2014;focal length FL &#x3d; 124.9 mm and pixel size Pscale &#x3d; 0.0090 mm&#x2014;the resulting image scale is approximately 14.86 arcsec pixel<sup>-1</sup>. With an effective imaging area of 1800 &#xd7; 1800 pixels, the corresponding field of view has a width of approximately 7.4&#xb0; and a diagonal of about 10.5&#xb0;. Under these conditions, an RA offset exceeding 8&#xb0; ensures that the projected sky areas of the two fields are completely separated. Specifically, the RA coverage of sky region A spans approximately 297.06&#xb0;&#x2013;304.46&#xb0;, while that of sky region B spans approximately 305.16&#xb0;&#x2013;312.56&#xb0;. A gap of approximately 0.7&#xb0; exists between the two regions, ensuring the absence of any sky overlap.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Details of observation parameters for important dataset partitioning nodes.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Metric name</th>
<th align="left">Training set<break/>-Image 1 (start)</th>
<th align="left">Training set<break/>-Image 8 (end)</th>
<th align="left">Test set<break/>-Image 9 (start)</th>
<th align="left">Test set<break/>-Image 20 (end)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Observation time (UTC)</td>
<td align="left">2023-09-21<break/>11:21:32.000</td>
<td align="left">2023-09-21<break/>11:46:49.000</td>
<td align="left">2023-09-21<break/>11:54:01.000</td>
<td align="left">2023-09-21<break/>12:19:20.000</td>
</tr>
<tr>
<td align="left">Exposure time (ms)</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pointing center (RA/&#xb0;)</td>
<td align="left">300.69</td>
<td align="left">300.76</td>
<td align="left">308.86</td>
<td align="left">308.94</td>
</tr>
<tr>
<td align="left">Pointing center (DE/&#xb0;)</td>
<td align="left">&#x2212;5.90</td>
<td align="left">&#x2212;5.90</td>
<td align="left">&#x2212;5.91</td>
<td align="left">&#x2212;5.91</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Visual comparison of key node images for dataset partition-<bold>(A)</bold> Training set-Image 1 (start), <bold>(B)</bold> Training set-Image 8 (end), <bold>(C)</bold> Test set-Image 9 (start), <bold>(D)</bold> Test set-Image 20 (end).</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g008.tif">
<alt-text content-type="machine-generated">Panel A shows a grayscale speckled texture with fine white dots scattered throughout. Panel B presents a similar speckled pattern. Panels C and D also display dense, uniform distributions of small white specks on dark backgrounds.</alt-text>
</graphic>
</fig>
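<p>For reference, the plate-scale arithmetic underlying the field-of-view figures quoted above is reproduced in the short sketch below.</p>
<preformat>
# System optical parameters from the text: FL = 124.9 mm, pixel = 0.0090 mm
scale = 206265 * 0.0090 / 124.9      # plate scale: about 14.86 arcsec per pixel
width = 1800 * scale / 3600          # field width: about 7.4 deg
diagonal = width * 2 ** 0.5          # field diagonal: about 10.5 deg
print(scale, width, diagonal)
</preformat>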
<p>Based on the observational images described above, each full-frame image was partitioned into subfields, yielding a total of 180 subfield images with a resolution of 512 &#xd7; 512 pixels. For the first 8 images, subfields 1, 3, 4, 6, 7, and 9 were combined to form the training set, while subfields 2, 5, and 8 were used as the validation set for hyperparameter tuning. All subfields derived from the remaining 12 images were reserved as an independent test set to evaluate model performance on entirely unseen data, including potential observational variations.</p>
<p>The dataset was annotated using the Gaia catalog, resulting in a total of 208,095 labeled stellar point-source targets. The model was implemented using the PyTorch framework and trained on an NVIDIA GeForce RTX 4060Ti GPU.</p>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Training setup</title>
<p>The model was trained using a composite loss function that combines the widely adopted Cross-Entropy loss (<xref ref-type="bibr" rid="B10">Krizhevsky et al., 2017</xref>) and Dice loss (<xref ref-type="bibr" rid="B21">Sudre et al., 2017</xref>). The Cross-Entropy loss component measures the pixel-wise discrepancy between predicted probabilities and ground-truth labels, ensuring accurate pixel-level classification. This property is essential for distinguishing subtle intensity differences between faint targets and background noise in astronomical images. In contrast, the Dice loss component evaluates the spatial overlap between predicted regions and ground-truth masks, emphasizing overall segmentation performance within the target area.</p>
<p>The Cross-Entropy loss is defined as <xref ref-type="disp-formula" rid="e12">Equation 12</xref>:<disp-formula id="e12">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">L</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">C</mml:mi>
<mml:mi mathvariant="bold-italic">E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mrow>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">H</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">W</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold">log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>here, <inline-formula id="inf28">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x2208; {0,1} denotes the ground-truth label at pixel <inline-formula id="inf29">
<mml:math id="m41">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf30">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x2208; (0,1) represents the predicted probability, and <inline-formula id="inf31">
<mml:math id="m43">
<mml:mrow>
<mml:mi mathvariant="bold-italic">H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32">
<mml:math id="m44">
<mml:mrow>
<mml:mi mathvariant="bold-italic">W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> correspond to the image height and width, respectively. This pixel-wise formulation ensures precise classification, particularly along object boundaries.</p>
<p>The Dice loss is defined as <xref ref-type="disp-formula" rid="e13">Equation 13</xref>:<disp-formula id="e13">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">L</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf33">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the ground-truth label and <inline-formula id="inf34">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted probability.</p>
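<p>A compact sketch of the composite objective combining Equations 12 and 13 is shown below; because the text does not specify the relative weighting of the two terms, an equal weighting is assumed here for illustration.</p>
<preformat>
import torch

def composite_loss(pred, target, eps=1e-6, w_ce=1.0, w_dice=1.0):
    """pred: raw logits (N,1,H,W); target: binary mask (N,1,H,W) with values in {0,1}."""
    p = torch.sigmoid(pred)
    # pixel-wise binary cross-entropy (Eq. 12)
    ce = -(target * torch.log(p + eps)
           + (1 - target) * torch.log(1 - p + eps)).mean()
    # soft Dice loss over the batch (Eq. 13)
    inter = (p * target).sum()
    dice = 1 - 2 * inter / (p.sum() + target.sum() + eps)
    return w_ce * ce + w_dice * dice

loss = composite_loss(torch.randn(2, 1, 512, 512),
                      torch.randint(0, 2, (2, 1, 512, 512)).float())
</preformat>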
<p>A dynamic learning-rate schedule was employed during training. The initial learning rate was set to <inline-formula id="inf35">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn mathvariant="bold">10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">4</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and a cosine annealing scheduler (<xref ref-type="bibr" rid="B13">Loshchilov and Hutter, 2017</xref>) was used to adjust the learning rate dynamically within the range <inline-formula id="inf36">
<mml:math id="m49">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn mathvariant="bold">10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">6</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>. This scheduler nonlinearly decays the learning rate from its initial value to a predefined minimum following a cosine profile, promoting stable convergence and reducing the risk of optimization stagnation (<xref ref-type="bibr" rid="B9">Kingma and Ba, 2014</xref>). The learning rate at training epoch t is given by <xref ref-type="disp-formula" rid="e14">Equation 14</xref>:<disp-formula id="e14">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3b7;</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">cos</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mi mathvariant="bold-italic">&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>where <inline-formula id="inf37">
<mml:math id="m51">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the current epoch and <inline-formula id="inf38">
<mml:math id="m52">
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of training epochs. This strategy enables rapid convergence during early training while allowing fine-grained parameter refinement in later epochs.</p>
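<p>In PyTorch, Equation 14 corresponds to the built-in CosineAnnealingLR scheduler, as sketched below; the optimizer choice and the 100-epoch horizon are assumptions consistent with, but not explicitly stated in, the training description.</p>
<preformat>
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

model = torch.nn.Conv2d(1, 1, 3, padding=1)                  # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)    # eta_init = 1e-4
# decay the learning rate from 1e-4 to 1e-6 over T = 100 epochs (Eq. 14)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)

for epoch in range(100):
    # ... forward, backward, and optimizer.step() over the training batches ...
    scheduler.step()        # update eta_t at the end of each epoch
</preformat>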
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Training results</title>
<p>Analysis of the training and validation loss curves (<xref ref-type="fig" rid="F9">Figure 9</xref>) shows that both decrease steadily overall and ultimately converge. The training loss decreased rapidly from an initial value of 1.287 to approximately 0.713 within the first 20 epochs, followed by a slower decay phase, stabilizing near 0.565 by epoch 100. This behavior demonstrates good convergence and training stability. The validation loss exhibited moderate mid-training fluctuations but maintained an overall downward trend, converging to a level close to that of the training loss. The gap between the two curves gradually narrowed, reaching a final difference of approximately 0.005, suggesting that no significant overfitting occurred during training.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Training and validation loss curves throughout the learning process.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g009.tif">
<alt-text content-type="machine-generated">Line chart showing train loss and validation loss over 100 epochs, with loss decreasing from about 1.3 to 0.6. Smooth train and validation loss are represented by dashed lines.</alt-text>
</graphic>
</fig>
<p>The model produces a pixel-level probability map that is thresholded to generate a binary mask. Because the model performs semantic rather than instance segmentation, additional post-processing steps, including morphological analysis and instance separation, are required to derive sub-pixel centroid coordinates consistent with traditional detection methods.</p>
<p>Connected-component analysis is first applied to the 512 &#xd7; 512 binary mask to identify pixel clusters corresponding to individual targets. In astronomical images, crowded stellar fields or closely spaced sources may cause multiple targets to merge into a single connected component, a phenomenon commonly referred to as blending. To address this issue, a blending-target separation algorithm based on morphological features and distance transform analysis was employed. Specifically, the circularity of each connected component is computed as a blending criterion, defined as <xref ref-type="disp-formula" rid="e15">Equation 15</xref>:<disp-formula id="e15">
<mml:math id="m53">
<mml:mrow>
<mml:mi mathvariant="bold-italic">C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">4</mml:mn>
<mml:mi mathvariant="bold-italic">&#x3c0;</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>where <inline-formula id="inf39">
<mml:math id="m54">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the component area and <inline-formula id="inf40">
<mml:math id="m55">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is its perimeter. A component is classified as blended if its circularity falls below a predefined threshold of 0.7. For each blended component, the internal pixel with the maximum Euclidean distance to the background&#x2014;corresponding to the peak of the distance transform&#x2014;is selected as a seed point. A region-growing procedure is initiated from this seed, during which the region is iteratively expanded and its circularity recalculated after each step. Expansion proceeds only while the circularity remains below the threshold, thereby isolating the first sub-target. The remaining pixels are subsequently processed morphologically to extract the second sub-target (<xref ref-type="fig" rid="F10">Figure 10</xref>).</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Example of blended target separation using the circularity-based algorithm-<bold>(A)</bold> Detection of blended objects via circularity metric from the ATD-DL model output. (Green: Isolated targets; Red: Suspected blended objects.), <bold>(B)</bold> Result of the circularity-based separation algorithm applied to a blended region. (Green: Target 1; Blue: Target 2; Red: Seed center point).</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g010.tif">
<alt-text content-type="machine-generated">Panel A shows a black background overlaid with green and red segmented regions, where green indicates isolated targets and red marks candidates identified as blended sources. Panel B presents a 4&#xd7;4 grid of close-up views, each illustrating the segmentation of blended targets in green and blue on a black background, with the centroid of each segmented component marked in red.</alt-text>
</graphic>
</fig>
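<p>The blending test and seed selection described above can be sketched as follows; the full region-growing step is omitted, and the helper functions and library choices (scikit-image, SciPy) are illustrative assumptions rather than the implementation used in this work.</p>
<preformat>
import numpy as np
from scipy import ndimage
from skimage.measure import label, regionprops

def circularity(region):
    """C = 4*pi*A / P**2 (Eq. 15); equals 1.0 for a perfect disk."""
    p = max(region.perimeter, 1e-6)
    return 4.0 * np.pi * region.area / p ** 2

def blended_components(binary_mask, c_thresh=0.7):
    """Flag connected components whose circularity falls below c_thresh and
    return a seed pixel at the peak of the Euclidean distance transform."""
    flagged = []
    for region in regionprops(label(binary_mask)):
        if circularity(region) >= c_thresh:
            continue                          # roughly circular: isolated target
        sub = np.zeros(binary_mask.shape, dtype=bool)
        sub[tuple(region.coords.T)] = True
        dist = ndimage.distance_transform_edt(sub)
        seed = np.unravel_index(np.argmax(dist), dist.shape)
        flagged.append((region.label, seed))  # seed point for region growing
    return flagged
</preformat>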
<p>It is worth noting that asteroids or space debris may appear as elongated targets (e.g., near-Earth objects) in astronomical images. To avoid erroneous deblending of objects that are intrinsically single sources, a rapid discrimination step is introduced based on the presence of significant local intensity maxima within the target structure. Specifically, if no distinct local peak is detected within the target region, the resulting smooth intensity profile is more consistent with that expected from a moving object exhibiting uniform trailing during the exposure. Such targets are therefore preliminarily classified as moving objects and excluded from subsequent deblending procedures. Conversely, the presence of multiple well-defined local peaks suggests a blended projection of nearby stars, in which case deblending is applied.</p>
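<p>The peak-based discrimination between trailed moving objects and blended stars can be approximated by counting well-separated local intensity maxima inside each flagged component, as in the following sketch; the contrast threshold and neighborhood size are illustrative values, not parameters reported in the text.</p>
<preformat>
import numpy as np
from scipy import ndimage

def count_local_peaks(image, component_mask, min_contrast=3.0, size=3):
    """Count distinct local intensity maxima inside a single component."""
    patch = np.where(component_mask, image, -np.inf)
    local_max = (patch == ndimage.maximum_filter(patch, size=size))
    strong = patch - np.median(image[component_mask]) > min_contrast
    return int(np.count_nonzero(local_max * strong * component_mask))

# fewer than two peaks: smooth trailed profile, treat as a moving object;
# two or more peaks: blended stars, apply the deblending step
</preformat>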
<p>Following target separation, a weighted centroid is computed for each independent object, including both non-blended targets and separated sub-components. The centroid calculation uses pixel values from the original calibrated FITS image as weights for the corresponding connected component. The weighted centroid coordinates <inline-formula id="inf41">
<mml:math id="m56">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> are given by <xref ref-type="disp-formula" rid="e16">Equation 16</xref>:<disp-formula id="e16">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf42">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the set of pixels belonging to the connected component, and <inline-formula id="inf43">
<mml:math id="m59">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the electron count at pixel position <inline-formula id="inf44">
<mml:math id="m60">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>. This procedure yields sub-pixel target centroid coordinates in a format directly comparable to the outputs of traditional astronomical detection pipelines.</p>
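<p>Equation 16 reduces to an intensity-weighted mean over the component pixels, as in the following sketch (array names assumed for illustration).</p>
<preformat>
import numpy as np

def weighted_centroid(image_e, component_pixels):
    """Intensity-weighted centroid (Eq. 16).

    image_e          : calibrated image in electrons
    component_pixels : (N, 2) array of (y, x) pixel coordinates in Omega
    """
    ys, xs = component_pixels[:, 0], component_pixels[:, 1]
    w = image_e[ys, xs].astype(float)          # I(x, y) used as weights
    x_c = np.sum(xs * w) / np.sum(w)
    y_c = np.sum(ys * w) / np.sum(w)
    return x_c, y_c                            # sub-pixel centroid
</preformat>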
</sec>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<label>4</label>
<title>Results and discussion</title>
<p>To comprehensively evaluate the model&#x2019;s detection performance, a cross-matching strategy based on the nearest-neighbor principle is adopted. Specifically, stars from the Gaia DR3 catalog are first projected onto the observed image region. All cataloged sources with a calculated signal-to-noise ratio (SNR) &#x2265; 2 are defined as the ground-truth set, denoted as G. To account for uncertainties in SNR estimation, the complete set of all cataloged sources, without SNR filtering, is defined as the full reference set A. For any detection method, its output detections constitute the candidate set D.</p>
<p>Because a target&#x2019;s light profile may extend into the field of view while its centroid lies just outside, candidates located within a 3-pixel border of the image edges are excluded prior to matching to avoid edge artifacts introduced during subfield partitioning. For each candidate in D, the nearest neighbor is searched within sets A and G using a fixed matching radius of 3 pixels. This radius, approximately twice the full width at half maximum of the image point spread function, accommodates systematic offsets among sub-pixel centroids produced by the proposed method, SExtractor moment-based estimates, and DAOPHOT Gaussian fitting results. If a reference source is identified within this neighborhood in either set A or G, the candidate is classified as a true positive (TP). Conversely, a candidate for which no corresponding reference source is found is recorded as a false positive (FP). After all candidates have been processed, any reference source in set G that remains unmatched is counted as a false negative (FN). This matching procedure was applied on a frame-by-frame basis across all 108 test images. Detection performance was evaluated using the standard metrics of precision, recall, and F1-score, defined as follows.<disp-formula id="equ1">
<mml:math id="m61">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m62">
<mml:mrow>
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="right">
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="1em"/>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
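<p>The matching procedure and metrics above can be sketched with a k-d tree as follows; the set names mirror the text (D, G, A) and the 3-pixel radius is the one stated above, while the implementation details are illustrative.</p>
<preformat>
import numpy as np
from scipy.spatial import cKDTree

def evaluate(D, G, A, radius=3.0):
    """D, G, A: (N, 2) arrays of (x, y) centroids. Returns precision, recall, F1."""
    tree_A, tree_G = cKDTree(A), cKDTree(G)
    matched_G = set()
    tp = fp = 0
    for xy in D:
        dA, _ = tree_A.query(xy)          # nearest neighbour in the full set A
        dG, jG = tree_G.query(xy)         # nearest neighbour in the filtered set G
        if min(dA, dG) > radius:
            fp += 1                       # no reference source within the radius
            continue
        tp += 1                           # matched in A or G: true positive
        if radius >= dG:
            matched_G.add(int(jG))        # this G reference has been recovered
    fn = len(G) - len(matched_G)          # unmatched G references
    precision = tp / max(tp + fp, 1)
    recall = tp / max(tp + fn, 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-12)
    return precision, recall, f1
</preformat>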
<sec id="s4-1">
<label>4.1</label>
<title>Performance analysis</title>
<p>To systematically assess the performance of the proposed ATD-DL framework, we compare it against two widely used traditional astronomical detection tools: SExtractor and DAOPHOT (specifically its DAOFind module). This evaluation follows established practices for benchmarking astronomical detection methods. Precision, Recall, and F1-score were computed consistently across all 108 test images, which contain approximately 87,000 true sources. Because traditional methods rely on manually specified detection thresholds, a dense parameter scan was first conducted: thresholds from 0.5 to 2.0 for SExtractor and from 4.0 to 8.0 for DAOFind, both in steps of 0.1. Subsequently, ten representative parameter points were selected for each method based on the Precision&#x2013;Recall criterion (<xref ref-type="table" rid="T3">Table 3</xref>). The remaining primary parameters used for SExtractor are listed in <xref ref-type="table" rid="T4">Table 4</xref>, with SEEING_FWHM selected as the optimal value through fine-tuning and comparative testing on real images. For deep learning&#x2013;based target detection, StarNet (<xref ref-type="bibr" rid="B24">Xue et al., 2020</xref>) and DNA-Net (<xref ref-type="bibr" rid="B11">Li et al., 2023</xref>) were selected for comparison with the proposed method, and ten checkpoints saved during training were used for each model.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Details of the checkpoint parameters for SExtractor and DAOFind methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Checkpoint number</th>
<th align="left">SExtractor threshold</th>
<th align="left">DAOFind threshold</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">0</td>
<td align="left">0.1</td>
<td align="left">3.0</td>
</tr>
<tr>
<td align="left">1</td>
<td align="left">0.2</td>
<td align="left">3.3</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">0.3</td>
<td align="left">3.5</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">0.4</td>
<td align="left">3.7</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">0.5</td>
<td align="left">4.0</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">0.6</td>
<td align="left">4.5</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">0.7</td>
<td align="left">5.0</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">0.8</td>
<td align="left">5.5</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">0.9</td>
<td align="left">6.0</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">1.0</td>
<td align="left">6.5</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Details of the main parameters used in SExtractor.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="left">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DETECT_TYPE</td>
<td align="left">CCD</td>
</tr>
<tr>
<td align="left">DETECT_MINAREA</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">DEBLEND_NTHRESH</td>
<td align="left">32</td>
</tr>
<tr>
<td align="left">DEBLEND_MINCONT</td>
<td align="left">0.005</td>
</tr>
<tr>
<td align="left">SEEING_FWHM</td>
<td align="left">1.2</td>
</tr>
<tr>
<td align="left">BACK_SIZE</td>
<td align="left">64</td>
</tr>
<tr>
<td align="left">BACK_FILTERSIZE</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">BACKPHOTO_TYPE</td>
<td align="left">GLOBAL</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Under identical test conditions and hardware settings, the comparison sequence shown in <xref ref-type="fig" rid="F11">Figure 11</xref> uses ten nodes along the horizontal axis to represent progressively increasing detection thresholds for the traditional methods, which simultaneously reduce both detection and false-alarm rates. A systematic comparison of the overall performance trends is conducted in terms of precision, recall, and F1-score. The analysis focuses on quantifying performance gains using the mean of the three data points with the highest F1-scores (<xref ref-type="fig" rid="F12">Figure 12</xref>), and on comparing the magnitude of these gains across different signal-to-noise ratio (SNR) intervals (<xref ref-type="fig" rid="F13">Figure 13</xref>).</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Curve graphs of the results of each method on various statistical indicators.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g011.tif">
<alt-text content-type="machine-generated">Four line charts compare five methods&#x2014;SEX, DAO, ATD-DL, DNA-NET, and StarNet&#x2014;across checkpoint numbers. Metrics shown are precision, recall, F1, and precision-recall curves. DNA-NET and ATD-DL achieve the highest precision, recall, and F1 values, while DAO performs poorest throughout. Each line is color-coded and labeled with its final value at the last checkpoint.</alt-text>
</graphic>
</fig>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Comparison of the mean values of the top three data sets with the highest F1 values among all methods.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g012.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x201C;Average Metrics for Top 3 F1 Scores&#x201D; comparing Precision, Recall, and F1 across five models: SEX, DAO, ATD-DL, DNA-NET, and StarNet, with numeric values and error bars accompanying each bar.</alt-text>
</graphic>
</fig>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Comparison of recall rates across different signal-to-noise ratios for each method in the top three data groups with the highest F1 values.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g013.tif">
<alt-text content-type="machine-generated">Heatmap comparing the recall scores of five algorithms (ATD-DL, DNA-NET, StarNet, SExtractor, DAO) across eight signal-to-noise ratio (SNR) ranges, with higher recall values indicated by darker red shading and lower values by lighter shades.</alt-text>
</graphic>
</fig>
<p>As shown in <xref ref-type="fig" rid="F11">Figure 11</xref>, a modest increase in the detection threshold for SExtractor initially leads to a rapid rise in precision accompanied by only a minor decrease in recall, resulting in a peak F1-score. In contrast, the deep learning&#x2013;based methods maintain consistently higher average levels of both precision and recall than the two traditional approaches. To evaluate detection robustness (<xref ref-type="fig" rid="F12">Figure 12</xref>), the three data points with the highest F1-scores for each method were selected and averaged. The deep learning approach exhibits more stable performance across all metrics compared with traditional methods. Detailed metric values at peak F1-score performance for each method are listed in <xref ref-type="table" rid="T5">Table 5</xref>. Furthermore, to facilitate a fair comparison of detection capability across methods, thresholds for traditional approaches and parameter settings for deep learning models were selected to yield a Precision of approximately 90%. Under these matched Precision conditions, detection performance for faint targets (2 &#x3c; SNR &#x3c;5) was compared. As shown in <xref ref-type="table" rid="T5">Table 5</xref>, ATD-DL achieves a higher recall for faint targets (54.93%) than DNA-Net (48.99%), StarNet (45.83%), SExtractor (36.11%), and DAOFind (1.81%), demonstrating its superior sensitivity in the low-SNR regime.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Detailed performance metrics for each detection method on the test set at their respective peak F1-score operating points, together with a comparative analysis of overall recall and faint-/bright-target recall under aligned Precision conditions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th colspan="3" align="center">Highest F1 score</th>
<th colspan="3" align="center">Precision alignment</th>
</tr>
<tr>
<th align="center">Method</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">F1 score (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Recall (%)<break/>(2&#x3c;SNR&#x3c;5)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">SExtractor</td>
<td align="center">80.93</td>
<td align="center">62.55</td>
<td align="center">70.56</td>
<td align="center">90.70</td>
<td align="center">55.68</td>
<td align="center">36.11</td>
</tr>
<tr>
<td align="center">DAOFind</td>
<td align="center">72.85</td>
<td align="center">41.42</td>
<td align="center">52.82</td>
<td align="center">89.02</td>
<td align="center">21.10</td>
<td align="center">1.81</td>
</tr>
<tr>
<td align="center">StarNet</td>
<td align="center">91.69</td>
<td align="center">66.37</td>
<td align="center">77.00</td>
<td align="center">90.10</td>
<td align="center">63.78</td>
<td align="center">45.83</td>
</tr>
<tr>
<td align="center">DNA-NET</td>
<td align="center">93.93</td>
<td align="center">65.64</td>
<td align="center">77.28</td>
<td align="center">89.11</td>
<td align="center">65.68</td>
<td align="center">48.99</td>
</tr>
<tr>
<td align="center">ATD-DL</td>
<td align="center">91.70</td>
<td align="center">69.88</td>
<td align="center">79.32</td>
<td align="center">
<bold>91.70</bold>
</td>
<td align="center">
<bold>69.88</bold>
</td>
<td align="center">
<bold>54.93</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values denote the best performance achieved after Precision alignment.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To assess the ability of the models to detect extremely faint targets, the recall rate was computed within fine SNR bins of width 1 for each model at its maximum F1-score (<xref ref-type="fig" rid="F13">Figure 13</xref>). Compared with traditional methods, the performance advantage of ATD-DL increases steadily toward lower SNR values, rising from approximately 3% for sources with SNR &#x3e; 10 to about 20% for sources in the SNR 3&#x2013;4 bin. Although DNA-Net and StarNet exhibit detection performance comparable to ATD-DL in high-SNR regimes, ATD-DL shows a marked improvement for SNR values below 5. This indicates that a substantial fraction of the overall performance gain of ATD-DL arises from its enhanced ability to recover the faintest detectable targets.</p>
<p>Under identical optimal F1-score conditions, the astrometric positioning accuracy of the proposed method was further evaluated. Gaia DR3 catalog coordinates were first projected into the image plane using the instrument plate model to serve as reference positions. The predicted catalog was then cross-matched with the reference catalog using a 2-pixel matching radius, yielding matched source pairs. Coordinate residuals in the image plane were calculated for each pair, and their mean was taken as the systematic bias. After subtracting the corresponding directional bias, the remaining residuals were used to compute the root mean square (RMS) error in each direction. Based on the system optical parameters&#x2014;focal length FL &#x3d; 124.9 mm and pixel size Pscale &#x3d; 0.0090 mm&#x2014;the telescope image scale is approximately 14.86 arcsec pixel<sup>-1</sup>.</p>
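<p>The residual statistics described above amount to subtracting the mean offset per axis and taking the root mean square of the remaining residuals, as in the following sketch (matched coordinate arrays assumed).</p>
<preformat>
import numpy as np

PIX_SCALE = 14.86  # arcsec per pixel, from FL = 124.9 mm and 0.0090 mm pixels

def astrometric_rms(detected_xy, reference_xy):
    """detected_xy, reference_xy: (N, 2) arrays of matched centroids in pixels."""
    res = detected_xy - reference_xy
    res = res - res.mean(axis=0)                 # remove the systematic bias per axis
    rms_x, rms_y = np.sqrt((res ** 2).mean(axis=0))
    rms_total = np.sqrt(rms_x ** 2 + rms_y ** 2)
    return rms_total, rms_x, rms_y, rms_total * PIX_SCALE   # pixels and arcsec
</preformat>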
<p>The evaluation results presented in <xref ref-type="table" rid="T6">Table 6</xref> indicate that the proposed method exhibits a clear advantage in astrometric positioning accuracy. The total RMS error achieved by ATD-DL is 0.2247 pixels (3.3385 arcseconds), corresponding to reductions of 43% and 27% relative to the traditional methods SExtractor (0.3946 pixels, 5.8640 arcseconds) and DAOFind (0.3080 pixels, 4.5765 arcseconds), respectively. ATD-DL further attains the lowest RMS errors in both the X and Y directions among the three methods considered. In addition, DNA-Net and StarNet do not explicitly address precise source centroiding in their original formulations. Accordingly, the same weighted-average centroiding strategy adopted by ATD-DL was also applied to these models for consistency. Under the constraints of this centroiding approach, all three segmentation-based deep learning models yield comparable centroiding accuracy. Nevertheless, the results indicate that deep learning&#x2013;based approaches generally provide higher positioning accuracy than traditional methods, particularly when operating near the faint detection limit.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Detailed values of positioning accuracy for each method.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Total RMS error (corrected)</th>
<th align="center">RMS(X) error (corrected)</th>
<th align="center">RMS(Y) error (corrected)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">SExtractor</td>
<td align="center">0.3491 pix (5.1883&#x2033;)</td>
<td align="center">0.2566 pix (3.8135&#x2033;)</td>
<td align="center">0.2367 pix (3.5179&#x2033;)</td>
</tr>
<tr>
<td align="center">DAOFind</td>
<td align="center">0.3080 pix (4.5765&#x2033;)</td>
<td align="center">0.2078 pix (3.0880&#x2033;)</td>
<td align="center">0.2273 pix (3.3776&#x2033;)</td>
</tr>
<tr>
<td align="center">StarNet</td>
<td align="center">0.2815 pix (4.1831&#x2033;)</td>
<td align="center">0.2047 pix (3.0425&#x2033;)</td>
<td align="center">0.1932 pix (2.8709&#x2033;)</td>
</tr>
<tr>
<td align="center">DNA-NET</td>
<td align="center">0.2341 pix (3.4785&#x2033;)</td>
<td align="center">0.1631 pix (2.4243&#x2033;)</td>
<td align="center">0.1679 pix (2.4945&#x2033;)</td>
</tr>
<tr>
<td align="center">ATD-DL</td>
<td align="center">0.2353 pix (3.4963&#x2033;)</td>
<td align="center">0.1629 pix (2.4200&#x2033;)</td>
<td align="center">0.1698 pix (2.5234&#x2033;)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In summary, although traditional methods can improve Precision by increasing detection thresholds, this gain is accompanied by a rapid loss in Recall. In contrast, the deep learning approach preserves sensitivity to faint target features, achieving a more favorable balance between detection completeness and false-alarm suppression, particularly near the detection limit.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Ablation study</title>
<p>To rigorously assess the effectiveness of the proposed Feature Enhancement Module (FEM), a dedicated ablation study was conducted. The baseline model adopted a standard U-Net architecture, in which the first encoder layer employed a conventional 3 &#xd7; 3 double-convolution block. To avoid masking improvements in faint-target detection by aggregate performance metrics, a more targeted evaluation strategy was adopted, focusing exclusively on detection counts for faint targets with signal-to-noise ratios (SNR) between 2 and 5. To eliminate the influence of differing Precision levels, five parameter sets with closely matched Precision values were selected for comparison between the baseline U-Net and the enhanced model (UNet &#x2b; FEM). The corresponding results are summarized in <xref ref-type="table" rid="T7">Table 7</xref>.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Comparison of faint-target (2 &#x3c; SNR &#x3c;5) detection rates for U-Net before and after the incorporation of the Feature Enhancement Module (FEM), evaluated under matched Precision conditions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameter number</th>
<th align="center">UNet Precision (%)</th>
<th align="center">UNet TP (2&#x2013;5)</th>
<th align="center">UNet &#x2b; FEM Precision (%)</th>
<th align="center">UNet &#x2b; FEM TP (2&#x2013;5)</th>
<th align="center">Lift ratio (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">97.49</td>
<td align="center">20,344</td>
<td align="center">97.50</td>
<td align="center">20,637</td>
<td align="center">
<bold>1.44</bold>
</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">97.55</td>
<td align="center">20,153</td>
<td align="center">97.56</td>
<td align="center">20,589</td>
<td align="center">
<bold>2.16</bold>
</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">97.59</td>
<td align="center">19,946</td>
<td align="center">97.61</td>
<td align="center">20,467</td>
<td align="center">
<bold>2.61</bold>
</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">97.79</td>
<td align="center">19,659</td>
<td align="center">97.81</td>
<td align="center">19,920</td>
<td align="center">
<bold>1.33</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">99.03</td>
<td align="center">1,498</td>
<td align="center">99.08</td>
<td align="center">4,864</td>
<td align="center">
<bold>224.70</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values highlight the performance gains introduced by FEM in the faint-SNR regime.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The results show that, while maintaining comparable overall Precision (for example, 97.49% for U-Net and 97.50% for UNet &#x2b; FEM in parameter set 1), the enhanced model yields a pronounced increase in the number of detected faint targets across all comparisons. Notably, even under highly conservative operating conditions with Precision exceeding 99%, the FEM preserves strong sensitivity to faint sources. In particular, for parameter set 5, the number of detected faint targets increases more than threefold (a lift of approximately 225%). These results demonstrate that the FEM effectively enhances the model&#x2019;s ability to learn and recognize faint-target features without compromising overall detection accuracy, thereby substantially improving faint-source recovery.</p>
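<p>For clarity, the lift ratios reported in <xref ref-type="table" rid="T7">Table 7</xref> follow directly from the matched-precision TP counts. The short Python sketch below simply reproduces that arithmetic; the dictionary of counts is transcribed from the table, and the snippet is provided only as a worked example.</p>
<preformat>
# Lift ratio in the faint-SNR regime (2-5), computed from the matched-precision
# pairs in Table 7: (baseline U-Net TP, UNet + FEM TP) per parameter set.
pairs = {
    1: (20344, 20637),
    2: (20153, 20589),
    3: (19946, 20467),
    4: (19659, 19920),
    5: (1498, 4864),
}

for k, (tp_unet, tp_fem) in pairs.items():
    lift = 100.0 * (tp_fem - tp_unet) / tp_unet
    print(f"parameter set {k}: lift ratio = {lift:.2f} %")
# parameter set 1: 1.44 %  ...  parameter set 5: 224.70 %
</preformat>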
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Investigation of performance disparities</title>
<p>SExtractor provides both source positions and segmentation masks, a functionality analogous to the output of the proposed model. To investigate the underlying causes of the observed performance differences, a comparative analysis was performed between the segmentation maps generated by SExtractor (SEGMENT) and those produced by ATD-DL.</p>
<p>SExtractor relies on classical threshold-connected segmentation. As illustrated in <xref ref-type="fig" rid="F14">Figure 14</xref>, when the detection threshold is lowered to recover fainter sources, the resulting segmentation masks for bright targets become increasingly diffuse. Mask contours become strongly perturbed by background noise, leading to substantial centroid shifts that may ultimately produce false positives. In contrast, the encoder&#x2013;decoder architecture of ATD-DL integrates multi-scale contextual information by formulating semantic boundary localization as a pixel-wise classification task. As a result, the segmentation masks exhibit low dispersion during centroid extraction. For brighter, high-SNR targets, mask boundaries remain stable across different operating points (conservative high-precision estimation versus optimistic high-completeness estimation), with performance differences arising mainly from the recovery of additional faint, low-SNR targets (<xref ref-type="fig" rid="F15">Figure 15</xref>).</p>
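<p>To make the centroid-extraction step concrete, the minimal Python sketch below labels connected components in a segmentation map and computes flux-weighted centroids. The threshold, function names, and use of <italic>scipy.ndimage</italic> are illustrative assumptions rather than a reproduction of the exact ATD-DL post-processing, but they capture why low-dispersion masks translate into stable centroids.</p>
<preformat>
import numpy as np
from scipy import ndimage

def extract_centroids(seg_map, image, prob_threshold=0.5):
    # Binarize the per-pixel detection map and label connected components.
    mask = np.greater(seg_map, prob_threshold)
    labels, n_sources = ndimage.label(mask)
    if n_sources == 0:
        return np.empty((0, 2))
    # Flux-weighted centroid of each component, using the preprocessed frame as
    # the weight so that the centroid follows the source intensity distribution.
    centroids_yx = ndimage.center_of_mass(image, labels, index=range(1, n_sources + 1))
    # Return (x, y) ordering to match catalogue conventions.
    return np.asarray(centroids_yx)[:, ::-1]
</preformat>
<p>Because each connected component contributes a single flux-weighted position, modest perturbations of the mask boundary shift the centroid only slightly, whereas the diffuse, noise-perturbed masks produced by low-threshold connected segmentation can displace it far enough to compromise the subsequent cross-matching, as discussed below.</p>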
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>Segmentation results from SExtractor under varying detection thresholds.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g014.tif">
<alt-text content-type="machine-generated">Five black square panels arranged in two rows display scattered white dots in varying densities, labeled 0.5, 0.7, 0.9, 1.2, 1.6, and 2.0, indicating decreasing dot density from left to right and top to bottom.</alt-text>
</graphic>
</fig>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>Comparison of ATD-DL outputs optimized for highest Precision versus highest Recall, showing differences primarily in faint-target recovery, together with a visual comparison of the original image, the SExtractor segmentation, and the ATD-DL segmentation at maximum F1 score. <bold>(A)</bold> Highest Precision, <bold>(B)</bold> highest Recall, <bold>(C)</bold> SExtractor segmentation, <bold>(D)</bold> input image, <bold>(E)</bold> ATD-DL model output.</p>
</caption>
<graphic xlink:href="fspas-13-1782465-g015.tif">
<alt-text content-type="machine-generated">Five panels, labeled A through E, show star-like white sources on black backgrounds. Panels A and B include colored boxes highlighting the same region of interest, shown in green in A and yellow in B; the yellow box encloses a greater number of faint white sources. Panels C, D, and E present differing source densities: C shows more extended white regions, D corresponds to the original astronomical image, and E displays segmented regions that more closely match the visually discernible targets.</alt-text>
</graphic>
</fig>
<p>To further illustrate the effect of lowering SExtractor&#x2019;s detection threshold in pursuit of increased faint-source recovery, the same centroid extraction procedure used for ATD-DL was applied to SExtractor outputs. As shown in <xref ref-type="table" rid="T8">Table 8</xref>, when the detection threshold is reduced below 0.5, Recall decreases rather than improves. This behavior arises because excessively low thresholds cause segmentation masks to over-expand, shifting centroids to such an extent that otherwise correctly segmented targets are assigned incorrect coordinates and are subsequently missed during the cross-matching process. Although this effect is also present at higher thresholds, it is partially obscured by the increased number of newly detected faint sources.</p>
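<p>The sensitivity of the cross-matching step to such centroid shifts can be illustrated with a minimal sketch. The matching radius, the one-detection-per-reference rule, and the function names below are assumptions introduced for illustration and do not reproduce the exact evaluation protocol of this work; the point is that a mask which over-expands and drags the centroid beyond the matching radius converts an otherwise correct detection into one false positive plus one false negative.</p>
<preformat>
import numpy as np
from scipy.spatial import cKDTree

def cross_match(detected_xy, reference_xy, tol_pix=2.0):
    # Match each detected centroid to its nearest reference source; a detection
    # counts as a true positive only if the separation is within the tolerance.
    tree = cKDTree(reference_xy)
    dist, idx = tree.query(detected_xy, k=1)
    matched = np.less_equal(dist, tol_pix)
    # Each reference source may be claimed at most once; duplicates become FPs.
    tp = len(set(idx[matched].tolist()))
    fp = len(detected_xy) - tp
    fn = len(reference_xy) - tp
    return tp, fp, fn
</preformat>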
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Cross-matching performance metrics for centroids extracted from SExtractor&#x2019;s segmentation map.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Threshold</th>
<th align="left">Precision</th>
<th align="left">Recall</th>
<th align="left">F1 score</th>
<th align="left">TP</th>
<th align="left">FP</th>
<th align="left">FN</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">0.1</td>
<td align="left">0.1394</td>
<td align="left">0.0973</td>
<td align="left">0.1146</td>
<td align="left">12,888</td>
<td align="left">79,570</td>
<td align="left">72,319</td>
</tr>
<tr>
<td align="left">0.2</td>
<td align="left">0.2143</td>
<td align="left">0.2709</td>
<td align="left">0.2393</td>
<td align="left">30,690</td>
<td align="left">11,253</td>
<td align="left">56,432</td>
</tr>
<tr>
<td align="left">0.3</td>
<td align="left">0.3328</td>
<td align="left">0.4592</td>
<td align="left">0.3859</td>
<td align="left">44,901</td>
<td align="left">90,012</td>
<td align="left">40,479</td>
</tr>
<tr>
<td align="left">0.4</td>
<td align="left">0.5019</td>
<td align="left">0.5678</td>
<td align="left">0.5328</td>
<td align="left">49,139</td>
<td align="left">48,774</td>
<td align="left">31,714</td>
</tr>
<tr>
<td align="left">0.5</td>
<td align="left">0.6931</td>
<td align="left">0.5888</td>
<td align="left">0.6367</td>
<td align="left">46,502</td>
<td align="left">20,587</td>
<td align="left">29,855</td>
</tr>
<tr>
<td align="left">0.6</td>
<td align="left">0.8371</td>
<td align="left">0.5492</td>
<td align="left">0.6633</td>
<td align="left">40,940</td>
<td align="left">7,965</td>
<td align="left">32,655</td>
</tr>
<tr>
<td align="left">0.7</td>
<td align="left">0.9130</td>
<td align="left">0.4937</td>
<td align="left">0.6409</td>
<td align="left">35,753</td>
<td align="left">3,406</td>
<td align="left">36,728</td>
</tr>
<tr>
<td align="left">0.8</td>
<td align="left">0.9414</td>
<td align="left">0.4364</td>
<td align="left">0.5963</td>
<td align="left">31,321</td>
<td align="left">1949</td>
<td align="left">41,097</td>
</tr>
<tr>
<td align="left">0.9</td>
<td align="left">0.9520</td>
<td align="left">0.3874</td>
<td align="left">0.5507</td>
<td align="left">27,805</td>
<td align="left">1,401</td>
<td align="left">44,880</td>
</tr>
<tr>
<td align="left">1.0</td>
<td align="left">0.9576</td>
<td align="left">0.3482</td>
<td align="left">0.5107</td>
<td align="left">25,046</td>
<td align="left">1,110</td>
<td align="left">47,958</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Visual inspection of the segmentation masks at optimal F1 performance (<xref ref-type="fig" rid="F15">Figure 15</xref>) indicates that SExtractor tends to overestimate the spatial extent of bright sources, frequently merging adjacent faint targets into a single connected component. This behavior substantially limits its ability to individually segment faint objects. In contrast, ATD-DL produces segmentation masks that are both more faithful to the underlying sources and more stable.</p>
<p>In summary, the threshold-connected segmentation mechanism employed by SExtractor suffers from excessive bright-source area expansion and limited separation of faint targets when operating near the detection limit, leading to deteriorating performance at lower thresholds. By contrast, ATD-DL benefits from the low-dispersion segmentation masks produced by the deep learning model and from superior sensitivity to low-SNR targets, maintaining high centroid accuracy and consistently outperforming SExtractor across all key metrics. This analysis elucidates the primary factors underlying the superior performance of ATD-DL in the combined task of target segmentation and centroid extraction.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusions and future work</title>
<p>This study presents ATD-DL, a deep learning&#x2013;based framework for astronomical target detection that is coupled with a dedicated preprocessing pipeline designed to adapt high&#x2013;dynamic-range observational data for deep learning applications. When evaluated on a test set containing approximately 87,000 real sources, the ATD-DL framework demonstrates a clear performance advantage over traditional methods such as SExtractor and DAOFind. Unlike threshold-based approaches, which are prone to diffuse segmentation masks and centroid shifts at low signal levels, ATD-DL achieves stable pixel-level semantic boundary localization through multi-scale context fusion within its encoder&#x2013;decoder architecture. This design substantially improves both segmentation quality and centroid extraction accuracy for faint targets.</p>
<p>Furthermore, when deployed on an NVIDIA GeForce RTX 4060 Ti platform, the model achieves an inference speed of approximately 50 ms per frame. Because model training does not rely on specific prior knowledge, ATD-DL is applicable to both historical archival data and real-time telescope observation pipelines. Its ability to meet real-time processing requirements makes it well suited for time-sensitive applications such as space object monitoring, with strong potential to significantly enhance the detection of extremely faint astronomical targets.</p>
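<p>For readers who wish to check the per-frame latency on their own hardware, a minimal timing sketch is given below. The model object, the input tensor, and the iteration counts are placeholders, and the snippet assumes a PyTorch model on a CUDA device; it is not the benchmarking code used for the figure reported above.</p>
<preformat>
import time
import torch

@torch.no_grad()
def mean_latency_ms(model, frame, n_warmup=10, n_runs=100):
    # Mean per-frame inference latency; warm-up iterations exclude one-off costs
    # such as kernel selection and memory allocation, and explicit synchronization
    # ensures that asynchronous GPU work is included in the measured interval.
    model.eval()
    for _ in range(n_warmup):
        model(frame)
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(n_runs):
        model(frame)
    torch.cuda.synchronize()
    return 1000.0 * (time.perf_counter() - start) / n_runs
</preformat>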
<p>While ATD-DL establishes an effective detection framework, target detection represents only the initial stage of the overall processing pipeline. Future work will focus on integrating more precise centroiding techniques to further improve astrometric accuracy. Enhancing detection depth will require additional optimization of model sensitivity to extremely low-SNR targets, as well as advances in image preprocessing to mitigate systematic effects such as atmospheric turbulence. Although preliminary experiments with more complex architectures have not yet yielded significant improvements, continued exploration of task-driven network designs and their integration into the validated pipeline is expected to further extend the performance limits of the proposed framework.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JH: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Project administration, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review and editing. HL: Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Resources, Supervision, Writing &#x2013; review and editing. WX: Supervision, Validation, Writing &#x2013; review and editing. SL: Supervision, Validation, Writing &#x2013; review and editing. ZQ: Supervision, Validation, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. The author(s) verify and take full responsibility for the use of generative AI in the preparation of this manuscript. Generative AI was used: OpenAI&#x2019;s ChatGPT and DeepSeek.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bancelin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Colas</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Thuillot</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hestroffer</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Assafin</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Asteroid (99942) apophis: new predictions of Earth encounters for this potentially hazardous asteroid</article-title>. <source>A&#x26;A</source> <volume>544</volume>, <fpage>A15</fpage>. <pub-id pub-id-type="doi">10.1051/0004-6361/201117981</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bertin</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Arnouts</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>SExtractor: software for source extraction</article-title>. <source>Astron. Astrophys. Suppl. Ser.</source> <volume>117</volume>, <fpage>393</fpage>&#x2013;<lpage>404</lpage>. <pub-id pub-id-type="doi">10.1051/aas:1996164</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Borgefors</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Distance transformations in digital images</article-title>. <source>Comput. Vis. Graph. Image Process.</source> <volume>34</volume>, <fpage>344</fpage>&#x2013;<lpage>371</lpage>. <pub-id pub-id-type="doi">10.1016/S0734-189X(86)80047-0</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<collab>The Astropy Collaboration</collab>
<name>
<surname>Robitaille</surname>
<given-names>T. P.</given-names>
</name>
<name>
<surname>Tollerud</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Greenfield</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Droettboom</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bray</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Astropy: a community python package for astronomy</article-title>. <source>A&#x26;A</source> <volume>558</volume>, <fpage>A33</fpage>. <pub-id pub-id-type="doi">10.1051/0004-6361/201322068</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Flury</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Contant</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>The updated IAA position paper on orbital debris</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://ui.adsabs.harvard.edu/abs/2001ESASP.473..841F">https://ui.adsabs.harvard.edu/abs/2001ESASP.473..841F</ext-link> (Accessed December 23, 2025)</comment>. <fpage>841</fpage>&#x2013;<lpage>849</lpage>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Howell</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). &#x201c;<article-title>Handbook of CCD astronomy</article-title>,&#x201d; in <source>Cambridge observing handbooks for research astronomers</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Howell</surname>
<given-names>S. B.</given-names>
</name>
</person-group> <edition>2nd ed</edition> (<publisher-loc>Cambridge, UK</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>), <volume>5</volume>, <fpage>2006</fpage>. <pub-id pub-id-type="doi">10.1017/CBO9780511807909</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Detection and classification of astronomical targets with deep neural networks in wide-field small aperture telescopes</article-title>. <source>AJ</source> <volume>159</volume>, <fpage>212</fpage>. <pub-id pub-id-type="doi">10.3847/1538-3881/ab800a</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A deep learning based astronomical target detection framework for multi-colour photometry sky survey projects</article-title>. <source>Astronomy Comput.</source> <volume>42</volume>, <fpage>100687</fpage>. <pub-id pub-id-type="doi">10.1016/j.ascom.2023.100687</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Ba</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>CoRR</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krizhevsky</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sutskever</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Commun. ACM</source> <volume>60</volume>, <fpage>84</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1145/3065386</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Dense nested attention network for infrared small target detection</article-title>. <source>IEEE Trans. Image Process.</source> <volume>32</volume>, <fpage>1745</fpage>&#x2013;<lpage>1758</lpage>. <pub-id pub-id-type="doi">10.1109/TIP.2022.3199107</pub-id>
<pub-id pub-id-type="pmid">35994532</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Long</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jiarong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jiangbin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jiayao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xiaotian</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Astronomical pointlike source detection <italic>via</italic> deep feature matching</article-title>. <source>Astrophysical J. Suppl. Ser.</source> <volume>276</volume> (<issue>4</issue>), <fpage>4</fpage>. <pub-id pub-id-type="doi">10.3847/1538-4365/ad9244</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Loshchilov</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Hutter</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>SGDR: stochastic gradient descent with warm restarts</article-title>. <pub-id pub-id-type="doi">10.48550/arXiv.1608.03983</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>Y.-D.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Z.-H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>FocusGEO observations of space debris at geosynchronous Earth orbit</article-title>. <source>Adv. Space Res.</source> <volume>64</volume>, <fpage>465</fpage>&#x2013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1016/j.asr.2019.04.006</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>J.-H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>J.-J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>G.-P.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>FocusGEO II. A telescope with imaging mode based on image overlay for debris at geosynchronous Earth Orbit</article-title>. <source>Adv. Space Res.</source> <volume>69</volume>, <fpage>2618</fpage>&#x2013;<lpage>2628</lpage>. <pub-id pub-id-type="doi">10.1016/j.asr.2021.12.048</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>U-Net: Convolutional networks for biomedical image segmentation</article-title>. <pub-id pub-id-type="doi">10.48550/arXiv.1505.04597</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schiattarella</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Spiller</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Curti</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A novel star identification technique robust to high presence of false objects: the Multi-poles algorithm</article-title>. <source>Adv. Space Res.</source> <volume>59</volume>, <fpage>2133</fpage>&#x2013;<lpage>2147</lpage>. <pub-id pub-id-type="doi">10.1016/j.asr.2017.01.034</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schildknecht</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Optical surveys for space debris</article-title>. <source>Astron Astrophys. Rev.</source> <volume>14</volume>, <fpage>41</fpage>&#x2013;<lpage>111</lpage>. <pub-id pub-id-type="doi">10.1007/s00159-006-0003-9</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stetson</surname>
<given-names>P. B.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>DAOPHOT - a computer program for crowded-field stellar photometry</article-title>. <source>PASP</source> <volume>99</volume>, <fpage>191</fpage>. <pub-id pub-id-type="doi">10.1086/131977</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stoppa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Vreeswijk</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bloemen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bhattacharyya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Caron</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>J&#xf3;hannesson</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>AutoSourceID-Light: fast optical source localization via U-Net and Laplacian of Gaussian</article-title>. <source>A&#x26;A</source> <volume>662</volume>, <fpage>A109</fpage>. <pub-id pub-id-type="doi">10.1051/0004-6361/202243250</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sudre</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Vercauteren</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ourselin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jorge Cardoso</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Generalised dice overlap as a deep learning loss function for highly unbalanced segmentations</article-title>. <source>Deep Learn. Med. Image Anal. Multimodal Learn. Clin. Decis. Support.</source> <volume>10553</volume>, <fpage>240</fpage>&#x2013;<lpage>248</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-67558-9_28</pub-id>
<pub-id pub-id-type="pmid">34104926</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Tanoglidis</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>&#x106;iprijanovi&#x107;</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Drlica-Wagner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nord</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Amsellem</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Using mask R-CNN to detect and mask ghosting and scattered-light artifacts in astronomical images</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.semanticscholar.org/paper/Using-Mask-R-CNN-to-detect-and-mask-ghosting-and-in-Tanoglidis-%C2%B4Ciprijanovi%C2%B4c/0e511e39c3dbf005d869d10cef764e24ad8b1135">https://www.semanticscholar.org/paper/Using-Mask-R-CNN-to-detect-and-mask-ghosting-and-in-Tanoglidis-%C2%B4Ciprijanovi%C2%B4c/0e511e39c3dbf005d869d10cef764e24ad8b1135</ext-link> (Accessed December 23, 2025).</comment>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vallenari</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>A. G. A.</given-names>
</name>
<name>
<surname>Prusti</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Bruijne</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Arenou</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Babusiaux</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Gaia data release 3 - summary of the content and survey properties</article-title>. <source>A&#x26;A</source> <volume>674</volume>, <fpage>A1</fpage>. <pub-id pub-id-type="doi">10.1051/0004-6361/202243940</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xue</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Dim small target detection based on convolutinal neural network in star image</article-title>. <source>Multimed. Tools Appl.</source> <volume>79</volume>, <fpage>4681</fpage>&#x2013;<lpage>4698</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-019-7412-z</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname>
<given-names>Q.-Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A preliminary analysis of the Shangri-La bolide on 2017 Oct 4</article-title>. <source>Earth Planet. Phys.</source> <volume>2</volume>, <fpage>170</fpage>&#x2013;<lpage>172</lpage>. <pub-id pub-id-type="doi">10.26464/epp2018017</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yi</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Bu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automatic detection of low surface brightness galaxies from SDSS images</article-title>.</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lembeck</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Zhuang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Real-time convolutional neural network-based star detection and centroiding method for CubeSat star tracker</article-title>. <source>IEEE Trans. Aerosp. Electron. Syst.</source> <volume>61</volume>, <fpage>8172</fpage>&#x2013;<lpage>8184</lpage>. <pub-id pub-id-type="doi">10.1109/TAES.2025.3542744</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/875783/overview">Yanxia Zhang</ext-link>, National Astronomical Observatories, Chinese Academy of Sciences (CAS), China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3341166/overview">Peng Jia</ext-link>, Taiyuan University of Technology, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3347332/overview">Bo Qiu</ext-link>, University of Science and Technology Beijing, China</p>
</fn>
</fn-group>
</back>
</article>