<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1758257</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2026.1758257</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A clustering method for single-cell RNA sequencing data based on denoising and masking learning</article-title>
<alt-title alt-title-type="left-running-head">Xu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2026.1758257">10.3389/fbinf.2026.1758257</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Shuang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yan</surname>
<given-names>Wen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Bin</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Qi</surname>
<given-names>Hong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Kai</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3300505"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Department of Anesthesiology, The Second Hospital of Jilin University</institution>, <city>Changchun</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>College of Computer Science and Technology, Jilin University</institution>, <city>Changchun</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Kai Wang, <email xlink:href="mailto:wangkai87@jlu.edu.cn">wangkai87@jlu.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-03">
<day>03</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>6</volume>
<elocation-id>1758257</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>24</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Xu, Yan, Zhang, Qi and Wang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Xu, Yan, Zhang, Qi and Wang</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-03">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Single-cell RNA sequencing (scRNA-seq) enables high-throughput analysis of gene expression at single-cell resolution and plays a crucial role in studying cellular heterogeneity, tissue development, and disease mechanisms. However, scRNA-seq data are characterized by high dimensionality, sparsity, technical noise, and prevalent dropout events, which pose substantial challenges to conventional clustering approaches.</p>
</sec>
<sec>
<title>Methods</title>
<p>To address these challenges, we propose scDMAC, a novel clustering framework for single-cell RNA sequencing data based on denoising and masking learning. The method integrates a zero-inflated negative binomial (ZINB)-based denoising autoencoder with a masking autoencoder. First, the ZINB-based autoencoder models count distribution and dropout events to denoise gene expression data. Subsequently, a tailored masking strategy is applied to the denoised data to learn gene-wise correlations through reconstruction.</p>
</sec>
<sec>
<title>Results</title>
<p>Extensive experiments conducted on multiple benchmark scRNA-seq datasets demonstrate that scDMAC achieves superior clustering accuracy and stability compared with state-of-the-art methods. The proposed framework consistently improves clustering performance across diverse datasets, highlighting its robustness to noise and sparsity.</p>
</sec>
<sec>
<title>Discussion</title>
<p>By effectively combining probabilistic denoising with masking-based representation learning, scDMAC provides a powerful solution for addressing dropout and sparsity issues in scRNA-seq data. The improved clustering performance suggests that integrating distribution-aware denoising with feature reconstruction enhances the extraction of biologically meaningful representations, making scDMAC a promising tool for single-cell transcriptomic analysis.</p>
</sec>
</abstract>
<kwd-group>
<kwd>cell clustering</kwd>
<kwd>denoising autoencoder</kwd>
<kwd>masked autoencoder</kwd>
<kwd>single-cell RNA sequencing</kwd>
<kwd>zero-inflated negative binomial (ZINB)</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Natural Science Foundation of China under Grant 62202201.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="5"/>
<equation-count count="27"/>
<ref-count count="26"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>RNA Bioinformatics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Recent advances in high-throughput sequencing, single-cell isolation, and bioinformatics have enabled multimodal, single-cell-level interrogation of biological systems (<xref ref-type="bibr" rid="B24">Wu et al., 2014</xref>; <xref ref-type="bibr" rid="B12">Hong et al., 2020</xref>; <xref ref-type="bibr" rid="B11">He et al., 2025</xref>). Among single-cell technologies, single-cell RNA sequencing (scRNA-seq) stands out for its ability to resolve transcriptional states at cellular resolution, permitting precise identification of cell types and subpopulations, reconstruction of developmental trajectories, and dissection of molecular mechanisms underlying health and disease (<xref ref-type="bibr" rid="B15">Lopez et al., 2018</xref>; <xref ref-type="bibr" rid="B17">Ranjan et al., 2021</xref>). As single-cell studies scale across tissues, conditions, and laboratories, robust computational methods (<xref ref-type="bibr" rid="B13">Kinker et al., 2020</xref>; <xref ref-type="bibr" rid="B6">Flores et al., 2022</xref>) that can extract meaningful biological signals from noisy, sparse measurements are increasingly essential for biological discovery and translational applications such as biomarker identification and therapeutic target prioritization (<xref ref-type="bibr" rid="B9">Haque et al., 2017</xref>; <xref ref-type="bibr" rid="B19">Su et al., 2022</xref>).</p>
<p>Clustering remains a foundational step in scRNA-seq analysis, but it is challenged by properties inherent to these data: extreme sparsity (many zero or near-zero counts), high dimensionality (tens of thousands of genes per cell), and mixed sources of variation, including true biological heterogeneity and technical noise (dropout events, varying capture efficiency, and limited sequencing depth) (<xref ref-type="bibr" rid="B4">Conesa et al., 2016</xref>; <xref ref-type="bibr" rid="B16">Qi et al., 2020</xref>; <xref ref-type="bibr" rid="B26">Zhao et al., 2021</xref>; <xref ref-type="bibr" rid="B7">Ghorbani et al., 2024</xref>; <xref ref-type="bibr" rid="B8">Gong et al., 2018</xref>). Sparsity and high dimensionality exacerbate the curse of dimensionality, reducing the discriminative power of conventional distance metrics and degrading the performance of classical clustering methods. Moreover, zero inflation and measurement noise obscure subtle but biologically important gene&#x2013;gene relationships that are critical for accurate cell-type separation and downstream interpretation (<xref ref-type="bibr" rid="B18">Stegle et al., 2015</xref>; <xref ref-type="bibr" rid="B1">Camara, 2018</xref>).</p>
<p>Existing ZINB-based models primarily focus on modeling count distributions and dropout events, but often overlook explicit modeling of gene&#x2013;gene dependencies. Conversely, recent deep clustering methods emphasize representation learning but typically rely on generic reconstruction objectives that are insensitive to biological sparsity patterns. As a result, existing approaches struggle to simultaneously address technical noise, zero inflation, and contextual gene relationships within a unified framework.</p>
<p>To address these challenges, we introduce scDMAC, a unified framework that couples principled probabilistic denoising with contextual masked reconstruction to produce compact, biologically informative embeddings for clustering. Specifically, scDMAC differs fundamentally from existing composite methods such as scDeepCluster and scziDesk in three aspects:<list list-type="order">
<list-item>
<p>The ZINB-based denoising module in scDMAC is used as an explicit pre-denoising stage rather than being jointly optimized with clustering, which stabilizes subsequent representation learning;</p>
</list-item>
<list-item>
<p>A gene-wise masked autoencoder is introduced after denoising to explicitly model inter-gene dependencies via contextual reconstruction, which is absent in prior ZINB-based clustering frameworks;</p>
</list-item>
<list-item>
<p>scDMAC incorporates an adaptive mask prediction and weighted reconstruction strategy, enabling the model to focus learning capacity on corrupted genes while preserving biological signal, rather than treating all reconstruction errors equally.</p>
</list-item>
</list>
</p>
<p>Together, these design choices allow scDMAC to address both zero inflation and gene dependency learning in a coordinated manner, going beyond architectural variations of existing deep clustering approaches. We evaluate scDMAC on multiple widely used scRNA-seq benchmarks and demonstrate consistent improvements in clustering accuracy, stability, and robustness to dropout compared with state-of-the-art methods. scDMAC delivers clearer separation of canonical cell types, more reliable identification of rare populations, and improved reproducibility across noisy conditions. Collectively, these results show that combining a statistically grounded noise model with masked contextual learning is an effective strategy for extracting biologically meaningful embeddings from scRNA-seq data, thereby improving downstream tasks such as cell-type annotation, trajectory inference, and differential expression analysis.</p>
</sec>
<sec sec-type="methods" id="s2">
<label>2</label>
<title>Methods</title>
<p>
<xref ref-type="fig" rid="F1">Figure 1</xref> shows the flowchart of the single-cell RNA sequencing cluster method based on denoising and masking learning. To optimize single-cell RNA sequencing (scRNA-seq) data for contrastive learning models, the data first undergoes normalization and log transformation, followed by gene filtering, and finally construction of a k-nearest neighbor (KNN) graph based on cosine distance.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The flowchart of single-cell RNA sequencing data based on denoising and masking learning (scDMAC).</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g001.tif">
<alt-text content-type="machine-generated">Flowchart diagram depicting a gene expression data processing pipeline. Steps include data preprocessing, ZINB noise reduction, generation of disturbed and mask data, mask encoder-decoder, and weighted clustering of three cell types shown by colored dots.</alt-text>
</graphic>
</fig>
<p>scRNA-seq data often exhibits substantial variation in total gene expression per cell due to differences in sequencing depth, which compromises the comparability of expression values across cells. To mitigate this technical bias, expression values are normalized. Specifically, for the expression value <italic>X<sub>ij</sub>
</italic> of gene <italic>j</italic> in cell <italic>i</italic>, the normalized value is calculated in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>.<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>G</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>s</italic>
<sub>0</sub> is a scaling factor (set to 10,000 in this study). This adjusts the total expression of each cell to a common scale, reducing the impact of sequencing depth.</p>
<p>Log1p normalization, which is defined as <xref ref-type="disp-formula" rid="e2">Equation 2</xref>, is applied after denoising and is only used for downstream masked representation learning and clustering. This separation ensures both statistical validity of the ZINB likelihood and numerical stability for deep representation learning.<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>log</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>ln</mml:mi>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>This helps approximate a normal distribution, making the data more suitable for contrastive learning. Subsequently, highly variable genes are selected using the Scanpy package to minimize the influence of uninformative features. The resulting preprocessed matrix <italic>X</italic> serves as input to the model. Finally, a KNN graph is constructed using cosine distance to represent cell neighborhoods.</p>
<p>At the same time, this study uses data augmentation to enhance model performance by generating variations of the original data. For scRNA-seq data, designed augmentation helps simulate real-world data distribution, improving imputation and downstream tasks such as clustering. This section applies three augmentation strategies:</p>
<p>Masking Gene Expressions: Randomly selected genes (10%) have their expressions set to zero. This mimics &#x201c;dropout&#x201d; events common in scRNA-seq data, encouraging the model to learn contextual relationships for predicting masked values.</p>
<p>Adding Gaussian Noise: To simulate technical variability from sequencing or handling, Gaussian noise with a variance of 0.6 is added, which improves model robustness to noise, especially useful in datasets with high technical variation.</p>
<p>Swapping Expressions Between Neighboring Cells: Based on the KNN graph, a cell&#x2019;s expressions are randomly swapped with those of its neighbors at a ratio of 0.2. This promotes local structural variability and reduces over-reliance on fixed neighborhood patterns.</p>
<p>To denoise the gene expression matrix <italic>X</italic> and capture key characteristics of scRNA-seq data, such as high sparsity and overdispersion, a Zero-Inflated Negative Binomial (ZINB)-based autoencoder is employed, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. This model integrates an encoder with a denoising autoencoder architecture inspired by DCA, enhancing its ability to handle scRNA-seq noise and dropout effects. The ZINB module probabilistically models dropout events via the zero-inflation parameter, capturing technical zeros, while the masking strategy serves as a self-supervised regularization mechanism rather than an explicit zero generator. Masked values are only introduced during training and are not interpreted as biological zeros. Probabilistic dropout modeling is thereby decoupled from masking-induced perturbations. The preprocessed expression matrix <italic>X</italic> is input into a deep count autoencoder, which uses a ZINB-based loss to reconstruct a denoised expression matrix <italic>X</italic>
<sub>
<italic>z</italic>
</sub>. The ZINB distribution is parameterized by three components: mean (<italic>&#x3bc;</italic>), dispersion (<italic>&#x3b8;</italic>), and dropout probability (<italic>&#x3c0;</italic>). The probability of an observed count is defined as <xref ref-type="disp-formula" rid="e3">Equations 3</xref>, <xref ref-type="disp-formula" rid="e4">4</xref>. <disp-formula id="e3">
<mml:math id="m3">
<mml:mrow>
<mml:mtext>NB</mml:mtext>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>!</mml:mo>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>X</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m4">
<mml:mrow>
<mml:mtext>ZINB</mml:mtext>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext> </mml:mtext>
<mml:mtext>NB</mml:mtext>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where &#x393; denotes the gamma function and &#x3b4;(<italic>X</italic>) represents a point mass at zero. Unlike standard autoencoders, the ZINB model uses three separate fully connected output layers connected to the decoder&#x2019;s final hidden layer to estimate the three parameters, as <xref ref-type="disp-formula" rid="e5">Equations 5</xref>&#x2013;<xref ref-type="disp-formula" rid="e7">7</xref>.<disp-formula id="e5">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msub>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m7">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a0;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>sigmoid</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msub>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The structure of ZINB noise reduction module.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g002.tif">
<alt-text content-type="machine-generated">Neural network diagram illustrating an autoencoder architecture with an encoder on the left, a decoder on the right, and multilayer connections between them. The output is a reconstructed matrix labeled XZ, with colored circles representing various encoded features such as dropout rate tau, dispersion theta, and mean mu.</alt-text>
</graphic>
</fig>
<p>Here, &#x398;, <italic>M</italic>, and <italic>&#x3a0;</italic> denote the matrices of dispersion, mean, and dropout probability, respectively. <italic>W</italic>
<sub>
<italic>&#x3bc;</italic>
</sub>, <italic>W</italic>
<sub>
<italic>&#x3b8;</italic>
</sub> and <italic>W</italic>
<sub>
<italic>&#x3c0;</italic>
</sub> are the weight matrices for each parameter head, and <italic>D</italic> is the output from the last hidden layer of the decoder. The exponential function ensures non-negativity for mean and dispersion, while sigmoid constrains the dropout probability to [0,1]. The scaling factor <italic>s</italic>
<sub>
<italic>i</italic>
</sub>, obtained during preprocessing, adjusts for cell-specific library size.</p>
<p>The loss function for denoising is the negative log-likelihood of the ZINB distribution as <xref ref-type="disp-formula" rid="e8">Equation 8</xref>.<disp-formula id="e8">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>ZINB</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Following the denoising step, variability and perturbation are introduced into the denoised gene expression matrix <italic>X</italic>
<sub>
<italic>z</italic>
</sub> through the following procedure:</p>
<p>First, the expression values of each gene are randomly shuffled within the matrix to preserve intra-gene correlations, resulting in a perturbed matrix <italic>X&#x27;</italic>.</p>
<p>Next, a masking matrix <italic>M</italic> is generated using a Bernoulli distribution <italic>Bernoulli</italic> (<italic>p</italic>
<sub>
<italic>j</italic>
</sub>) for each gene as <xref ref-type="disp-formula" rid="e9">Equation 9</xref>, where <italic>p</italic>
<sub>
<italic>j</italic>
</sub> controls the masking probability for the <italic>j</italic>th gene:<disp-formula id="e9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>Bernoulli</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>Here, <italic>M</italic>
<sub>
<italic>ij</italic>
</sub> represents the element in the <italic>i</italic>th row and <italic>j</italic>th column of the mask.</p>
<p>Finally, the masked gene expression matrix <italic>X</italic>
<sub>
<italic>M</italic>
</sub> is obtained via element-wise operations as <xref ref-type="disp-formula" rid="e10">Equation 10</xref>.<disp-formula id="e10">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an element of the masked matrix, and <inline-formula id="inf2">
<mml:math id="m12">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <italic>X</italic>
<sub>
<italic>z</italic>
</sub> are elements from the shuffled and denoised matrices, respectively.</p>
<p>Importantly, the ZINB-based denoising autoencoder is trained on raw count data prior to masking, and masked entries are excluded from the ZINB likelihood. Masking is applied only to the denoised output for subsequent self-supervised representation learning.</p>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> shows the masking autoencoder, which consists of three main components: an encoder, a mask predictor, and a decoder. The encoder transforms the masked gene expression matrix <italic>X</italic>
<sub>
<italic>M</italic>
</sub> into a low-dimensional embedding <italic>Z</italic>. For an encoder with <italic>F</italic> layers, the output of the <italic>f</italic>th layer is computed as <xref ref-type="disp-formula" rid="e11">Equation 11</xref>.<disp-formula id="e11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where &#x3c3; is the activation function. The final layer applies a linear transformation (i.e., identity activation), and its output <italic>Z</italic>
<sub>
<italic>F</italic>
</sub> serves as the embedding <italic>Z</italic>.</p> <fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The structure of Mask Encoder-Decoder module.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g003.tif">
<alt-text content-type="machine-generated">Flowchart-style diagram illustrating a gene expression model workflow. Gene data is input, processed through an encoder to generate embedding Z, passed to a decoder via concatenation, and produces a prediction result matrix labeled Lmask. Arrows indicate data flow, and key components are boxed and labeled, including masking prediction data.</alt-text>
</graphic>
</fig>
<p>To address potential inaccuracies in the masked input, the model first uses a mask predictor to estimate which expression values have been modified, producing a predicted mask matrix <italic>M&#x27;</italic>. It is implemented as a linear layer trained with cross-entropy loss as <xref ref-type="disp-formula" rid="e12">Equation 12</xref>.<disp-formula id="e12">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mtext> </mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>The decoder reconstructs the gene expression matrix using the embedding <italic>Z</italic> and the predicted mask <italic>M&#x27;</italic>. A weighted mean squared error (MSE) loss is applied to emphasize masked genes as <xref ref-type="disp-formula" rid="e13">Equation 13</xref>, and the weight <italic>W<sub>ij</sub>
</italic> is defined as <xref ref-type="disp-formula" rid="e14">Equation 14</xref>.<disp-formula id="e13">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>X</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
<disp-formula id="e14">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>Here, <italic>&#x3bb;</italic> is a hyperparameter assigning a higher weight to masked genes. The total loss is a weighted combination of the two objectives as <xref ref-type="disp-formula" rid="e15">Equation 15</xref>.<disp-formula id="e15">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>where <italic>&#x3b3;</italic>
<sub>
<italic>m</italic>
</sub> balances the two terms. During clustering, only the embedding <italic>Z</italic> generated by the encoder is used.</p>
<p>To enhance the clustering performance of the model, a weighted soft clustering module is introduced. This module employs a weighted K-means approach to assign data points (cells) to cluster centers while preserving local similarity structures among cells with comparable gene expression profiles. The weighted K-means loss is defined as <xref ref-type="disp-formula" rid="e16">Equation 16</xref>.<disp-formula id="e16">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>where <italic>w</italic>
<sub>
<italic>ik</italic>
</sub> is the weight for cell <italic>i</italic> and cluster <italic>k</italic>, <italic>z</italic>
<sub>
<italic>i</italic>
</sub> is the embedding of cell <italic>i</italic>, and <italic>c</italic>
<sub>
<italic>k</italic>
</sub> is the center of cluster <italic>k</italic>, updated by <xref ref-type="disp-formula" rid="e17">Equation 17</xref>.<disp-formula id="e17">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
</p>
<p>The weight is computed using <xref ref-type="disp-formula" rid="e18">Equation 18</xref>.<disp-formula id="e18">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>w</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>Subsequently, the weights are sharpened using a Markov-like inflation step by <xref ref-type="disp-formula" rid="e19">Equation 19</xref>.<disp-formula id="e19">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>w</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>w</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>where <italic>&#x3b1;</italic> is a hyperparameter (default 1). To better capture similarity relationships, a Student&#x2019;s t-distribution is used to model pairwise cell similarities. The soft assignment probability <italic>q</italic>
<sub>
<italic>ij</italic>
</sub> is given by <xref ref-type="disp-formula" rid="e20">Equation 20</xref>:<disp-formula id="e20">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>with <italic>t</italic> set to 1. A target distribution <italic>p</italic> is derived from <italic>q</italic> to strengthen high-confidence assignments as <xref ref-type="disp-formula" rid="e21">Equation 21</xref>.<disp-formula id="e21">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>The <xref ref-type="disp-formula" rid="e22">Equation 22</xref> defines the clustering loss, the KL divergence between <italic>p</italic> and <italic>q</italic>:<disp-formula id="e22">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>The overall training objective combines all losses as <xref ref-type="disp-formula" rid="e23">Equation 23</xref>.<disp-formula id="e23">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c6;</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>where <italic>&#x3b1;</italic>, <italic>&#x3b2;</italic>, <italic>&#x3c6;</italic>, <italic>&#x3b8;</italic> are tunable hyperparameters.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Experiment</title>
<p>This study evaluates the model&#x2019;s clustering and classification performance using six annotated scRNA-seq datasets from both mouse and human. These datasets cover multiple biological systems, diverse cell types, and varying cluster sizes, and were generated using different RNA extraction protocols and sequencing platforms (e.g., Smart-seq, Drop-seq). The datasets&#x2014;Adam, Deng, Muraro, Pollen, Chen, and Zeisel&#x2014;are summarized in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Single-cell sequencing dataset used in the experiment.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Datasets</th>
<th align="center">Cell count</th>
<th align="center">Numbers of genes</th>
<th align="center">Number of cell categories</th>
<th align="center">Sequencing platform</th>
<th align="center">Geo accession number</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Adam</td>
<td align="center">3,660</td>
<td align="center">23,797</td>
<td align="center">8</td>
<td align="center">Drop-seq</td>
<td align="center">GSE94333</td>
</tr>
<tr>
<td align="center">Deng</td>
<td align="center">268</td>
<td align="center">22,431</td>
<td align="center">6</td>
<td align="center">Smart-seq</td>
<td align="center">GSE45719</td>
</tr>
<tr>
<td align="center">Muraro</td>
<td align="center">2,126</td>
<td align="center">19,127</td>
<td align="center">10</td>
<td align="center">CEL-Seq2</td>
<td align="center">GSE85241</td>
</tr>
<tr>
<td align="center">Pollen</td>
<td align="center">301</td>
<td align="center">23,730</td>
<td align="center">11</td>
<td align="center">SMARTer</td>
<td align="center">GSE124299</td>
</tr>
<tr>
<td align="center">Chen</td>
<td align="center">12,089</td>
<td align="center">23,284</td>
<td align="center">46</td>
<td align="center">Drop-seq</td>
<td align="center">GSE87544</td>
</tr>
<tr>
<td align="center">Zeisel</td>
<td align="center">3,005</td>
<td align="center">19,958</td>
<td align="center">12</td>
<td align="center">STRT-seq</td>
<td align="center">GSE60361</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For fair comparison, all baseline methods were implemented using recommended settings from their original publications or official repositories. Highly variable genes (HVGs) were selected using Scanpy with default parameters unless otherwise specified. Latent dimensionality was set to 10&#x2013;32 depending on method defaults.</p>
<p>Graph-based methods (Seurat, graph-sc) used k-nearest neighbor graphs with k &#x3d; 15&#x2013;30 and Leiden clustering with default resolution. Deep clustering baselines were run with identical train/validation splits and optimized using Adam. For stochastic methods, each experiment was repeated multiple times with different random seeds, and the best-performing configuration was reported.</p>
<p>The base environment of the experimental platform utilizes an Intel Xeon E5-2630 v4 CPU, a Tesla 2080Ti GPU with 24 GB of VRAM, and 64 GB of memory. The operating system is Ubuntu 18.04.6, PyTorch version 1.13.1, and Python 3.8.16.</p>
<sec id="s3-1">
<label>3.1</label>
<title>Comparative clustering methods</title>
<p>This section compares the scDMAC method with PCA (<xref ref-type="bibr" rid="B21">Todorov et al., 2018</xref>) &#x2b;K-means (<xref ref-type="bibr" rid="B10">Hartigan and Wong, 1979</xref>), Seurat (<xref ref-type="bibr" rid="B20">Tian et al., 2019</xref>), scDeepCluster (<xref ref-type="bibr" rid="B5">Eraslan et al., 2019</xref>), scziDesk (<xref ref-type="bibr" rid="B2">Chen et al., 2020</xref>), scVI (<xref ref-type="bibr" rid="B15">Lopez et al., 2018</xref>), graph-sc (<xref ref-type="bibr" rid="B3">Ciortan and Defrance, 2022</xref>), AutoClass (<xref ref-type="bibr" rid="B14">Li et al., 2022</xref>), scDCCA (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>), CellBRF (<xref ref-type="bibr" rid="B25">Xu et al., 2023</xref>), and CTEC (<xref ref-type="bibr" rid="B23">Wang et al., 2024</xref>). Validation is performed across six public datasets, with results from some papers being reproducible.</p>
<p>PCA &#x2b; K-means (<xref ref-type="bibr" rid="B21">Todorov et al., 2018</xref>; <xref ref-type="bibr" rid="B10">Hartigan and Wong, 1979</xref>) applies principal component analysis (PCA) to project high-dimensional scRNA-seq data into a lower-dimensional subspace, reducing noise and redundancy. K-means clustering is then performed in the reduced space to partition cells into groups by iteratively optimizing cluster centroids.</p>
<p>Seurat (<xref ref-type="bibr" rid="B20">Tian et al., 2019</xref>) first constructs a k-nearest neighbor graph based on gene expression profiles, then builds a shared nearest neighbor (SNN) graph to refine cell-to-cell similarities. Community detection is applied on the SNN graph to identify cell clusters.</p>
<p>scDeepCluster (<xref ref-type="bibr" rid="B5">Eraslan et al., 2019</xref>) employs a denoising autoencoder that injects Gaussian noise into the encoder to improve robustness. The model jointly learns a low-dimensional latent representation and cluster assignments using a KL divergence-based clustering loss, with a decoder utilizing a ZINB loss to model scRNA-seq data characteristics.</p>
<p>scziDesk (<xref ref-type="bibr" rid="B2">Chen et al., 2020</xref>) integrates a denoising autoencoder with a clustering module that alternates between data reconstruction and soft clustering. It applies a soft self-training K-means approach to iteratively refine cluster labels in the latent space.</p>
<p>scVI (<xref ref-type="bibr" rid="B15">Lopez et al., 2018</xref>) is based on a variational autoencoder (VAE) framework that uses a zero-inflated negative binomial (ZINB) likelihood to model scRNA-seq data, explicitly accounting for dropout events and over-dispersion. It infers a latent representation that is used for downstream clustering.</p>
<p>Graph-SC (<xref ref-type="bibr" rid="B3">Ciortan and Defrance, 2022</xref>) utilizes a graph autoencoder structure to model relationships between cells and genes. It can incorporate external biological networks (e.g., gene-gene interaction networks) to enhance the graph representation and improve clustering performance.</p>
<p>AutoClass (<xref ref-type="bibr" rid="B14">Li et al., 2022</xref>) adopts a dual-network architecture consisting of an autoencoder for denoising and feature extraction, and a classifier that promotes discriminative latent structures. The model is trained to preserve biological information while reducing technical noise.</p>
<p>scDCCA (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>) applies deep canonical correlation analysis (DCCA) with a dual contrastive learning module to integrate multi-view information and learn invariant features. It aims to improve clustering by maximizing agreement between augmented views of the data.</p>
<p>CellBRF (<xref ref-type="bibr" rid="B25">Xu et al., 2023</xref>) introduces a random forest-based feature selection method within a spectral clustering pipeline. It employs a class-balancing strategy to mitigate the impact of imbalanced cell type distributions on gene importance estimation.</p>
<p>CTEC (<xref ref-type="bibr" rid="B23">Wang et al., 2024</xref>) is a cross-table ensemble clustering approach that combines multiple base clustering results using two refinement strategies: distribution-based and outlier-based reclustering, leading to a robust consensus partition.</p>
<p>These methods represent a range of classical and state-of-the-art approaches in scRNA-seq clustering, encompassing linear models, graph-based techniques, deep learning architectures, and ensemble strategies.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Evaluation metrics</title>
<p>In this study, clustering performance is evaluated using external metrics that leverage known ground truth labels. This study employs the Adjusted Rand Index (ARI) and Normalized Mutual Information (NMI) for this purpose. The Rand Index (RI) measures similarity between the clustering result and true labels, defined as:<disp-formula id="e24">
<mml:math id="m26">
<mml:mrow>
<mml:mtext>RI</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(24)</label>
</disp-formula>where TP, TN, FP, and FN represent the numbers of true positives, true negatives, false positives, and false negatives in pairwise cluster assignments. To correct for chance agreement, the Adjusted Rand Index (ARI) is used:<disp-formula id="e25">
<mml:math id="m27">
<mml:mrow>
<mml:mtext>ARI</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(25)</label>
</disp-formula>
</p>
<p>ARI values near 0 indicate random clustering, while higher values reflect better alignment with true labels.</p>
<p>NMI assesses the mutual dependence between the clustering result <italic>V</italic> and true labels <italic>U</italic>, normalized by their entropies:<disp-formula id="e26">
<mml:math id="m28">
<mml:mrow>
<mml:mtext>NMI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xb7;</mml:mo>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(26)</label>
</disp-formula>where MI(<italic>U</italic>, <italic>V</italic>) is the mutual information between <italic>U</italic> and <italic>V</italic>, and H(<italic>U</italic>), H(<italic>V</italic>) denote their entropies. NMI ranges between 0 (independent) and 1 (perfect match).<disp-formula id="e27">
<mml:math id="m29">
<mml:mrow>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(27)</label>
</disp-formula>
</p>
<p>Where <italic>p</italic>(<italic>u</italic>, <italic>v</italic>) is the joint probability distribution function of <italic>u</italic> and <italic>v</italic>, <italic>p</italic>(<italic>u</italic>) denotes the probability of a data point belonging to the true class, and <italic>p</italic>(<italic>v</italic>) denotes the probability of a data point belonging to the clustered class. Both ARI and NMI provide robust, normalized measures for comparing clustering performance across datasets.</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Clustering results analysis</title>
<p>The ARI and NMI results of different methods across the datasets are summarized in <xref ref-type="table" rid="T2">Tables 2</xref>, <xref ref-type="table" rid="T3">3</xref>, with the best and second-best performances highlighted in bold and underlined, respectively. We performed repeated runs with different random seeds and observed low variance across runs (typically &#x3c;0.01 in ARI).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>ARI values for different methods on sequencing datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Adam</th>
<th align="center">Deng</th>
<th align="center">Muraro</th>
<th align="center">Pollen</th>
<th align="center">Chen</th>
<th align="center">Zeisel</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">PCA &#x2b; K-means</td>
<td align="center">0.5354</td>
<td align="center">0.6013</td>
<td align="center">0.6810</td>
<td align="center">0.7563</td>
<td align="center">0.2284</td>
<td align="center">0.4732</td>
</tr>
<tr>
<td align="center">Seurat (<xref ref-type="bibr" rid="B20">Tian et al., 2019</xref>)</td>
<td align="center">0.4973</td>
<td align="center">0.3249</td>
<td align="center">0.4463</td>
<td align="center">0.7668</td>
<td align="center">0.6213</td>
<td align="center">0.3271</td>
</tr>
<tr>
<td align="center">scDeepCluster (<xref ref-type="bibr" rid="B5">Eraslan et al., 2019</xref>)</td>
<td align="center">0.8187</td>
<td align="center">0.7294</td>
<td align="center">0.7442</td>
<td align="center">0.8832</td>
<td align="center">0.3715</td>
<td align="center">0.5164</td>
</tr>
<tr>
<td align="center">scziDesks (<xref ref-type="bibr" rid="B2">Chen et al., 2020</xref>)</td>
<td align="center">0.7894</td>
<td align="center">0.8487</td>
<td align="center">0.7973</td>
<td align="center">0.8701</td>
<td align="center">0.7902</td>
<td align="center">0.6261</td>
</tr>
<tr>
<td align="center">scVI (<xref ref-type="bibr" rid="B15">Lopez et al., 2018</xref>)</td>
<td align="center">0.6197</td>
<td align="center">0.3243</td>
<td align="center">0.5017</td>
<td align="center">0.8901</td>
<td align="center">0.4796</td>
<td align="center">0.3552</td>
</tr>
<tr>
<td align="center">graph-sc (<xref ref-type="bibr" rid="B3">Ciortan and Defrance, 2022</xref>)</td>
<td align="center">0.6217</td>
<td align="center">0.8603</td>
<td align="center">0.8042</td>
<td align="center">0.8841</td>
<td align="center">0.5361</td>
<td align="center">0.6013</td>
</tr>
<tr>
<td align="center">AutoClass (<xref ref-type="bibr" rid="B14">Li et al., 2022</xref>)</td>
<td align="center">0.5321</td>
<td align="center">0.8577</td>
<td align="center">0.7654</td>
<td align="center">0.8664</td>
<td align="center">0.8527</td>
<td align="center">0.5791</td>
</tr>
<tr>
<td align="center">scDCCA (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>)</td>
<td align="center">0.9201</td>
<td align="center">0.8794</td>
<td align="center">0.8321</td>
<td align="center">0.8967</td>
<td align="center">0.8436</td>
<td align="center">0.6257</td>
</tr>
<tr>
<td align="center">CellBRF (<xref ref-type="bibr" rid="B25">Xu et al., 2023</xref>)</td>
<td align="center">0.8193</td>
<td align="center">0.8697</td>
<td align="center">0.8127</td>
<td align="center">0.7891</td>
<td align="center">0.8211</td>
<td align="center">0.6154</td>
</tr>
<tr>
<td align="center">CTEC (<xref ref-type="bibr" rid="B23">Wang et al., 2024</xref>)</td>
<td align="center">0.7087</td>
<td align="center">0.8591</td>
<td align="center">0.8191</td>
<td align="center">0.8021</td>
<td align="center">0.7993</td>
<td align="center">0.6346</td>
</tr>
<tr>
<td align="center">scCGC (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>)</td>
<td align="center">0.9311</td>
<td align="center">0.8651</td>
<td align="center">0.8541</td>
<td align="center">0.9107</td>
<td align="center">
<bold>0.8913</bold>
</td>
<td align="center">0.7911</td>
</tr>
<tr>
<td align="center">scDMAC (ours)</td>
<td align="center">
<bold>0.9354</bold>
</td>
<td align="center">
<bold>0.8896</bold>
</td>
<td align="center">
<bold>0.8698</bold>
</td>
<td align="center">
<bold>0.9151</bold>
</td>
<td align="center">0.8821</td>
<td align="center">
<bold>0.7983</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best values highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>NMI for different methods on sequencing datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Adam</th>
<th align="center">Deng</th>
<th align="center">Muraro</th>
<th align="center">Pollen</th>
<th align="center">Chen</th>
<th align="center">Zeisel</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">PCA &#x2b; K-means</td>
<td align="center">0.6871</td>
<td align="center">0.6140</td>
<td align="center">0.6882</td>
<td align="center">0.8663</td>
<td align="center">0.6987</td>
<td align="center">0.4732</td>
</tr>
<tr>
<td align="center">Seurat (<xref ref-type="bibr" rid="B20">Tian et al., 2019</xref>)</td>
<td align="center">0.7384</td>
<td align="center">0.6595</td>
<td align="center">0.7055</td>
<td align="center">0.9011</td>
<td align="center">0.7544</td>
<td align="center">0.5817</td>
</tr>
<tr>
<td align="center">scDeepCluster (<xref ref-type="bibr" rid="B5">Eraslan et al., 2019</xref>)</td>
<td align="center">0.8401</td>
<td align="center">0.7525</td>
<td align="center">0.8110</td>
<td align="center">0.8562</td>
<td align="center">0.8237</td>
<td align="center">0.6106</td>
</tr>
<tr>
<td align="center">scziDesks (<xref ref-type="bibr" rid="B2">Chen et al., 2020</xref>)</td>
<td align="center">0.8373</td>
<td align="center">0.8657</td>
<td align="center">0.8159</td>
<td align="center">0.9022</td>
<td align="center">0.7944</td>
<td align="center">0.6368</td>
</tr>
<tr>
<td align="center">scVI (<xref ref-type="bibr" rid="B15">Lopez et al., 2018</xref>)</td>
<td align="center">0.7624</td>
<td align="center">0.8545</td>
<td align="center">0.8032</td>
<td align="center">0.9231</td>
<td align="center">0.7795</td>
<td align="center">0.6242</td>
</tr>
<tr>
<td align="center">graph-sc (<xref ref-type="bibr" rid="B3">Ciortan and Defrance, 2022</xref>)</td>
<td align="center">0.7308</td>
<td align="center">0.7473</td>
<td align="center">0.7980</td>
<td align="center">0.9304</td>
<td align="center">0.7852</td>
<td align="center">0.6417</td>
</tr>
<tr>
<td align="center">AutoClass (<xref ref-type="bibr" rid="B14">Li et al., 2022</xref>)</td>
<td align="center">0.6996</td>
<td align="center">0.8631</td>
<td align="center">0.7801</td>
<td align="center">0.9283</td>
<td align="center">0.7312</td>
<td align="center">0.6391</td>
</tr>
<tr>
<td align="center">scDCCA (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>)</td>
<td align="center">0.9021</td>
<td align="center">0.8613</td>
<td align="center">0.8207</td>
<td align="center">
<bold>0.9431</bold>
</td>
<td align="center">0.8476</td>
<td align="center">0.7402</td>
</tr>
<tr>
<td align="center">CellBRF (<xref ref-type="bibr" rid="B25">Xu et al., 2023</xref>)</td>
<td align="center">0.8193</td>
<td align="center">0.8794</td>
<td align="center">0.8267</td>
<td align="center">0.9197</td>
<td align="center">0.8562</td>
<td align="center">0.7297</td>
</tr>
<tr>
<td align="center">CTEC (<xref ref-type="bibr" rid="B23">Wang et al., 2024</xref>)</td>
<td align="center">0.7771</td>
<td align="center">0.8752</td>
<td align="center">0.8039</td>
<td align="center">0.9064</td>
<td align="center">0.7993</td>
<td align="center">0.6915</td>
</tr>
<tr>
<td align="center">scCGC (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>)</td>
<td align="center">0.9107</td>
<td align="center">0.8691</td>
<td align="center">
<bold>0.8541</bold>
</td>
<td align="center">0.9427</td>
<td align="center">0.8917</td>
<td align="center">
<bold>0.7751</bold>
</td>
</tr>
<tr>
<td align="center">scDMAC (ours)</td>
<td align="center">
<bold>0.9224</bold>
</td>
<td align="center">
<bold>0.8802</bold>
</td>
<td align="center">0.8459</td>
<td align="center">0.9363</td>
<td align="center">
<bold>0.8973</bold>
</td>
<td align="center">0.7692</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best values highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>As shown, scDMAC achieves the highest ARI values on five datasets (Adam, Deng, Muraro, Pollen, and Zeisel), demonstrating its robustness and accuracy in clustering cells from diverse platforms, tissues, and organisms. This result underscores the effectiveness of its masked autoencoder in feature learning and the advantage of weighted soft clustering. Compared to scCGC, scDMAC&#x2019;s improved feature reconstruction loss leads to better noise suppression, particularly on the Deng and Muraro datasets, where the ARI improvement exceeds 0.01. On the Chen dataset, scDMAC&#x2019;s ARI is less than 0.01 lower than the top method, which may be attributed to the presence of continuous or transitional cell types in this dataset, making clear cluster separation challenging.</p>
<p>In terms of average ranking based on ARI, scDMAC achieves the highest overall ranking, followed by scCGC, scDCCA, and CellBRF.</p>
<p>Regarding NMI, scDMAC also performs strongly, obtaining the highest scores on the Adam, Deng, and Chen datasets. On the Pollen dataset, scDCCA slightly outperforms scDMAC, likely due to its distributional regularization benefiting feature modeling on this data. Although scDMAC&#x2019;s NMI is marginally lower than scCGC on the Muraro and Zeisel datasets, it remains competitive, affirming the overall efficacy of the proposed approach.</p>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref>, scDMAC consistently achieves the highest silhouette scores on five out of six datasets (Adam, Deng, Muraro, Chen, and Zeisel), indicating superior intrinsic clustering quality compared to representative baseline methods. In particular, scDMAC shows clear improvements over PCA &#x2b; K-means and scDeepCluster across all datasets, highlighting the benefit of deep representation learning combined with denoising and masking strategies.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Silhouette score for different methods on sequencing datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Adam</th>
<th align="center">Deng</th>
<th align="center">Muraro</th>
<th align="center">Pollen</th>
<th align="center">Chen</th>
<th align="center">Zeisel</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">PCA &#x2b; K-means</td>
<td align="center">0.4766</td>
<td align="center">0.4204</td>
<td align="center">0.3909</td>
<td align="center">0.5663</td>
<td align="center">0.4842</td>
<td align="center">0.3606</td>
</tr>
<tr>
<td align="center">scDeepCluster (<xref ref-type="bibr" rid="B5">Eraslan et al., 2019</xref>)</td>
<td align="center">0.6001</td>
<td align="center">0.4708</td>
<td align="center">0.6110</td>
<td align="center">0.5562</td>
<td align="center">0.5585</td>
<td align="center">0.4707</td>
</tr>
<tr>
<td align="center">scDCCA (<xref ref-type="bibr" rid="B22">Wang et al., 2023</xref>)</td>
<td align="center">0.6233</td>
<td align="center">0.5701</td>
<td align="center">0.7211</td>
<td align="center">
<bold>0.6428</bold>
</td>
<td align="center">0.5725</td>
<td align="center">0.5438</td>
</tr>
<tr>
<td align="center">scDMAC (ours)</td>
<td align="center">
<bold>0.6477</bold>
</td>
<td align="center">
<bold>0.5908</bold>
</td>
<td align="center">
<bold>0.7374</bold>
</td>
<td align="center">0.6230</td>
<td align="center">
<bold>0.5942</bold>
</td>
<td align="center">
<bold>0.5521</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best values highlighted in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Compared with scDCCA, scDMAC achieves comparable or higher silhouette scores on most datasets, with the only exception being the Pollen dataset, where scDCCA slightly outperforms scDMAC. This minor difference may be attributed to dataset-specific characteristics, such as a small number of cells and strong inter-gene correlations, which can favor contrastive learning&#x2013;based representations. Overall, these results demonstrate that scDMAC produces more compact and well-separated clusters in the latent space, providing strong internal validation independent of external annotations.</p>
<p>To validate the clustering performance, we applied t-SNE to visualize the six scRNA-seq datasets. As shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, the visualization results exhibit improved cluster separation and fewer outlier cells, indicating enhanced discrimination and denoising capability of the proposed model.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Visualization of scDMAC&#x2019;s clustering results. <bold>(a)</bold> Adam, <bold>(b)</bold> Deng, <bold>(c)</bold> Muraro, <bold>(d)</bold> Pollen, <bold>(e)</bold> Chen, <bold>(f)</bold> Zeisel.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g004.tif">
<alt-text content-type="machine-generated">Six-part figure showing scatter plots labeled a through f, each with different multi-color clusters on an XY grid ranging from negative one hundred to positive one hundred. Each plot appears to represent variations in cluster separation or data groupings across different methods or conditions.</alt-text>
</graphic>
</fig>
<p>We further evaluated the impact of the masking ratio on model performance. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the ARI and NMI scores under masking ratios ranging from 0.1 to 0.8.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>ARI and NMI values of scDMAC under different disturbance ratios. <bold>(a)</bold> Adam, <bold>(b)</bold> Deng, <bold>(c)</bold> Muraro, <bold>(d)</bold> Pollen, <bold>(e)</bold> Chen, <bold>(f)</bold> Zeisel.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g005.tif">
<alt-text content-type="machine-generated">Six-panel figure displaying line graphs labeled a through f, each showing ARI and NMI scores versus weight from 0.1 to 0.8. Both ARI (red squares) and NMI (blue circles) generally peak around weights 0.2 to 0.3 before declining, with all scores ranging between 0.6 and 1.0. Each subplot uses similar axes and legend, enabling direct visual comparison of trends across different datasets or conditions.</alt-text>
</graphic>
</fig>
<p>Performance improves as the ratio increases to 0.3, but declines with higher ratios, suggesting that excessive masking may hinder the model&#x2019;s ability to reconstruct meaningful gene expression patterns. At low masking ratios (0.1&#x2013;0.3), the model achieves peak or near-peak performance, with both ARI and NMI exhibiting their highest values in this range. For instance, ARI on the Adam, Deng, Muraro, and Pollen datasets reaches maxima around a masking ratio of 0.2&#x2013;0.3, indicating that a moderate level of input perturbation effectively regularizes the model and enhances generalization. Performance at 0.2 and 0.3 is notably stable, with ARI improvements of &#x223c;1&#x2013;3% compared to 0.1 across most datasets.</p>
<p>When the masking ratio increases beyond 0.4, both ARI and NMI show a gradual decline, becoming substantial at masking ratios &#x2265;0.6. This degradation suggests that excessive masking removes too much biological signal, making it difficult for the model to reconstruct informative representations needed for accurate clustering. The results indicate that a masking ratio in the range of 0.2&#x2013;0.3 provides the optimal balance between regularization and information retention.</p>
<p>The weighting of the reconstruction loss also influences model behavior. <xref ref-type="fig" rid="F6">Figure 6</xref> illustrates the effect of varying the weight assigned to corrupted genes (from 0.6 to 0.9).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>ARI and NMI values of scDMAC under Different Reconstruction Weights. <bold>(a)</bold> Adam, <bold>(b)</bold> Deng, <bold>(c)</bold> Muraro, <bold>(d)</bold> Pollen, <bold>(e)</bold> Chen, <bold>(f)</bold> Zeisel.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g006.tif">
<alt-text content-type="machine-generated">Panel of six line charts labeled a to f compares ARI (red squares) and NMI (blue circles) scores versus Weight on the x-axis, all showing peaks around mid-range weights and declines at higher weights.</alt-text>
</graphic>
</fig>
<p>Across nearly all datasets, both ARI and NMI improve steadily from 0.6 to 0.7, reaching a performance peak around 0.7, where the model achieves its best or near-best scores, for example, Adam (ARI 0.93494, NMI 0.92289), Deng (ARI 0.88916, NMI 0.88072), and Pollen (ARI 0.91566, NMI 0.93614). Increasing the reconstruction loss weight beyond this range (&#x2265;0.8) leads to a noticeable and consistent decline in performance, suggesting that overly strong reconstruction constraints may hinder the model&#x2019;s ability to learn discriminative low-dimensional features for clustering.</p>
<p>Importantly, the optimal region (0.65&#x2013;0.75) is robust across datasets, indicating that the balance between reconstruction and clustering objectives is stable and not dataset-specific. Overall, these results demonstrate that selecting an appropriate reconstruction loss coefficient is critical, and a coefficient around 0.7 provides the best trade-off between representation fidelity and clustering separability.</p>
<p>Finally, the weight of the masking prediction loss was tuned between 0.5 and 0.8. As shown in <xref ref-type="fig" rid="F7">Figure 7</xref>, the best overall performance is consistently observed at a masking-prediction loss weight of 0.65, which yields the highest or near-highest ARI values for all datasets, including Adam (0.935), Deng (0.89), Muraro (0.87), Pollen (0.91), Chen (0.88), and Zeisel (0.79). This suggests that a moderate masking-prediction strength provides an optimal balance between learning robust masked-feature representations and maintaining sufficient information for discriminative clustering. In contrast, further increasing the loss weight beyond 0.7 leads to monotonic degradation in clustering accuracy across datasets, indicating that excessive masking-prediction pressure may distort the learned latent space and weaken cluster separability. The consistent cross-dataset trend demonstrates that the optimal setting (approximately 0.65) is stable and generalizable.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>The curve of loss weight versus ARI value. <bold>(a)</bold> Adam, <bold>(b)</bold> Deng, <bold>(c)</bold> Muraro, <bold>(d)</bold> Pollen, <bold>(e)</bold> Chen, <bold>(f)</bold> Zeisel.</p>
</caption>
<graphic xlink:href="fbinf-06-1758257-g007.tif">
<alt-text content-type="machine-generated">Line graph comparing ARI values for six datasets (Adam, Muraro, Chen, Deng, Pollen, Zeisel) across weight values from 0.50 to 0.80. Adam consistently achieves the highest ARI, while Zeisel has the lowest.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Ablation experiments</title>
<p>To evaluate the contribution of each component in scDMAC, we designed three ablated variants: scDMAC-Z: without the denoising module; scDMAC-W: without the weighted reconstruction in the masking autoencoder; scDMAC-P: without the mask prediction module.</p>
<p>As shown in <xref ref-type="table" rid="T5">Table 5</xref>, removing the denoising module (scDMAC-Z) led to noticeable performance degradation, particularly on the Deng, Chen, and Zeisel datasets. For example, on Zeisel, ARI dropped from 0.7983 to 0.7694 and NMI from 0.7692 to 0.7413, indicating that denoising is essential for handling datasets with high technical noise. Removing weighted reconstruction (scDMAC-W) resulted in a moderate decline across most datasets. The most significant drop occurred on Pollen (ARI from 0.9151 to 0.9021), suggesting that the weighting mechanism helps capture key features in datasets with complex cell types. When the mask prediction module was ablated (scDMAC-P), performance decreased substantially, e.g., on Muraro, ARI fell from 0.8698 to 0.8426. This demonstrates the importance of adaptively estimating masked regions for robust feature learning. In summary, the full scDMAC model benefits from the synergistic effect of its denoising, weighted reconstruction, and mask prediction components, with each playing a distinct role in improving clustering accuracy and stability across diverse scRNA-seq datasets.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Ablation experiments.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Evaluation criteria</th>
<th align="center">Adam</th>
<th align="center">Deng</th>
<th align="center">Muraro</th>
<th align="center">Pollen</th>
<th align="center">Chen</th>
<th align="center">Zeisel</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="center">scDMAC-Z</td>
<td align="center">ARI</td>
<td align="center">0.9211</td>
<td align="center">0.8412</td>
<td align="center">0.8264</td>
<td align="center">0.8973</td>
<td align="center">0.8315</td>
<td align="center">0.7694</td>
</tr>
<tr>
<td align="center">NMI</td>
<td align="center">0.9102</td>
<td align="center">0.8351</td>
<td align="center">0.8127</td>
<td align="center">0.9181</td>
<td align="center">0.8434</td>
<td align="center">0.7413</td>
</tr>
<tr>
<td rowspan="2" align="center">scDMAC-W</td>
<td align="center">ARI</td>
<td align="center">0.9268</td>
<td align="center">0.8719</td>
<td align="center">0.8507</td>
<td align="center">0.9021</td>
<td align="center">0.8643</td>
<td align="center">0.7912</td>
</tr>
<tr>
<td align="center">NMI</td>
<td align="center">0.9167</td>
<td align="center">0.8658</td>
<td align="center">0.8305</td>
<td align="center">0.9217</td>
<td align="center">0.8712</td>
<td align="center">0.7624</td>
</tr>
<tr>
<td rowspan="2" align="center">scDMAC-P</td>
<td align="center">ARI</td>
<td align="center">0.9294</td>
<td align="center">0.8543</td>
<td align="center">0.8426</td>
<td align="center">0.8918</td>
<td align="center">0.8671</td>
<td align="center">0.7881</td>
</tr>
<tr>
<td align="center">NMI</td>
<td align="center">0.9183</td>
<td align="center">0.8524</td>
<td align="center">0.8231</td>
<td align="center">0.9108</td>
<td align="center">0.8781</td>
<td align="center">0.7589</td>
</tr>
<tr>
<td rowspan="2" align="center">scDMAC</td>
<td align="center">ARI</td>
<td align="center">0.9354</td>
<td align="center">0.8896</td>
<td align="center">0.8698</td>
<td align="center">0.9151</td>
<td align="center">0.8821</td>
<td align="center">0.7983</td>
</tr>
<tr>
<td align="center">NMI</td>
<td align="center">0.9224</td>
<td align="center">0.8802</td>
<td align="center">0.8459</td>
<td align="center">0.9363</td>
<td align="center">0.8973</td>
<td align="center">0.7692</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<label>4</label>
<title>Conclusion</title>
<p>This paper proposes scDMAC, a clustering model for single-cell RNA sequencing data that integrates a denoising autoencoder with a masking autoencoder. The model first denoises scRNA-seq data using a ZINB-based denoising autoencoder to better approximate the underlying expression distribution. It then introduces variability by randomly shuffling expression values within genes and applies a Bernoulli-based masking strategy to generate perturbed gene expression profiles. These are encoded into low-dimensional embeddings through a masking autoencoder, which jointly optimizes feature reconstruction and mask prediction. Finally, a weighted soft clustering mechanism is applied to produce the clustering results.</p>
<p>Experimental results demonstrate that scDMAC achieves improved performance by effectively capturing gene-wise relationships and enhancing feature robustness. While simulation studies provide controlled ground truth, they often fail to capture the complex noise structure and biological heterogeneity of real scRNA-seq data. In this work, we prioritize evaluation on well-annotated benchmark datasets that are widely used in literature. Nevertheless, we acknowledge this limitation and plan to include simulation-based validation in future work.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>SX: Writing &#x2013; review &#x26; editing, Writing &#x2013; original draft, Methodology, Data curation, Validation, Funding acquisition. WY: Supervision, Methodology, Writing &#x2013; original draft. BZ: Methodology, Validation, Software, Writing &#x2013; original draft. HQ: Data curation, Writing &#x2013; review and editing, Conceptualization. KW: Resources, Writing &#x2013; review and editing, Project administration, Visualization, Formal Analysis.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Camara</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Methods and challenges in the analysis of single-cell RNA-Sequencing data</article-title>. <source>Curr. Opin. Syst. Biol.</source> <volume>7</volume>, <fpage>47</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1016/j.coisb.2017.12.007</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep soft K-Means clustering with self-training for single-cell RNA sequence data</article-title>. <source>NAR Genomics Bioinformatics</source> <volume>2</volume> (<issue>2</issue>), <fpage>lqaa039</fpage>. <pub-id pub-id-type="doi">10.1093/nargab/lqaa039</pub-id>
<pub-id pub-id-type="pmid">33575592</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ciortan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Defrance</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>GNN-based embedding for clustering scRNA-Seq data</article-title>. <source>Bioinformatics</source> <volume>38</volume> (<issue>4</issue>), <fpage>1037</fpage>&#x2013;<lpage>1044</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab787</pub-id>
<pub-id pub-id-type="pmid">34850828</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conesa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Madrigal</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tarazona</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gomez-Cabrero</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cervera</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>McPherson</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>A survey of best practices for RNA-Seq data analysis</article-title>. <source>Genome Biology</source> <volume>17</volume> (<issue>1</issue>), <fpage>13</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-016-0881-8</pub-id>
<pub-id pub-id-type="pmid">26813401</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eraslan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Simon</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>Mircea</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mueller</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Theis</surname>
<given-names>F. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Single-cell RNA-Seq denoising using a deep count autoencoder</article-title>. <source>Nat. Communications</source> <volume>10</volume> (<issue>1</issue>), <fpage>390</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-07931-2</pub-id>
<pub-id pub-id-type="pmid">30674886</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Flores</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hasib</surname>
<given-names>Md M.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Deep learning tackles single-cell analysis&#x2014;a survey of deep learning for scRNA-Seq analysis</article-title>. <source>Briefings Bioinformatics</source> <volume>23</volume> (<issue>1</issue>), <fpage>bbab531</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab531</pub-id>
<pub-id pub-id-type="pmid">34929734</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghorbani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rostami</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guzzi</surname>
<given-names>P. H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>AI-Enabled pipeline for virus detection, validation, and SNP discovery from next-generation sequencing data</article-title>. <source>Front. Genet.</source> <volume>15</volume>, <fpage>1492752</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2024.1492752</pub-id>
<pub-id pub-id-type="pmid">39588519</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kwak</surname>
<given-names>I.-Y.</given-names>
</name>
<name>
<surname>Pota</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Koyano-Nakagawa</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Garry</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>DrImpute: imputing dropout events in single cell RNA sequencing data</article-title>. <source>BMC Bioinformatics</source> <volume>19</volume> (<issue>1</issue>), <fpage>220</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-018-2226-y</pub-id>
<pub-id pub-id-type="pmid">29884114</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haque</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Engel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Teichmann</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>L&#xf6;nnberg</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A practical guide to single-cell RNA-sequencing for biomedical research and clinical applications</article-title>. <source>Genome Medicine</source> <volume>9</volume> (<issue>1</issue>), <fpage>75</fpage>. <pub-id pub-id-type="doi">10.1186/s13073-017-0467-4</pub-id>
<pub-id pub-id-type="pmid">28821273</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hartigan</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>1979</year>). <article-title>Algorithm AS 136: a K-means clustering algorithm</article-title>. <source>J. Royal Statistical Society. Series C Appl. Statistics</source> <volume>28</volume> (<issue>1</issue>), <fpage>100</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.2307/2346830</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>Z.-C.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Z.-Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>P.-F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.-X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Identification of three T cell-related genes as diagnostic and prognostic biomarkers for triple-negative breast cancer and exploration of potential mechanisms</article-title>. <source>Front. Genet.</source> <volume>16</volume>, <fpage>1584334</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2025.1584334</pub-id>
<pub-id pub-id-type="pmid">40606662</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Diao</surname>
<given-names>L.-T.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>RNA sequencing: new technologies and applications in cancer research</article-title>. <source>J. Hematology &#x26; Oncology</source> <volume>13</volume> (<issue>1</issue>), <fpage>166</fpage>. <pub-id pub-id-type="doi">10.1186/s13045-020-01005-x</pub-id>
<pub-id pub-id-type="pmid">33276803</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kinker</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Greenwald</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Tal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Orlova</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cuoco</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>McFarland</surname>
<given-names>J. M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Pan-Cancer single-cell RNA-Seq identifies recurring programs of cellular heterogeneity</article-title>. <source>Nat. Genetics</source> <volume>52</volume> (<issue>11</issue>), <fpage>1208</fpage>&#x2013;<lpage>1218</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-020-00726-6</pub-id>
<pub-id pub-id-type="pmid">33128048</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Brouwer</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A universal deep neural network for in-depth cleaning of single-cell RNA-seq data</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>1901</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-29576-y</pub-id>
<pub-id pub-id-type="pmid">35393428</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jeffrey</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cole</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Jordan</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Yosef</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep generative modeling for single-cell transcriptomics</article-title>. <source>Nat. Methods</source> <volume>15</volume> (<issue>12</issue>), <fpage>1053</fpage>&#x2013;<lpage>1058</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0229-2</pub-id>
<pub-id pub-id-type="pmid">30504886</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Clustering and classification methods for single-cell RNA-sequencing data</article-title>. <source>Briefings Bioinformatics</source> <volume>21</volume> (<issue>4</issue>), <fpage>1196</fpage>&#x2013;<lpage>1208</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbz062</pub-id>
<pub-id pub-id-type="pmid">31271412</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ranjan</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Amin Honardoost</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>scConsensus: combining supervised and unsupervised clustering for cell type identification in single-cell RNA sequencing data</article-title>. <source>BMC Bioinformatics</source> <volume>22</volume> (<issue>1</issue>), <fpage>186</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-021-04028-4</pub-id>
<pub-id pub-id-type="pmid">33845760</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stegle</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Teichmann</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Marioni</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Computational and analytical challenges in single-cell transcriptomics</article-title>. <source>Nat. Rev. Genet.</source> <volume>16</volume> (<issue>3</issue>), <fpage>133</fpage>&#x2013;<lpage>145</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3833</pub-id>
<pub-id pub-id-type="pmid">25628217</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Q.-Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W.-W.</given-names>
</name>
<name>
<surname>Gong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Data analysis guidelines for single-cell RNA-Seq in biomedical studies and clinical applications</article-title>. <source>Mil. Med. Res.</source> <volume>9</volume> (<issue>1</issue>), <fpage>68</fpage>. <pub-id pub-id-type="doi">10.1186/s40779-022-00434-8</pub-id>
<pub-id pub-id-type="pmid">36461064</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Clustering single-cell RNA-Seq data with a model-based deep learning approach</article-title>. <source>Nat. Mach. Intell.</source> <volume>1</volume> (<issue>4</issue>), <fpage>191</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-019-0037-0</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Todorov</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fournier</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gerber</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Principal components analysis: theory and application to gene expression data analysis</article-title>. <source>Genomics Comput. Biol.</source> <volume>4</volume> (<issue>2</issue>), <fpage>e100041</fpage>. <pub-id pub-id-type="doi">10.18547/gcb.2018.vol4.iss2.e100041</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>C.-H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>scDCCA: deep contrastive clustering for single-cell RNA-Seq data based on auto-encoder network</article-title>. <source>Briefings Bioinforma.</source> <volume>24</volume> (<issue>1</issue>), <fpage>bbac625</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac625</pub-id>
<pub-id pub-id-type="pmid">36631401</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>CTEC: a cross-tabulation ensemble clustering approach for single-cell RNA sequencing data analysis</article-title>. <source>Bioinformatics</source> <volume>40</volume> (<issue>4</issue>), <fpage>btae130</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btae130</pub-id>
<pub-id pub-id-type="pmid">38552307</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Neff</surname>
<given-names>N. F.</given-names>
</name>
<name>
<surname>Kalisky</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dalerba</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Treutlein</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Rothenberg</surname>
<given-names>M. E.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Quantitative assessment of single-cell RNA-Sequencing methods</article-title>. <source>Nat. Methods</source> <volume>11</volume> (<issue>1</issue>), <fpage>41</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.2694</pub-id>
<pub-id pub-id-type="pmid">24141493</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.-D.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.-X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>CellBRF: a feature selection method for single-cell clustering using cell balance and random forest</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>Supplement_1</issue>), <fpage>i368</fpage>&#x2013;<lpage>i376</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad216</pub-id>
<pub-id pub-id-type="pmid">37387178</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Z.-Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.-X.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.-P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.-D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>RFCell: a gene selection approach for scRNA-Seq clustering based on permutation and random forest</article-title>. <source>Front. Genetics</source> <volume>12</volume>, <fpage>665843</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2021.665843</pub-id>
<pub-id pub-id-type="pmid">34386033</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1214915/overview">Vinay Randhawa</ext-link>, Brigham and Women&#x2019;s Hospital, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/632832/overview">Ping Luo</ext-link>, Algoma University, Canada</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1132402/overview">Xinyi Liu</ext-link>, University of Illinois Chicago, United States</p>
</fn>
</fn-group>
</back>
</article>