<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="brief-report" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1604418</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2025.1604418</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Brief Research Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>ICARus: a pipeline to extract robust gene expression signatures from transcriptome datasets</article-title>
<alt-title alt-title-type="left-running-head">Li and Fuxman Bass</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2025.1604418">10.3389/fbinf.2025.1604418</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Zhaorong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1270525/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fuxman Bass</surname>
<given-names>Juan I.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/468581/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Bioinformatics Program</institution>, <institution>Boston University</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biology</institution>, <institution>Boston University</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Program in Molecular Biology, Cell Biology and Biochemistry</institution>, <institution>Boston University</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Biological Design Center</institution>, <institution>Boston University</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/306576/overview">Keith A. Crandall</ext-link>, George Washington University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1390624/overview">Youtao Lu</ext-link>, University of Pennsylvania, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1481132/overview">Guanjue Xiang</ext-link>, Dana&#x2013;Farber Cancer Institute, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1915290/overview">Bo-Wei Zhao</ext-link>, Zhejiang University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1969482/overview">Jianlei Gu</ext-link>, Yale University, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Juan I. Fuxman Bass, <email>fuxman@bu.edu</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>06</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>5</volume>
<elocation-id>1604418</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Li and Fuxman Bass.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Li and Fuxman Bass</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Gene signature extraction from transcriptomics datasets has been instrumental in identifying sets of co-regulated genes, finding associations with prognosis, and discovering biomarkers. Independent component analysis (ICA) is a powerful tool to extract such signatures to uncover hidden patterns in complex data and identify coherent gene sets. The ICARus package offers a robust pipeline to perform ICA on transcriptome datasets. While other packages perform ICA using one value of the main parameter (i.e., the number of signatures), ICARus identifies a range of near-optimal parameter values, iterates through these values, and assesses the robustness and reproducibility of the signature components identified. To test the performance of ICARus, we analyzed transcriptome datasets obtained from COVID-19 patients with different outcomes and from lung adenocarcinoma. We identified several reproducible gene expression signatures significantly associated with prognosis, temporal patterns, and cell type composition. Gene set enrichment analysis (GSEA) of these signatures matched findings from previous clinical studies and revealed potentially new biological mechanisms. ICARus with a vignette is available on GitHub at <ext-link ext-link-type="uri" xlink:href="https://github.com/Zha0rong/ICArus">https://github.com/Zha0rong/ICArus</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>independent component analysis</kwd>
<kwd>transcriptomics</kwd>
<kwd>signatures</kwd>
<kwd>machine learning</kwd>
<kwd>robustness</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Genomic Analysis</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Transcriptomic data plays a crucial role in understanding the variation in gene expression patterns across diverse biological conditions and phenotypes. Common approaches to analyze such data involve conducting differential expression and gene expression pattern analyses, which evaluate changes in expression across groups (<xref ref-type="bibr" rid="B10">Conesa et al., 2016</xref>). However, challenges arise when analyzing large transcriptomic datasets from sources like Genotype-Tissue Expression (GTEx) (<xref ref-type="bibr" rid="B20">Lonsdale et al., 2013</xref>) and the Cancer Genome Atlas (TCGA), since these data can often be classified according to multiple known (e.g., tissue, sex, age, tumor type, tumor stage, <italic>etc.</italic>) as well as unknown variables. This complicates identifying the contribution of the different variables to the differences in expression observed across samples. To address these issues and enable the analysis of such large datasets, unsupervised algorithms like principal component analysis (PCA), Weighted Gene Co-Expression Network Analysis (WGCNA) (<xref ref-type="bibr" rid="B16">Langfelder and Horvath, 2008</xref>), non-negative matrix factorization (NMF) (<xref ref-type="bibr" rid="B12">Jia et al., 2015</xref>), and independent component analysis (ICA) (<xref ref-type="bibr" rid="B2">Anglada-Girotto et al., 2022</xref>) have been developed. Unlike methods that compare gene expression between groups, these unsupervised algorithms identify gene expression modules or signatures associated with the phenotype labels of the samples.</p>
<p>ICA has been widely used to identify gene expression signatures in large transcriptomic datasets, including cancer, development, and exposure to treatments (<xref ref-type="bibr" rid="B3">Biton et al., 2014</xref>). ICA separates a multivariate signal, in this case gene expression, into additive subcomponents or signatures which are positive and negative contributions of each gene in the dataset. One key parameter in ICA is determining the optimal number of signatures to extract in a dataset as there is no ground truth for the actual number of independent contributing variables. Most pipelines, such as RobustICA (<xref ref-type="bibr" rid="B2">Anglada-Girotto et al., 2022</xref>) and BIODICA (<xref ref-type="bibr" rid="B13">Kairov et al., 2012</xref>), select this optimal parameter based on the number of components needed in PCA to explain a percentage of variance in the dataset. These studies often increase robustness by iterating the analysis using the same parameter; however, signatures often vary widely across parameter values. This can lead to the identification of low-confidence, non-reproducible signatures.</p>
<p>Here, we introduce the R package ICARus (<xref ref-type="fig" rid="F1">Figure 1A</xref>), designed to streamline the application of ICA and extraction of high-confidence expression signatures that are robust across iterations and reproducible across parameter values. ICARus leverages the proportion of variance explained obtained from PCA to provide a range of near-optimal parameters for the ICA algorithm. Subsequently, for each parameter the ICA algorithm is applied, and the results are clustered and evaluated using the stability index proposed by Icasso to identify robust signatures for each parameter (<xref ref-type="bibr" rid="B11">Himberg and Hyvarinen, 2003</xref>). ICARus then clusters the robust signatures obtained to identify reproducible signatures across parameters. Finally, the gene expression signatures with the highest reproducibility scores are combined into meta-signatures and subjected to further analysis through Gene Set Enrichment Analysis (GSEA) or Fisher&#x2019;s Exact test to functionally interpret the signatures identified.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Overview of ICARus pipeline. <bold>(A)</bold> Pipeline diagram overview of ICARus. <bold>(B)</bold> The input for ICARus is a (Genes x Samples) normalized gene expression matrix where the rows are gene symbols (IDs) and columns are samples. <bold>(C)</bold> PCA plot of samples based on normalized gene expression. <bold>(D)</bold> Left &#x3d; Standard deviation of each principal component sorted by principal component rank order. Right &#x3d; Cumulative proportion of variance explained across principal components. The elbow and knee-points in these plots are used to identify the initiation point n for ICA. <bold>(E)</bold> For each parameter between n and n &#x2b; k (k is defined by user) ICA is performed 100 times, and Icasso quality index is used to assess the robustness of independent components. <bold>(F)</bold> The independent components that pass the user defined robustness threshold for each tested parameter value are clustered. The sizes of clusters indicate the reproducibility of signatures across different parameter values. Signatures that pass the user defined reproducibility scores are output as genes x signatures and signatures x samples matrices.</p>
</caption>
<graphic xlink:href="fbinf-05-1604418-g001.tif">
<alt-text content-type="machine-generated">Flowchart for metagene and metascore matrix creation from expression data using Independent Component Analysis (ICA). Includes PCA plots, heatmaps for signatures, and reproducibility matrices. Panels: A details workflow; B-D show data visualizations; E-F illustrate analysis and result synthesis.</alt-text>
</graphic>
</fig>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and methods</title>
<sec id="s2-1">
<title>Input data format for ICARus</title>
<p>The input data for ICARus is a normalized transcriptome dataset in matrix format, with rows being gene names and columns being samples (<xref ref-type="fig" rid="F1">Figure 1B</xref>). Normalization methods such as Counts-per-Million (CPM) (<xref ref-type="bibr" rid="B7">Chen et al., 2025</xref>) and Ratio of median (<xref ref-type="bibr" rid="B1">Anders and Huber, 2010</xref>) are recommended. Different normalization method strategies will introduce differences in the final results of ICARus; however, most of the signatures are reproducible in the results obtained from different methods (<xref ref-type="sec" rid="s11">Supplementary Figure S1</xref>).</p>
<p>Prefiltering of sparsely expressed genes in the input data is recommended as these genes introduce noise in the analysis, but since the filtering strategy varies between different datasets, it is not included in the pipeline.</p>
</sec>
<sec id="s2-2">
<title>Estimating the set of near-optimal parameters for the ICA algorithm</title>
<p>To estimate the set of near-optimal parameters, ICARus first performs PCA for the input dataset (<xref ref-type="fig" rid="F1">Figure 1C</xref>). Prior work has used an optimal parameter n as the number of top principal components that collectively account for 99% of the variance observed in the dataset (<xref ref-type="bibr" rid="B23">Sastry et al., 2019</xref>). ICARus also relies on the variance explained by PCA, but identifies the range of near-optimal values for n. After performing PCA, users can select whether to use: 1) the ranked distribution of standard deviations of each principal component, or 2) the cumulative proportion of variance explained by a certain number of principal components to determine the lower bound for the parameter set. In the first option, the standard deviation of each principal component is plotted against the ranked order of the principal components which takes the form of an elbow plot; whereas in the second option, the cumulative proportion of variance explained against the order of principal components takes the form of a knee plot. The elbow-point in the first plot and the knee-point in the second plot indicate the top n principal components that explain a large fraction of the variance in the data (<xref ref-type="fig" rid="F1">Figure 1D</xref>), i.e., including more principal components does not lead to a marked increase in variance explained.</p>
<p>To pinpoint this critical elbow/knee point, the Kneedle Algorithm (<xref ref-type="bibr" rid="B24">Satopaa et al., 2011</xref>) is used, and this identified point is designated as the minimum number n for the near-optimal parameter set for subsequent ICA analysis. The Kneedle algorithm was implemented in an R package<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref>. The set of parameters is then defined as every integer in the range (n, n &#x2b; k), where k is user-defined and defaults to 10.</p>
</sec>
<sec id="s2-3">
<title>Generating reproducible gene signatures</title>
<p>Following the identification of the near-optimal parameter set, ICARus initiates the generation of reproducible gene signatures employing two sequential strategies: intra-parameter iterations and inter-parameter iterations. For the intra-parameter iterations, ICARus conducts the ICA algorithm 100 times for each n value. Subsequently, the resulting signatures undergo sign correction suggested by a previous study (<xref ref-type="bibr" rid="B2">Anglada-Girotto et al., 2022</xref>) and hierarchical clustering to identify sets of robust signatures for each specific n. Within each cluster, the medoid is extracted and employed as the representative signature, while the stability of the signature cluster is assessed using the stability index proposed by Icasso (<xref ref-type="fig" rid="F1">Figure 1E</xref>) (<xref ref-type="bibr" rid="B11">Himberg and Hyvarinen, 2003</xref>). To calculate the stability index, the similarities between signatures from different runs are calculated using the absolute value of the Pearson correlation coefficient <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Then the stability index of given cluster M is calculated using the following function in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m2">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
<mml:mo>&#x2016;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> are the size of cluster M and the number of signatures not in cluster M, respectively (<xref ref-type="bibr" rid="B11">Himberg and Hyvarinen, 2003</xref>). The stability index calculated by this function ranges from 0 to 1, from least to most stable. The signatures with stability indices &#x3e;0.75 are evaluated for reproducibility across values of n (<xref ref-type="fig" rid="F1">Figure 1F</xref>). These robust signatures are subjected to hierarchical clustering. A signature obtained with one value of the parameter is considered reproducible if it clusters together with signatures obtained across multiple other n values within the near-optimal set. The user can specify whether to only keep the reproducible signatures originating from the starting point n, or to also keep the reproducible signatures originating from a higher parameter within the near-optimal set, as long as they can be reproduced in more than half of the remaining tested parameters.</p>
<p>ICARus outputs the number of near-optimal values that contribute a signature to the cluster and the average distance between these signatures for each cluster. These values can then be used to select reproducible signatures across many parameter values (<xref ref-type="fig" rid="F1">Figure 1F</xref>).</p>
</sec>
<sec id="s2-4">
<title>Output signatures and downstream analysis of the gene signatures</title>
<p>The reproducible signatures extracted by ICARus consist of two parts: 1) a matrix of genes by signatures, where each value indicates the contribution of the gene to the signature (the distribution of scores within each signature follows a normal distribution with a mean of 0); and 2) a matrix of signatures by samples where each value indicates the contribution of the signature to the expression profile of the sample (<xref ref-type="fig" rid="F1">Figure 1F</xref>). The gene scores of a particular signature can be used to perform Gene Set Enrichment Analysis (<xref ref-type="bibr" rid="B27">Subramanian et al., 2005</xref>) to identify pathways or gene sets associated with the signature for further biological interpretation. The signature scores across samples can be used to associate signature values with sample phenotypes or temporal patterns.</p>
</sec>
<sec id="s2-5">
<title>Implementation</title>
<p>The steps described above were implemented in R with parallel backend computation as the package <italic>ICARus</italic> and are provided as pseudocode in <xref ref-type="sec" rid="s11">Supplementary Material S1</xref>. The package and a vignette are available on GitHub at <ext-link ext-link-type="uri" xlink:href="https://github.com/Zha0rong/ICArus">https://github.com/Zha0rong/ICArus</ext-link>.</p>
</sec>
<sec id="s2-6">
<title>Test datasets</title>
<sec id="s2-6-1">
<title>Peripheral leukocyte samples from COVID-19 patients</title>
<p>To illustrate the efficacy of ICARus in identifying relevant signatures, we applied it to a publicly available RNA-Seq dataset featuring 46 peripheral blood leukocyte samples collected from 11 COVID-19 patients infected with SARS-CoV-2, with varying clinical outcomes (fast recovery, prolonged recovery, and fatal) at different time points (<xref ref-type="fig" rid="F2">Figure 2A</xref>) (<xref ref-type="bibr" rid="B15">Lam et al., 2023</xref>). Fast recovery patients had a median hospitalization time of 7 days, prolonged recovery patients had a median hospitalization time of 25 days, and fatal patients were patients that passed away due to complications of the infection. The count matrix was downloaded from the GEO repository (GSE221066), which included 26,475 genes and 55 samples. To prevent genes with sparse expression introducing noise in the analysis, only genes with non-zero expression in at least one-fourth of the samples were included in the analysis. This strategy filtered out 8,918 genes and retained 17,557 genes for the analysis. The count matrix was normalized using the Counts-Per-Million method (<xref ref-type="bibr" rid="B7">Chen et al., 2025</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Application of ICARus to a COVID-19 transcriptomic dataset. <bold>(A)</bold> The test dataset consists of 46 samples of blood-derived leukocytes obtained from COVID-19 patients with different clinical outcomes at different time points during infection. After filtering genes with no expression in more than half of the dataset, 17,557 genes were kept for downstream analysis. <bold>(B)</bold> The PCA plot illustrates the separation of samples from different clinical outcomes. <bold>(C)</bold> The initiation parameter identified by ICARus for this dataset was 10 (n), and ICARus determined robust signatures using parameter values 10&#x2013;19. Signatures across parameter values were clustered and only the signatures that were reproducible across more than 5 values were considered for downstream analysis. ICARus identified 10 robust and reproducible gene expression signatures from the test dataset. <bold>(D)</bold> The box plots show the signature score distributions in different clinical outcomes. Statistical significance was determined by Wilcoxon rank-sum test.&#x2009; &#x2a;p &#x3c; 0.05,&#x2009; &#x2a;&#x2a;p &#x3c; 0.01, &#x2009;&#x2a;&#x2a;&#x2a;p &#x3c; 0.005.</p>
</caption>
<graphic xlink:href="fbinf-05-1604418-g002.tif">
<alt-text content-type="machine-generated">Panel A displays a hierarchical cluster analysis of patient recovery groups: fast, prolonged, and fatal, with corresponding hospitalization times. Panel B shows a PCA plot of patients colored by group. Panel C features a heatmap depicting sample similarities and reproducibility. Panel D presents box plots comparing ten gene expression signatures across recovery groups. The legend indicates group colors and specific patient identifiers.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-6-2">
<title>Primary tumor samples of lung adenocarcinoma (LUAD)</title>
<p>To test the performance of ICARus on a large RNA-seq dataset with complex clinical phenotypes, we processed 539 lung adenocarcinoma primary tumor RNA-seq samples from TCGA database (<xref ref-type="bibr" rid="B4">Cancer Genome Atlas Research Network, 2014</xref>), which were downloaded through the TCGA-biolinks portal (<xref ref-type="bibr" rid="B9">Colaprico et al., 2016</xref>). The count matrix included 19,938 protein coding genes and 539 samples. To filter out genes with sparse and low expression in the dataset, only genes with non-zero expression in at least one-fourth of the samples were included in the analysis. This strategy filtered out 1,417 genes and retained 18,091 genes and 539 samples for the analysis. The count matrix was normalized using the Counts-Per-Million method (<xref ref-type="bibr" rid="B7">Chen et al., 2025</xref>).</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Identification of reproducible signatures in a COVID-19 expression dataset</title>
<p>To identify signatures associated with COVID-19 outcomes, we used a dataset of 46 samples derived from 11 patients with different outcomes (fast recovery, prolonged recovery, and fatal) at different time points (<xref ref-type="fig" rid="F2">Figure 2A</xref>) (<xref ref-type="bibr" rid="B15">Lam et al., 2023</xref>). First, we determined the near-optimal parameter set in the COVID-19 expression dataset. We then selected the critical elbow-point in the PCA option provided by ICARus. This corresponded to 10 principal components; therefore, the nominated range for the ICA parameter was 10&#x2013;19 independent components. We used 100 iterations for each of these parameter values, then identified the medoid signature, followed by clustering of signatures across the parameter values. This resulted in 10 signatures that were reproducible across more than half of the tested parameter values (<xref ref-type="fig" rid="F2">Figures 2B,C</xref>). By comparing the signature scores between samples from patients with different clinical outcomes, we identified two signatures (signatures 4 and 10) that monotonically increase with outcome severity (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Next, we aimed to determine the biological processes associated with these signatures.</p>
<sec id="s3-1-1">
<title>Signature 4 is associated with poor prognosis and fatal outcomes</title>
<p>Signature 4 exhibited a significant correlation with patient outcomes, with samples from fatal outcome patients having the highest scores and those from fast-recovery patients having the lowest scores (<xref ref-type="fig" rid="F3">Figure 3A</xref>). By plotting signature scores across time points and clinical outcomes, we observed that signature 4 scores were higher in samples from fatal outcome patients, and lower in fast-recovery patients at every time point (<xref ref-type="fig" rid="F3">Figure 3A</xref>). This observation is important as it rules out the possibilities of association driven by the bias at one or more time points and suggests that the biological functions associated with signature 4 can be used to differentiate fast recovery patients at any time point.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>GSEA of clinical outcome-associated Signature 4. <bold>(A)</bold> The box plots show the signature 4 score distributions in different clinical outcomes. The line plot shows the temporal pattern of Signature 4 for different patient outcomes. Statistical significance was determined by Wilcoxon rank-sum test. <bold>(B)</bold> Bar graph displays the top enriched and depleted pathways from GSEA analysis results of Signature 4. Net enrichment scores are shown. <bold>(C)</bold> Network representation of the top enriched pathways and the driver genes associated with the enrichment results. <bold>(D)</bold> ssGSEA results of the top enriched pathways from GSEA analysis results of Signature 4.</p>
</caption>
<graphic xlink:href="fbinf-05-1604418-g003.tif">
<alt-text content-type="machine-generated">Composite image with four panels: Panel A shows a box plot comparing signature scores with p-values labeled for fast, prolonged, and fatal recovery over days. Panel B is a bar graph displaying net enrichment scores of immune response-related terms. Panel C is a network diagram illustrating relationships between genes involved in antigen processing and immune responses, with nodes colored by metagene scores. Panel D features multiple box plots representing net enrichment scores across various immune processes for different recovery outcomes, labeled with significance markers.</alt-text>
</graphic>
</fig>
<p>The GSEA analysis of signature 4 revealed a depletion of T and B cell activation and MHC class II antigen processing and presentation, and an enrichment of inflammation pathways and MHC Class I antigen presentation (<xref ref-type="fig" rid="F3">Figure 3B</xref>). To identify which genes were driving the observed enrichments, a net plot was generated (<xref ref-type="fig" rid="F3">Figure 3C</xref>). In this plot, large brown nodes represent enrichment terms, while smaller red or blue nodes represent individual genes colored according to their scores in signature 4. Edges were drawn between nodes when a gene belonged to the core enrichment set of a given term. In the network graph, elevated scores of immune genes, such as IL1R2, MYD88, NLRP3, CASP1, TLR4, were shown to drive the enrichment of interleukin 1 and interleukin 8 producing signaling pathways (<xref ref-type="fig" rid="F3">Figure 3C</xref>). This is consistent with previous studies linking the elevated expression of IL-1 and IL-8 with poor prognosis (<xref ref-type="bibr" rid="B18">Li et al., 2021</xref>; <xref ref-type="bibr" rid="B6">Cavalli et al., 2021</xref>). Further clinical studies also showed that the blocking of IL-1 in COVID-19 patients led to better prognosis (<xref ref-type="bibr" rid="B5">Cavalli et al., 2020</xref>). The network also showed that toll-like receptor genes such as TLR1, TLR2 and TLR4, which had elevated metagene scores, were driving the enrichment of toll-like receptor signaling pathways (<xref ref-type="fig" rid="F3">Figure 3C</xref>). The TLR2 signaling pathway, elevated in signature 4, can also be associated with poor prognosis, consistent with several clinical studies showing elevated TLR2 expression was associated with poor prognosis in COVID-19 infection (<xref ref-type="bibr" rid="B28">Taniguchi-Ponciano et al., 2021</xref>; <xref ref-type="bibr" rid="B30">Xu Q. et al., 2022</xref>). 
Signature 4 also has a negative association with MHC-Class II antigen presenting pathways, which is driven by the suppression of MHC-Class II genes such as HLA-DMA, HLA-DMB and HLA-DRA (<xref ref-type="fig" rid="F3">Figure 3C</xref>), suggesting a negative association with poor prognosis. This is consistent with previous studies showing that monocytes in COVID-19 patients have lower levels of MHC class II proteins (<xref ref-type="bibr" rid="B30">Xu Q. et al., 2022</xref>; <xref ref-type="bibr" rid="B14">Laing et al., 2020</xref>). These results were confirmed using ssGSEA (<xref ref-type="bibr" rid="B22">Reich et al., 2006</xref>), which calculates net enrichment scores of individual pathways in each sample (<xref ref-type="fig" rid="F3">Figure 3D</xref>).</p>
</sec>
<sec id="s3-1-2">
<title>ICARus identified signature 10 as associated with a temporal phenotype</title>
<p>Signature 10 not only displayed an association with clinical outcomes (lowest in fast recovery, highest in fatal), but also showed a temporal phenotype (<xref ref-type="sec" rid="s11">Supplementary Figure S2A</xref>). Prolonged recovery patients and fatal outcome patients had similar signature 10 scores at the initial time point, but fatal outcome patients had consistently higher signature 10 scores at the later time points (<xref ref-type="sec" rid="s11">Supplementary Figure S2A</xref>). GSEA analysis of signature 10 revealed positive associations with regulation of neutrophils chemotaxis/mediated immunity, actin filaments assembly/organization and extracellular matrix (<xref ref-type="sec" rid="s11">Supplementary Figure S2B</xref>). A net plot was generated for the genes and the enriched terms to visualize the genes that drive the enrichment of given pathways (<xref ref-type="sec" rid="s11">Supplementary Figure S2C</xref>). For example, matrix metalloproteinase genes such as MMP2 and MMP8 drive the enrichment of extracellular matrix disassembly and galectin genes such as LGALS1, LGALS3 and LGALS9 drive the enrichment of neutrophil mediated immune pathways. Previous studies (<xref ref-type="bibr" rid="B25">Schulte-Schrepping et al., 2020</xref>) have shown that elevated neutrophil counts are associated with a poor prognosis in COVID-19 patients, with clinical publications attributing the poor prognosis to the formation of neutrophil extracellular traps (NETs) (<xref ref-type="bibr" rid="B36">Zuo et al., 2021</xref>). NETs, composed of cell-free DNA, histones, and cytosolic proteins released by neutrophils, require the rearrangement of the actin cytoskeleton for their formation (<xref ref-type="bibr" rid="B26">Sprenkeler et al., 2022</xref>). 
NETs have been implicated in thrombosis and tissue damage (<xref ref-type="bibr" rid="B21">Papayannopoulos, 2018</xref>; <xref ref-type="bibr" rid="B35">Zuo et al., 2020</xref>), contributing to the poor prognosis of COVID-19 patients.</p>
<p>GSEA analysis results also revealed a negative association between signature 10 and regulation of T cell activation and T cell mediated immunity, driven by suppression of killer cell lectin-like receptors such as KLRC2/3/4, KLRD1 and KLRK1 (<xref ref-type="sec" rid="s11">Supplementary Figure S2C</xref>). Previous studies have also shown decreasing T cell counts in COVID-19 patients with severe symptoms compared to the ones with non-severe symptoms (<xref ref-type="bibr" rid="B19">Liu et al., 2020</xref>). Further, another study reported an elevated number of neutrophils and decreasing number of T cells in COVID-19 patients with severe symptoms compared with COVID-19 patients with mild symptoms (<xref ref-type="bibr" rid="B29">Xu J. et al., 2022</xref>).</p>
</sec>
</sec>
</sec>
<sec id="s3-2">
<title>Identification of reproducible signatures in a TCGA-LUAD expression dataset</title>
<p>To demonstrate the application of ICARus on another larger dataset, we selected the TCGA-LUAD lung adenocarcinoma expression dataset (<xref ref-type="fig" rid="F4">Figure 4A</xref>) (<xref ref-type="bibr" rid="B4">Cancer Genome Atlas Research Network, 2014</xref>). To identify the near-optimal parameter set, we selected the critical elbow-point in the PCA option provided by ICARus. This corresponded to 48 principal components, and therefore, the nominated range for the ICA parameter was 48&#x2013;57 independent components. We performed 100 iterations for each of these parameter values, identified the medoid signature, then clustered signatures across the parameter values. This resulted in 22 signatures that were reproducible across more than half of the parameter values tested (<xref ref-type="fig" rid="F4">Figure 4B</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>ICARus extracts prognosis-related signatures from TCGA-LUAD database. <bold>(A)</bold> 539 primary tumor RNA-Seq samples were downloaded from the TCGA database, and 18,091 genes were kept in the analysis. <bold>(B)</bold> ICARus identified 22 reproducible signatures. <bold>(C)</bold> Kaplan Meier plots of 4 signatures significantly associated with adverse prognosis. The adjusted p-values were obtained from the Cox-proportional hazard ratio test, and covariate factors such as gender, age, and tissue of origin were regressed out using the likelihood ratio test. HR &#x3d; hazard ratio. <bold>(D and E)</bold> The bar graph displays the top enriched and depleted pathways from GSEA results of Signature 10 <bold>(D)</bold> and Signature 9 <bold>(E)</bold>. Net enrichment scores are shown. <bold>(F)</bold> Dot plot of Pearson correlation coefficient (PCC) and adjusted p-value of correlation tests between signature scores and cell type proportion. The color of the dot showed correlation coefficients and the size of the dots showed &#x2212;log<sub>10</sub> (adjusted p-value).</p>
</caption>
<graphic xlink:href="fbinf-05-1604418-g004.tif">
<alt-text content-type="machine-generated">Heatmaps, survival curves, and bar plots illustrate gene expression data, signature scores, and enrichment analysis across 539 samples. Panels show gene signatures, survival rates, and net enrichment scores, highlighting specific biological processes and correlations with clinical outcomes.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-3">
<title>Identification of gene signatures associated with disequilibrium of cell type proportion and adverse prognosis</title>
<p>To identify signatures associated with adverse prognosis, samples were stratified by the median score of each gene signature into two groups: samples with higher given gene signature scores and samples with lower given gene signature scores. The Cox proportional hazards model was used to perform the survival analysis and the likelihood ratio test was used to regress out co-variables such as age of diagnosis, gender, location of tumor origin, and cell type proportion. Four signatures were significantly associated with adverse prognosis (<xref ref-type="fig" rid="F4">Figure 4C</xref>). We performed GSEA for signatures 9 and 10 to determine the pathways associated with adverse prognosis. GSEA of signature 10 showed an enrichment of keratinization related processes and depletion of metabolic, immune-related, and cell division related pathways (<xref ref-type="fig" rid="F4">Figure 4D</xref>). Previous studies have shown that keratin gene expression activates the epithelial-mesenchymal transition in tumor cells and leads to poor prognosis (<xref ref-type="bibr" rid="B17">Li et al., 2024</xref>). GSEA of signature 9 showed an enrichment of appendage development pathways and depletion of macrophages and immune related pathways (<xref ref-type="fig" rid="F4">Figure 4E</xref>). Previous studies have shown that the enrichment of appendage development pathways was associated with poor prognosis (<xref ref-type="bibr" rid="B31">Yu et al., 2024</xref>).</p>
<p>To determine whether signature 9 is indeed associated with a depletion of macrophages in the corresponding samples, we used BayesPrism (<xref ref-type="bibr" rid="B8">Chu et al., 2022</xref>) to deconvolve the 539 bulk RNA-Seq samples and predict the proportion of each cell type in the tumor microenvironment of each sample. Then, we performed correlation tests between the proportion of each cell type and score of each signature. The results were visualized using a dot plot in which the rows are signatures and the columns are cell type proportions; the sizes of the dots are the negative log10 transformed FDR adjusted p-values of the correlation tests and the colors of the dots are the correlation coefficients (<xref ref-type="fig" rid="F4">Figure 4F</xref>). We found several signatures associated with cell type proportions. In particular, signature 9 was significantly associated with a low proportion of macrophages, consistent with our GSEA results.</p>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>We developed ICARus, an R package designed to assist researchers in identifying robust and reproducible gene signatures using ICA across multiple parameter values. This pipeline is highly versatile enabling users to select analysis parameters and stringency. First, ICARus enables the user to manually or automatically select near-optimal parameter sets using elbow or knee points of PCA results. Next, ICARus allows users to select the reproducibility criteria. Although our analyses focused on signatures identified in more than half of all parameters tested, the pipeline can also output signatures present in more than half of parameters from their first instance. This allows the identification of signatures specific to higher parameter values.</p>
<p>To show that the gene expression signatures extracted by ICARus are meaningful, we tested the package on two RNA-Seq datasets. The first dataset consisted of leukocyte samples which were obtained from COVID-19 patients with different clinical outcomes, and the second dataset consisted of primary tumor samples obtained from lung adenocarcinoma patients. The analysis of COVID-19 patient samples showed that ICARus identified biologically meaningful signatures that were associated with patient prognosis and the pathways that drive these associations.</p>
<p>Analysis of the primary tumor samples showed that ICARus identified gene signatures associated with prognosis and cell type proportion in the tumor microenvironment. The reproducible signatures identified by ICARus were associated with clinical phenotypes and temporal patterns consistent with previous studies. Furthermore, the network analyses of the signatures demonstrated that the signatures will provide biologically meaningful genes driving the enrichment of relevant biological functions. These genes can be used as input for some of the recently published algorithms that employ deep learning algorithms to study gene interaction networks and drug response (<xref ref-type="bibr" rid="B33">Zhao et al., 2024</xref>; <xref ref-type="bibr" rid="B34">Zhao et al., 2025</xref>; <xref ref-type="bibr" rid="B32">Zhao et al., 2022</xref>). In principle, ICARus can also be used to extract signatures from single cell RNA-seq datasets; however, the method may need adaptation to account for noise and missing values.</p>
<p>In summary, ICARus has demonstrated the ability to produce biologically meaningful and reproducible signatures which can be extended to other expression datasets.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>ZL: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft. JIFB: Funding acquisition, Resources, Supervision, Visualization, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was funded by the National Institutes of Health grant R35 GM128625 awarded to JIFB.</p>
</sec>
<ack>
<p>We want to thank Devlin Moyer for testing and providing feedback on the ICARus package.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fbinf.2025.1604418/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fbinf.2025.1604418/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Image1.JPEG" id="SM1" mimetype="application/JPEG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.JPEG" id="SM2" mimetype="application/JPEG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.docx" id="SM3" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/etam4260/kneedle">https://github.com/etam4260/kneedle</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anders</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Differential expression analysis for sequence count data</article-title>. <source>Genome Biol.</source> <volume>11</volume> (<issue>10</issue>), <fpage>R106</fpage>. <pub-id pub-id-type="doi">10.1186/gb-2010-11-10-r106</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anglada-Girotto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Miravet-Verde</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Serrano</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Head</surname>
<given-names>S. A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Robustica: customizable robust independent component analysis</article-title>. <source>BMC Bioinforma.</source> <volume>23</volume> (<issue>1</issue>), <fpage>519</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-022-05043-9</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biton</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bernard-Pierrot</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Lou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Krucker</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chapeaublanc</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Rubio-P&#xe9;rez</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Independent component analysis uncovers the landscape of the bladder tumor transcriptome and reveals insights into luminal and basal subtypes</article-title>. <source>Cell Rep.</source> <volume>9</volume> (<issue>4</issue>), <fpage>1235</fpage>&#x2013;<lpage>1245</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2014.10.035</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<collab>Cancer Genome Atlas Research Network</collab> (<year>2014</year>). <article-title>Comprehensive molecular profiling of lung adenocarcinoma</article-title>. <source>Nature</source> <volume>511</volume> (<issue>7511</issue>), <fpage>543</fpage>&#x2013;<lpage>550</lpage>. <pub-id pub-id-type="doi">10.1038/nature13385</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cavalli</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>De Luca</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Campochiaro</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Della-Torre</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ripa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Canetti</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Interleukin-1 blockade with high-dose anakinra in patients with COVID-19, acute respiratory distress syndrome, and hyperinflammation: a retrospective cohort study</article-title>. <source>Lancet Rheumatol.</source> <volume>2</volume> (<issue>6</issue>), <fpage>e325</fpage>&#x2013;<lpage>e331</lpage>. <pub-id pub-id-type="doi">10.1016/s2665-9913(20)30127-2</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cavalli</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Larcher</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tomelleri</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Campochiaro</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Della-Torre</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>De Luca</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Interleukin-1 and interleukin-6 inhibition compared with standard management in patients with COVID-19 and hyperinflammation: a cohort study</article-title>. <source>Lancet Rheumatol.</source> <volume>3</volume> (<issue>4</issue>), <fpage>e253</fpage>&#x2013;<lpage>e261</lpage>. <pub-id pub-id-type="doi">10.1016/s2665-9913(21)00012-6</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lun</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Baldoni</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Smyth</surname>
<given-names>G. K.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>edgeR v4: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets</article-title>. <source>Nucleic Acids Res.</source> <volume>53</volume> (<issue>2</issue>), <fpage>gkaf018</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaf018</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pe&#x2019;er</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Danko</surname>
<given-names>C. G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Cell type and gene expression deconvolution with BayesPrism enables Bayesian integrative analysis across bulk and single-cell RNA sequencing in oncology</article-title>. <source>Nat. Cancer</source> <volume>3</volume> (<issue>4</issue>), <fpage>505</fpage>&#x2013;<lpage>517</lpage>. <pub-id pub-id-type="doi">10.1038/s43018-022-00356-3</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Colaprico</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Olsen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Garofano</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cava</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Garolini</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>TCGAbiolinks: an R/Bioconductor package for integrative analysis of TCGA data</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume> (<issue>8</issue>), <fpage>e71</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1507</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conesa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Madrigal</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tarazona</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gomez-Cabrero</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cervera</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>McPherson</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>A survey of best practices for RNA-seq data analysis</article-title>. <source>Genome Biol.</source> <volume>17</volume>, <fpage>13</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-016-0881-8</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Himberg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hyvarinen</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). &#x201c;<article-title>Icasso: software for investigating the reliability of ICA estimates by clustering and visualization</article-title>,&#x201d; in <source>2003 IEEE XIII Workshop on Neural Networks for Signal Processing (IEEE Cat. No.03TH8718), Toulouse, France</source>, <fpage>259</fpage>&#x2013;<lpage>268</lpage>. <pub-id pub-id-type="doi">10.1109/NNSP.2003.1318025</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bo</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Barnes</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Gene ranking of RNAseq data via discriminant non-negative matrix factorization</article-title>. <source>PLoS One</source> <volume>10</volume> (<issue>9</issue>), <fpage>e0137782</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0137782</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kairov</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Karpenyuk</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ramanculov</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zinovyev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Network analysis of gene lists for finding reproducible prognostic breast cancer gene signatures</article-title>. <source>Bioinformation</source> <volume>8</volume> (<issue>16</issue>), <fpage>773</fpage>&#x2013;<lpage>776</lpage>. <pub-id pub-id-type="doi">10.6026/97320630008773</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laing</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Lorenc</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Del Molino Del Barrio</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fish</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Monin</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A dynamic COVID-19 immune signature includes associations with poor prognosis</article-title>. <source>Nat. Med.</source> <volume>26</volume> (<issue>10</issue>), <fpage>1623</fpage>&#x2013;<lpage>1635</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-020-1038-6</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lam</surname>
<given-names>M. T. Y.</given-names>
</name>
<name>
<surname>Duttke</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Odish</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>H. D.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>C. T.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Dynamic activity in cis-regulatory elements of leukocytes identifies transcription factor activation and stratifies COVID-19 severity in ICU patients</article-title>. <source>Cell Rep. Med.</source> <volume>4</volume> (<issue>2</issue>), <fpage>100935</fpage>. <pub-id pub-id-type="doi">10.1016/j.xcrm.2023.100935</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langfelder</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Horvath</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>WGCNA: an R package for weighted correlation network analysis</article-title>. <source>BMC Bioinforma.</source> <volume>9</volume> (<issue>1</issue>), <fpage>559</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-559</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Keratin gene signature expression drives epithelial-mesenchymal transition through enhanced TGF-&#x3b2; signaling pathway activation and correlates with adverse prognosis in lung adenocarcinoma</article-title>. <source>Heliyon</source> <volume>10</volume> (<issue>3</issue>), <fpage>e24549</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e24549</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Interleukin-8 as a biomarker for disease prognosis of coronavirus disease-2019 patients</article-title>. <source>Front. Immunol.</source> <volume>11</volume>, <fpage>602395</fpage>. <pub-id pub-id-type="doi">10.3389/fimmu.2020.602395</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>T cell response in patients with COVID-19</article-title>. <source>Blood Sci.</source> <volume>2</volume> (<issue>03</issue>), <fpage>76</fpage>&#x2013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.1097/bs9.0000000000000050</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lonsdale</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Salvatore</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Phillips</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shad</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>The genotype-tissue expression (GTEx) project</article-title>. <source>Nat. Genet.</source> <volume>45</volume> (<issue>6</issue>), <fpage>580</fpage>&#x2013;<lpage>585</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2653</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Papayannopoulos</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Neutrophil extracellular traps in immunity and disease</article-title>. <source>Nat. Rev. Immunol.</source> <volume>18</volume> (<issue>2</issue>), <fpage>134</fpage>&#x2013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1038/nri.2017.105</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reich</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liefeld</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gould</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lerner</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mesirov</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>GenePattern 2.0</article-title>. <source>Nat. Genet.</source> <volume>38</volume> (<issue>5</issue>), <fpage>500</fpage>&#x2013;<lpage>501</lpage>. <pub-id pub-id-type="doi">10.1038/ng0506-500</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sastry</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Szubin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hefner</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The <italic>Escherichia coli</italic> transcriptome mostly consists of independently regulated modules</article-title>. <source>Nat. Commun.</source> <volume>10</volume> (<issue>1</issue>), <fpage>5536</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-019-13483-w</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Satopaa</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Albrecht</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Irwin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Raghavan</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Finding a &#x201c;kneedle&#x201d; in a haystack: detecting knee points in system behavior</article-title>,&#x201d; in <source>2011 31st international conference on distributed computing systems workshops</source> (<publisher-name>IEEE</publisher-name>), <fpage>166</fpage>&#x2013;<lpage>171</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schulte-Schrepping</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Reusch</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Paclik</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ba&#xdf;ler</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Schlickeiser</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Severe COVID-19 is marked by a dysregulated myeloid cell compartment</article-title>. <source>Cell</source> <volume>182</volume> (<issue>6</issue>), <fpage>1419</fpage>&#x2013;<lpage>1440.e23</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2020.08.001</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sprenkeler</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Tool</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Henriet</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>van Bruggen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kuijpers</surname>
<given-names>T. W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Formation of neutrophil extracellular traps requires actin cytoskeleton rearrangements</article-title>. <source>Blood</source> <volume>139</volume> (<issue>21</issue>), <fpage>3166</fpage>&#x2013;<lpage>3180</lpage>. <pub-id pub-id-type="doi">10.1182/blood.2021013565</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Subramanian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mootha</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ebert</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Gillette</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>102</volume> (<issue>43</issue>), <fpage>15545</fpage>&#x2013;<lpage>15550</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0506580102</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taniguchi-Ponciano</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Vadillo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Mayani</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gonzalez-Bonilla</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Torres</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Majluf</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Increased expression of hypoxia-induced factor 1&#x3b1; mRNA and its related genes in myeloid blood cells from critically ill COVID-19 patients</article-title>. <source>Ann. Med.</source> <volume>53</volume> (<issue>1</issue>), <fpage>197</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1080/07853890.2020.1858234</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Carver</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Vanheyningen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Parkin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Garmire</surname>
<given-names>L. X.</given-names>
</name>
<etal/>
</person-group> (<year>2022b</year>). <article-title>Heterogeneity of neutrophils and inflammatory responses in patients with COVID-19 and healthy controls</article-title>. <source>Front. Immunol.</source> <volume>13</volume>, <fpage>970287</fpage>. <pub-id pub-id-type="doi">10.3389/fimmu.2022.970287</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>J. J.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>Association of pyroptosis and severeness of COVID-19 as revealed by integrated single-cell transcriptome data analysis</article-title>. <source>ImmunoInformatics</source> <volume>6</volume>, <fpage>100013</fpage>. <pub-id pub-id-type="doi">10.1016/j.immuno.2022.100013</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Comprehensive proteomic profiling of lung adenocarcinoma: development and validation of an innovative prognostic model</article-title>. <source>Transl. Cancer Res.</source> <volume>13</volume> (<issue>5</issue>), <fpage>2187</fpage>&#x2013;<lpage>2207</lpage>. <pub-id pub-id-type="doi">10.21037/tcr-23-1940</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>X. R.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>P. W.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y. P.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A geometric deep learning framework for drug repositioning over heterogeneous information networks</article-title>. <source>Brief. Bioinform.</source> <volume>23</volume> (<issue>6</issue>), <fpage>bbac384</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac384</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>X. R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D. X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>P. W.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>A heterogeneous information network learning model with neighborhood-level structural representation for predicting lncRNA-miRNA interactions</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>23</volume>, <fpage>2924</fpage>&#x2013;<lpage>2933</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2024.06.032</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>X. R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D. X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>P. W.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Regulation-aware graph learning for drug repositioning over heterogeneous biological network</article-title>. <source>Inf. Sci.</source> <volume>686</volume>, <fpage>121360</fpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2024.121360</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zuo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yalavarthi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gockman</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Madison</surname>
<given-names>J. A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Neutrophil extracellular traps in COVID-19</article-title>. <source>JCI Insight</source> <volume>5</volume> (<issue>11</issue>), <fpage>e138999</fpage>. <pub-id pub-id-type="doi">10.1172/jci.insight.138999</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zuo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yalavarthi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gockman</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Madison</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Neutrophil extracellular traps and thrombosis in COVID-19</article-title>. <source>J. Thromb. Thrombolysis</source> <volume>51</volume> (<issue>2</issue>), <fpage>446</fpage>&#x2013;<lpage>453</lpage>. <pub-id pub-id-type="doi">10.1007/s11239-020-02324-z</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>