<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Big Data</journal-id>
<journal-title-group>
<journal-title>Frontiers in Big Data</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Big Data</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-909X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdata.2026.1782461</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A genetic algorithm-based framework for online sparse feature selection in data streams</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Guanyu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Jinhang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>He</surname> <given-names>Guifan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Yifan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Bai</surname> <given-names>Huabo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhou</surname> <given-names>Min</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3338175"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Computer and Information Science, Southwest University</institution>, <city>Chongqing</city>, <country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>PetroChina Qinghai Oilfield Company</institution>, <city>Qinghai</city>, <country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution>Office of Informatization Construction, Southwest University</institution>, <city>Chongqing</city>, <country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Min Zhou, <email xlink:href="mailto:zhoumin@swu.edu.cn">zhoumin@swu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-09">
<day>09</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1782461</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>18</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Liu, Liu, He, Liu, Bai and Zhou.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Liu, He, Liu, Bai and Zhou</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-09">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>High-dimensional streaming data implementations commonly utilize online streaming feature selection (OSFS) techniques. In practice, however, incomplete data due to equipment failures and technical constraints often poses a significant challenge. Online Sparse Streaming Feature Selection (OS<sup>2</sup>FS) tackles this issue by performing missing data imputation via latent factor analysis. Nevertheless, existing OS<sup>2</sup>FS approaches exhibit considerable limitations in feature evaluation, resulting in degraded performance. To address these shortcomings, this paper introduces a novel genetic algorithm-based online sparse streaming feature selection (GA-OS<sup>2</sup>FS) in data streams, which integrates two key innovations: (1) imputation of missing values using a latent factor analysis model, and (2) application of a genetic algorithm to assess feature importance. Comprehensive experiments conducted on six real-world datasets show that GA-OS<sup>2</sup>FS surpasses state-of-the-art OSFS and OS<sup>2</sup>FS methods, consistently attaining higher accuracy through the selection of optimal feature subsets.</p></abstract>
<kwd-group>
<kwd>feature selection</kwd>
<kwd>genetic algorithm</kwd>
<kwd>latent factor analysis</kwd>
<kwd>missing data</kwd>
<kwd>online learning</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Key Project of Chongqing Technology Innovation and Application Development (No. CSTB2023TIAD-KPX0037, No. CSTB2025TIAD-KPX0027).</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="6"/>
<equation-count count="19"/>
<ref-count count="63"/>
<page-count count="12"/>
<word-count count="9461"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Data Mining and Management</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The rapid advancement of information technology has led to the widespread generation of high-dimensional data characterized by multiple levels, granularities, and modalities. This complexity poses significant challenges to foundational technologies in fields such as artificial intelligence, data management, communication, and storage (<xref ref-type="bibr" rid="B52">Yao et al., 2022</xref>; <xref ref-type="bibr" rid="B33">Ram&#x000ED;rez-Gallego et al., 2018</xref>; <xref ref-type="bibr" rid="B20">Li Z. et al., 2025</xref>). To address the issues associated with high-dimensional data, feature selection has proven to be a highly effective technique (<xref ref-type="bibr" rid="B5">Chandrashekar and Sahin, 2014</xref>; <xref ref-type="bibr" rid="B4">Casmiry et al., 2025</xref>; <xref ref-type="bibr" rid="B36">Wang et al., 2025</xref>; <xref ref-type="bibr" rid="B8">Chen et al., 2025</xref>). In recent years, a diverse array of feature selection methodologies has emerged (<xref ref-type="bibr" rid="B15">Kundu and Mitra, 2017</xref>; <xref ref-type="bibr" rid="B51">Yang et al., 2018</xref>; <xref ref-type="bibr" rid="B2">Albattah and Khan, 2025</xref>), which can be broadly categorized into filter-based, wrapper-based (<xref ref-type="bibr" rid="B50">Xue et al., 2018</xref>), and embedded approaches (<xref ref-type="bibr" rid="B49">Xue et al., 2016</xref>). Furthermore, in the context of big data applications, the feature space frequently expands dynamically, potentially to an infinite scale (<xref ref-type="bibr" rid="B29">Ni et al., 2017</xref>; <xref ref-type="bibr" rid="B10">Ditzler et al., 2018</xref>). This reality has driven the development of Online Streaming Feature Selection (OSFS). For example, <xref ref-type="bibr" rid="B45">Wu et al. (2013)</xref> pioneered an OSFS framework utilizing online relevance and redundancy analysis. 
Their model classifies incoming features into strongly relevant, weakly relevant, and irrelevant groups, ultimately selecting features that are relevant (strongly or weakly) and non-redundant. Subsequently, <xref ref-type="bibr" rid="B53">Yu et al. (2016)</xref> introduced the SAOLA model, which extends this concept by evaluating the pairwise relationships between streaming features through a specific mechanism.</p>
<p>However, most existing Online Streaming Feature Selection (OSFS) models are formulated under the assumption of complete feature streams, where all incoming data points are fully observed without any missing values. In real-world scenarios, this assumption often fails to hold, as streaming features frequently contain substantial missing data due to a range of unforeseen factors. For instance, in single-cell sequencing, technological constraints make it challenging to profile every cell comprehensively, preventing reliable weight assignment for all measured entities (<xref ref-type="bibr" rid="B3">Badsha et al., 2020</xref>). Similarly, in clinical settings, complete patient data collection is often hindered by equipment failures or procedural inconsistencies (<xref ref-type="bibr" rid="B14">Idri et al., 2018</xref>). This prevalent issue gives rise to the challenge of Online Sparse Streaming Feature Selection (OS<sup>2</sup>FS), which addresses the critical question of how to reliably select features from a stream that is inherently sparse and contains significant missing entries.</p>
<p>In real-world recommendation systems, features&#x02014;including user behavior logs and product attributes&#x02014;are often received as continuous streams. Since users typically interact with only a fraction of available items, missing data is commonplace. These features are also highly interdependent, complicating the decision of which should be retained or removed to fully and precisely model user interests. Consequently, identifying the most representative feature subset is essential for delivering prompt and relevant recommendations. Traditional approaches to feature evaluation are largely designed for fully observed feature streams and tend to overlook the inaccuracies arising from the imputation of missing data. Such neglect is particularly consequential, as feature selection constitutes an NP-hard binary discrete optimization problem. Evolutionary computation (EC) techniques are notably effective in overcoming these challenges, providing robust solutions for problems of high combinatorial complexity. The core advantages of genetic algorithms lie in their global exploration capability, low dependency on problem assumptions, coding flexibility, and ease of parallelization, making them particularly suitable for complex optimization problems. Furthermore, their strong global search ability helps prevent convergence to local optima, increasing the likelihood of discovering feature subsets that optimize the trade-off between model accuracy and feature sparsity. These advantages have led to the widespread adoption of GA-based strategies in feature subset selection tasks. Therefore, this paper proposes a novel genetic algorithm-based online sparse streaming feature selection (GA-OS<sup>2</sup>FS) in data streams. 
In smart factories, GA-OS<sup>2</sup>FS processes incomplete, high-dimensional streaming data from sensors (e.g., vibration, temperature) by imputing missing values via latent factor analysis and dynamically selecting the most discriminative features (e.g., failure-indicative patterns) using a genetic algorithm. This enables real-time, accurate anomaly detection and predictive maintenance, minimizing unplanned downtime. For IoT-based energy management systems, GA-OS<sup>2</sup>FS handles sparse and irregular streaming data from distributed sensors (e.g., occupancy, temperature). It recovers missing values and employs genetic algorithm-based evaluation to identify and retain features most relevant to energy consumption. This results in an optimized feature subset for real-time control of HVAC and lighting systems, enhancing energy efficiency in smart buildings.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>Online Streaming Feature Selection (OSFS) models, which process feature streams in real time, have garnered significant research interest. For instance, <xref ref-type="bibr" rid="B30">Perkins and Theiler (2003)</xref> introduced Grafting, a regularized online feature selection framework. However, it requires careful tuning of regularization parameters prior to feature selection, making it less adaptable to scenarios with an unknown or expanding feature space. <xref ref-type="bibr" rid="B57">Zhou et al. (2006)</xref> proposed the Alpha-investing strategy, capable of handling infinite feature streams, though it does not account for redundancy among the selected features. <xref ref-type="bibr" rid="B45">Wu et al. (2013)</xref> categorized incoming features into strongly relevant, weakly relevant, and irrelevant groups, developing two OSFS variants: OSFS and Fast-OSFS. The latter specifically addresses redundancy between newly arrived features and the already selected subset. Building on mutual information, <xref ref-type="bibr" rid="B53">Yu et al. (2016)</xref> presented the SAOLA model, which evaluates feature relevance based on pairwise interactions. To capture more complex dependencies, <xref ref-type="bibr" rid="B61">Zhou et al. (2021b)</xref> developed the OGSFS-FI model by examining interactions between feature groups. This was later extended to the SFS-FI model <xref ref-type="bibr" rid="B60">Zhou et al. (2021a)</xref>, which can identify features involved in multi-way interactions, including two-way, three-way, and higher-order relationships. Furthermore, to better model dynamic decision-making, <xref ref-type="bibr" rid="B62">Zhou et al. (2022)</xref> applied the three-way decision (3WD) principle to propose the OSSFS-DD model, which computes partition thresholds according to 3WD theory to mitigate decision risk.</p>
<p>In parallel, rough set theory has proven to be a valuable framework for Online Streaming Feature Selection (OSFS). For example, <xref ref-type="bibr" rid="B59">Zhou et al. (2019b)</xref> introduced the OFS-A3M model, which employs a neighborhood rough set relation with adaptive neighbors to identify features that exhibit high relevance, strong dependency, and low redundancy. This work was later extended to the OFS-Density model (<xref ref-type="bibr" rid="B58">Zhou et al., 2019a</xref>), where a novel adaptive density-based neighborhood relation is used to analyze domain characteristics and configure model parameters. In a different approach, <xref ref-type="bibr" rid="B24">Luo et al. (2023)</xref> leveraged the concept of rough hypercuboids to develop the RHDOFS model. Similarly, <xref ref-type="bibr" rid="B34">Shu et al. (2024)</xref> proposed the ANOHFS model, which relies on an adaptive neighborhood mechanism to effectively identify closely related feature hierarchies within high-dimensional data. <xref ref-type="bibr" rid="B63">Zhuo et al. (2024)</xref> proposed an online feature selection method for dynamic feature spaces, with innovations in Gaussian Copula-based correlation modeling, real-time tree-ensemble selection, and geometric inference for unlabeled data. <xref ref-type="bibr" rid="B32">Qiu et al. (2025)</xref> proposed an online confidence learning algorithm for noisy labeled features. It tackles instance distribution shifts and label noise in data streams by employing online confidence inference and geometric structure learning. Although current OSFS models play a crucial role in dynamically selecting streaming features, to our knowledge, they still lack the ability to effectively handle sparse streaming features. Missing data tends to raise the computational cost of OSFS models and may also lead to the selection of less relevant or redundant features. 
Sparse streaming features often exhibit weak associations with other features or the target variable, complicating the reliable evaluation of their importance. Moreover, they can cause uneven data distributions, where certain sample values appear very rarely&#x02014;a situation that may undermine the overall performance of OSFS. While these methods demonstrate considerable effectiveness in tackling conventional OSFS problems, they share a common limitation: all are designed under the assumption of complete feature streams and do not account for missing data, thus leaving the challenges of OS<sup>2</sup>FS scenarios unaddressed.</p>
<p>Latent factor analysis (LFA) has established itself as an effective approach for estimating missing data (<xref ref-type="bibr" rid="B39">Wu et al., 2022</xref>). The method operates by mapping the observed entries of a high-dimensional, incomplete matrix onto latent representations associated with its rows and columns (<xref ref-type="bibr" rid="B56">Zhang Z. et al., 2017</xref>; <xref ref-type="bibr" rid="B55">Zhang J. D. et al., 2017</xref>). A learning objective is formulated to quantify the discrepancy between the original observed values and their reconstructions (<xref ref-type="bibr" rid="B25">Luo et al., 2018</xref>; <xref ref-type="bibr" rid="B11">Gong et al., 2018</xref>). Subsequently, the model constructs a complete, low-rank approximation of the target incomplete matrix by minimizing this generalized error, as defined by the learning objective (<xref ref-type="bibr" rid="B27">Luo et al., 2021b</xref>; <xref ref-type="bibr" rid="B42">Wu et al., 2021</xref>).</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Preliminaries</title>
<sec>
<label>3.1</label>
<title>Online streaming feature selection</title>
<p>The Online Streaming Feature Selection (OSFS) model provides an effective approach for identifying the optimal subset of streaming features, which is accomplished through online relevance analysis and online redundancy analysis. Consider a streaming feature set <italic>F</italic> &#x0003D; {<italic>F</italic><sub>1</sub>, <italic>F</italic><sub>2</sub>, ..., <italic>F</italic><sub><italic>T</italic></sub>} and a label set <inline-formula><mml:math id="M1"><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext>T</mml:mtext></mml:mrow></mml:msup></mml:math></inline-formula>, where each feature <inline-formula><mml:math id="M2"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>M</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="false">]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> contains <italic>M</italic> samples, with <italic>t</italic> &#x02208; 1, 2, ..., <italic>T</italic>.</p>
<p>Suppose that two features <italic>F</italic><sub><italic>p</italic></sub> and <italic>F</italic><sub><italic>q</italic></sub>, where <italic>p</italic> &#x02260; <italic>q</italic>, <italic>p, q</italic> &#x02208; 1, 2, ..., <italic>T</italic>, if <italic>P</italic>(<italic>F</italic><sub><italic>p</italic></sub>|<italic>F</italic><sub><italic>q</italic></sub>, <italic>X</italic>) &#x0003D; <italic>P</italic>(<italic>F</italic><sub><italic>p</italic></sub>|<italic>X</italic>) or <italic>P</italic>(<italic>F</italic><sub><italic>q</italic></sub>|<italic>F</italic><sub><italic>p</italic></sub>, <italic>X</italic>) &#x0003D; <italic>P</italic>(<italic>F</italic><sub><italic>q</italic></sub>|<italic>X</italic>), then <italic>F</italic><sub><italic>p</italic></sub> and <italic>F</italic><sub><italic>q</italic></sub> are conditionally independent given the subset <italic>X</italic> &#x02286; <italic>F</italic>.</p>
<p>For a streaming feature <italic>F</italic><sub><italic>t</italic></sub> at the time stamp <italic>t</italic>,</p>
<list list-type="simple">
<list-item><p>a) if &#x02200;&#x003C2; &#x02286; <italic>F</italic> &#x02212; <italic>F</italic><sub><italic>t</italic></sub> s.t. <italic>P</italic>(<italic>C</italic>|&#x003C2;, <italic>F</italic><sub><italic>t</italic></sub>) &#x02260; <italic>P</italic>(<italic>C</italic>|&#x003C2;), then <italic>F</italic><sub><italic>t</italic></sub> is strongly relevant;</p></list-item>
<list-item><p>b) if &#x02203;&#x003C2; &#x02286; <italic>F</italic> &#x02212; <italic>F</italic><sub><italic>t</italic></sub> s.t. <italic>P</italic>(<italic>C</italic>|&#x003C2;, <italic>F</italic><sub><italic>t</italic></sub>) &#x02260; <italic>P</italic>(<italic>C</italic>|&#x003C2;), then <italic>F</italic><sub><italic>t</italic></sub> is weakly relevant;</p></list-item>
<list-item><p>c) if &#x02200;&#x003C2; &#x02286; <italic>F</italic> &#x02212; <italic>F</italic><sub><italic>t</italic></sub> s.t. <italic>P</italic>(<italic>C</italic>|&#x003C2;, <italic>F</italic><sub><italic>t</italic></sub>) &#x0003D; <italic>P</italic>(<italic>C</italic>|&#x003C2;), then <italic>F</italic><sub><italic>t</italic></sub> is irrelevant.</p></list-item>
</list>
<p>Given a relevant feature <italic>F</italic><sub><italic>t</italic></sub>(<italic>M</italic>(<italic>F</italic><sub><italic>t</italic></sub>) &#x02209; <italic>M</italic>(<italic>C</italic>)<sub><italic>t</italic></sub>), and the redundant set <italic>X</italic><sub><italic>F</italic></sub> is denoted as follows:</p>
<disp-formula id="EQ1"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mo>&#x02200;</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mi>M</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0222A;</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>,</mml:mo><mml:mo>&#x02203;</mml:mo><mml:mi>&#x003B6;</mml:mi><mml:mo>&#x02286;</mml:mo><mml:mi>M</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0222A;</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>-</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">s.t.</mml:mtext><mml:mtext>&#x02003;</mml:mtext><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>&#x003B6;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mo>|</mml:mo><mml:mi>&#x003B6;</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where <italic>M</italic>(&#x000B7;) denotes Markov blanket.</p>
</sec>
<sec>
<label>3.2</label>
<title>Latent factor analysis</title>
<p>The Latent Factor Analysis (LFA) model plays a significant role in pre-estimating sparse matrices. This section begins by presenting the formal definition of the LFA model (<xref ref-type="bibr" rid="B12">Hancer et al., 2022</xref>; <xref ref-type="bibr" rid="B37">Wang et al., 2023</xref>).</p>
<p>Let R<sup><italic>M</italic> &#x000D7; <italic>H</italic></sup> be a sparse matrix, and an LFA model trains two latent factor matrices <italic>U</italic><sup><italic>M</italic> &#x000D7; <italic>L</italic></sup> and <italic>V</italic><sup><italic>H</italic> &#x000D7; <italic>L</italic></sup> via the known entries, which precisely represent the rank-<italic>L</italic> approximation <inline-formula><mml:math id="M65"><mml:mover accent='true'><mml:mi>R</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:math></inline-formula> of R, where <inline-formula><mml:math id="M66"><mml:mover accent='true'><mml:mi>R</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:math></inline-formula> is formulated as <inline-formula><mml:math id="M67"><mml:mover accent='true'><mml:mi>R</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:math></inline-formula> &#x0003D; <italic>UV</italic><sup>T</sup>, <italic>L</italic> is the dimension of <italic>U</italic> and <italic>V</italic>, and <italic>L</italic> &#x0226A; min(|<italic>M</italic>|, |<italic>H</italic>|) (<xref ref-type="bibr" rid="B19">Li et al., 2024</xref>; <xref ref-type="bibr" rid="B13">Hancer et al., 2025</xref>).</p>
<p>The error of the LFA model is then formulated as:</p>
<disp-formula id="EQ2"><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>E</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>U</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mo>&#x0039B;</mml:mo></mml:mrow></mml:munder></mml:mstyle><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#x00394;</mml:mo><mml:mi>m</mml:mi><mml:mo>.</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi><mml:mo>=</mml:mo><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi><mml:mo>-</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>where &#x0039B; denotes the known data of R, <italic>e</italic>(&#x000B7;) calculates the error between <italic>r</italic><sub><italic>m, h</italic></sub> and <inline-formula><mml:math id="M5"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, <italic>r</italic><sub><italic>m, h</italic></sub> is <italic>m</italic>-th row and <italic>h</italic>-th column of R, the <inline-formula><mml:math id="M6"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the predicted value for <italic>r</italic><sub><italic>m, h</italic></sub>, <inline-formula><mml:math id="M7"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#x000B7;</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> stands for predictive function.</p>
<p>Incorporating regularization is essential for the LFA model to prevent over-fitting (<xref ref-type="bibr" rid="B38">Wu et al., 2023</xref>; <xref ref-type="bibr" rid="B17">Li et al., 2023</xref>). Thus, by integrating regularization into <xref ref-type="disp-formula" rid="EQ2">Equation 2</xref>, the following objective function is derived:</p>
<disp-formula id="EQ3"><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003B5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>U</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mo>&#x0039B;</mml:mo></mml:mrow></mml:munder></mml:mstyle><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mo stretchy="false">|</mml:mo><mml:mi>U</mml:mi><mml:msubsup><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>V</mml:mi><mml:msubsup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>where |&#x000B7;|<sub><italic>F</italic></sub> computes the Frobenius norm, &#x003BB; represents the regularization coefficient.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Proposed algorithm</title>
<sec>
<label>4.1</label>
<title>Problem of GA-OS<sup>2</sup>FS</title>
<p>Consider a collection of sparse streaming features denoted by <inline-formula><mml:math id="M9"><mml:msup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, which is postulated to possess a missing data rate of &#x003C1;. Here, &#x003C1; &#x0003D; 1 &#x02212; |&#x0039B;|/<italic>M</italic>, with |&#x000B7;| representing the cardinality of a set. From time point <italic>t</italic> to <italic>t</italic> &#x0002B; <italic>H</italic> &#x02212; 1, sparse streaming features <italic>F</italic>&#x02032;<sub><italic>t</italic></sub>, <italic>F</italic>&#x02032;<sub><italic>t</italic>&#x0002B;1</sub>, ..., <italic>F</italic>&#x02032;<sub><italic>t</italic>&#x0002B;<italic>H</italic>&#x02212;1</sub> are generated sequentially and collected into an R<sup><italic>M</italic> &#x000D7; <italic>H</italic></sup> buffer. 
This buffer, of size <italic>H</italic>, forms the sparse streaming feature matrix <inline-formula><mml:math id="M10"><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>H</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. 
Subsequently, a completed streaming feature matrix, expressed as <inline-formula><mml:math id="M11"><mml:msub><mml:mrow><mml:mover accent='true'><mml:mi>B</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>H</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:math></inline-formula>, is estimated based on the observed known data.</p>
<p>The principal objective of the GA-OS<sup>2</sup>FS method is to identify the optimal feature subset. Consequently, the GA-OS<sup>2</sup>FS framework is designed to address the following optimization problem:</p>
<disp-formula id="EQ4"><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">arg</mml:mo><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mo>&#x02286;</mml:mo><mml:msub><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mo>|</mml:mo><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
</sec>
<sec>
<label>4.2</label>
<title>The framework of GA-OS<sup>2</sup>FS</title>
<p>The framework of GA-OS<sup>2</sup>FS consists of two steps: first, estimating missing values, and then assessing feature importance.</p>
<sec>
<label>4.2.1</label>
<title>Estimate sparse streaming features in advance</title>
<p>In practical applications, data quality is often difficult to guarantee due to missing feature values, making it exceptionally challenging to screen high-quality features from feature streams. Taking a medical monitoring system as an example, if a sensor fails and causes data loss, traditional OSFS models may transmit erroneous signals to control devices, which could ultimately endanger patients&#x00027; lives. Therefore, preprocessing the data before feature selection to impute missing entries is of crucial importance. The LFA model holds significant value for missing-data imputation, as it completes missing values by mapping the sparse matrix onto two latent factor matrices (<xref ref-type="bibr" rid="B18">Li J. et al., 2025</xref>; <xref ref-type="bibr" rid="B6">Chen J. et al., 2024</xref>; <xref ref-type="bibr" rid="B54">Yuan et al., 2025</xref>). Traditional methods&#x02014;such as mean imputation and matrix factorization&#x02014;typically fill missing values based on observed data and rely on assumptions such as linearity or local similarity, which limits their ability to capture complex non-linear relationships in high-dimensional or sparse streaming data (<xref ref-type="bibr" rid="B7">Chen et al., 2023</xref>; <xref ref-type="bibr" rid="B47">Xu et al., 2025b</xref>). In contrast, the LFA model can capture the underlying structure of the data through latent space modeling, thereby handling complex dependencies and non-linear patterns more effectively (<xref ref-type="bibr" rid="B21">Liao et al., 2025</xref>; <xref ref-type="bibr" rid="B22">Lin M. et al., 2025</xref>).</p>
<p>The complete latent features extracted from incomplete data can be used for missing-value imputation, classification, clustering, and other tasks (<xref ref-type="bibr" rid="B41">Wu et al., 2025b</xref>; <xref ref-type="bibr" rid="B28">Lyu et al., 2026</xref>). The extraction methods are mainly divided into linear and non-linear feature extraction. Linear feature extraction mostly employs LFA-based models that rely on matrix factorization. When dealing with sparse data, such models aim to construct a low-rank approximation of the high-dimensional incomplete matrix (<xref ref-type="bibr" rid="B43">Wu et al., 2024</xref>; <xref ref-type="bibr" rid="B44">Wu H. et al., 2025</xref>). They map the known entries of the target high-dimensional incomplete matrix to its row and column nodes, formulate a learning objective that measures the discrepancy between the actual data and the estimated data, and thereby generate a complete low-rank approximation matrix of the target high-dimensional incomplete matrix. An optimizer is then used to minimize linear error, achieving efficient representation (<xref ref-type="bibr" rid="B40">Wu et al., 2025a</xref>; <xref ref-type="bibr" rid="B31">Qin et al., 2024</xref>; <xref ref-type="bibr" rid="B46">Xu et al., 2025a</xref>; <xref ref-type="bibr" rid="B9">Chen M. et al., 2024</xref>).</p>
<p>The initialization procedure subjects both matrices <italic>U</italic> and <italic>V</italic> to small random values. These values, generated by scaling a random permutation to a vicinity close to zero, serve as the non-zero starting point for the iterative algorithm, for which initial conditions are crucial. The following illustration details the update method, taking matrix <italic>U</italic> as a representative case.</p>
<disp-formula id="EQ5"><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>004</mml:mn><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>004</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mfrac><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>000</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>000</mml:mn></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>The LFA model constructs a low-rank approximation for <italic>R</italic> (<xref ref-type="bibr" rid="B35">Tang et al., 2024</xref>; <xref ref-type="bibr" rid="B26">Luo et al., 2021a</xref>). Typically, matrices <italic>U</italic> and <italic>V</italic> are derived from <italic>R</italic> by minimizing a loss function defined by the Euclidean distance between <italic>R</italic> and <inline-formula><mml:math id="M14"><mml:mover accent="true"><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> (<xref ref-type="bibr" rid="B48">Xu et al., 2023</xref>). Building upon <xref ref-type="disp-formula" rid="EQ2">Equations 2</xref>, <xref ref-type="disp-formula" rid="EQ3">3</xref>, the complete streaming features are predicted using the known values according to:</p>
<disp-formula id="EQ6"><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003B5;</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:mo>&#x0039B;</mml:mo></mml:mrow></mml:munder></mml:mstyle><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" 
accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>Subsequently, the loss function for the <italic>m</italic>-th element <italic>f</italic><sub><italic>m, j</italic></sub> is calculated as:</p>
<disp-formula id="EQ7"><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mstyle 
displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>To solve this loss function, stochastic gradient descent (SGD) is employed (<xref ref-type="bibr" rid="B1">Ahmadian et al., 2025</xref>; <xref ref-type="bibr" rid="B23">Lin X. et al., 2025</xref>; <xref ref-type="bibr" rid="B16">Lei et al., 2024</xref>). The method computes the gradient of the loss function with respect to the combined parameters and updates them in a descending direction:</p>
<disp-formula id="EQ8"><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none none none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:mfrac><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:mfrac><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>From <xref ref-type="disp-formula" rid="EQ7">Equations 7</xref> and <xref ref-type="disp-formula" rid="EQ8">8</xref>, the partial derivative of the loss is derived:</p>
<disp-formula id="EQ9"><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="true">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>Here, &#x003B7; denotes the learning rate. <italic>U</italic> and <italic>V</italic> are optimized to minimize errors on the known values, yielding <italic>R</italic> &#x02248; <italic>UV</italic><sup><italic>T</italic></sup>. The error between the estimated and actual data is <inline-formula><mml:math id="M19"><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, i.e.,</p>
<disp-formula id="EQ10"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02190;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mi>&#x003B7;</mml:mi><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable>
</mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
</sec>
<sec>
<label>4.2.2</label>
<title>Online feature evaluation</title>
<p>The principal advantage of GA-OS<sup>2</sup>FS lies in the fact that its feature evaluation does not depend solely on the accuracy of missing value completion by the LFA model. The method sustains a feature subset whose fitness is evaluated through real classification accuracy. Consequently, the search process gains the capacity to tolerate and sidestep local misleading associations stemming from potential completion errors. As a prominent and widely implemented evolutionary optimization method, the GA offers several compelling advantages. GA maintains a diverse population of candidate solutions, enabling simultaneous exploration of multiple regions in the solution space. Through mechanisms such as selection, crossover, and mutation, it effectively combines and propagates beneficial gene patterns while continually introducing new variations. This population-based strategy significantly mitigates the risk of premature convergence to local optima, making GA particularly robust in navigating complex, multimodal search landscapes commonly encountered in feature selection. GA operates solely on the evaluation of candidate fitness, requiring no derivative information of the objective function. This characteristic renders it highly suitable for optimizing non-differentiable, discontinuous, or noisy objective functions&#x02014;frequently the case in feature selection where the fitness is often a classification error rate or another performance metric derived from a learning model. The evolutionary process inherently promotes solutions that achieve an optimal balance between multiple, often competing, objectives. In feature selection, fitter individuals naturally tend to be those that maximize classification performance while minimizing the number of selected features, without needing an explicitly tuned regularization parameter. This emergent trade-off helps in discovering compact, discriminative feature subsets. 
The evaluation of fitness for each individual in a population is independent of others, making this computational step &#x0201C;embarrassingly parallel.&#x0201D; This allows for efficient distribution across multiple processors or cores, drastically reducing wall-clock time and enhancing the scalability of GA for large-scale or high-dimensional problems. Collectively, these advantages establish Genetic Algorithms as a powerful, flexible, and efficient metaheuristic framework for tackling the inherently combinatorial and complex problem of feature selection.</p>
<p>Given a dataset <inline-formula><mml:math id="M21"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msup><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> with <italic>m</italic> samples and <italic>n</italic> features, where <inline-formula><mml:math id="M22"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the feature vector and <inline-formula><mml:math id="M23"><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula> denotes the class label, the feature selection problem aims to identify an optimal subset of features <inline-formula><mml:math id="M24"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow><mml:mo>&#x02286;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:math></inline-formula> that maximizes classification performance while minimizing dimensionality. This binary optimization problem can be formulated as:</p>
<disp-formula id="EQ11"><mml:math id="M25"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">min</mml:mo></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:munder></mml:mstyle><mml:mi>J</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x003B1;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mi>&#x003B5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>&#x02299;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>y</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B2;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mo>&#x02225;</mml:mo><mml:mi>b</mml:mi><mml:mo>&#x02225;</mml:mo></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p>where <inline-formula><mml:math id="M26"><mml:mi>b</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> is a binary vector with <italic>b</italic><sub><italic>j</italic></sub> &#x0003D; 1 if feature <italic>j</italic> is selected, and <italic>b</italic><sub><italic>j</italic></sub> &#x0003D; 0 otherwise, <inline-formula><mml:math id="M27"><mml:msub><mml:mrow><mml:mo>&#x02225;</mml:mo><mml:mi>b</mml:mi><mml:mo>&#x02225;</mml:mo></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the &#x02113;<sub>0</sub>-norm counting selected features, <italic>X</italic> &#x02208; &#x0211D;<sup><italic>m</italic> &#x000D7; <italic>n</italic></sup> is the feature matrix with <italic>X</italic><sub><italic>ij</italic></sub> &#x0003D; <italic>x</italic><sub><italic>i</italic>,<italic>j</italic></sub>, <inline-formula><mml:math id="M28"><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#x000B7;</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> represents the classification error function, &#x003B1; and &#x003B2; are weighting coefficients balancing classification accuracy and feature sparsity, &#x02299; denotes element-wise multiplication, 
1<sub><italic>m</italic></sub> is an <italic>m</italic>-dimensional vector of ones.</p>
<p>Each candidate solution (chromosome) is encoded as a binary vector <italic>b</italic> &#x02208; {0, 1}<sup><italic>n</italic></sup>. The initial population <inline-formula><mml:math id="M29"><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:math></inline-formula> of size <italic>N</italic> is generated randomly:</p>
<disp-formula id="EQ12"><mml:math id="M30"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0007E;</mml:mo><mml:mi>B</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#x02200;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>N</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#x02200;</mml:mo><mml:mi>j</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<p>where <italic>p</italic><sub>0</sub> &#x0003D; 0.5 ensures unbiased initial exploration of the feature space.</p>
<p>The fitness evaluation metric is primarily assessed by measuring the classification error achieved using the selected features. As a wrapper-based method, this approach directly employs the performance of a target classifier&#x02014;such as support vector machine (SVM)&#x02014;to determine the quality of a candidate feature subset. The classification error serves as a direct and interpretable indicator of how well the selected features support the learning algorithm in discriminating between classes. Typically, to ensure robustness and prevent overfitting, the error is estimated via cross-validation or hold-out validation. This design aligns the feature selection process closely with the end classification task, thereby enhancing the relevance and discriminative power of the final feature subset.</p>
<p>To form the mating pool <inline-formula><mml:math id="M31"><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">M</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> for generation <italic>t</italic>, individuals are selected probabilistically based on their fitness. The selection probability for chromosome <italic>b</italic><sub><italic>i</italic></sub> is:</p>
<disp-formula id="EQ13"><mml:math id="M32"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>w</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mi>w</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B9;</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<p>Here, &#x003B9; &#x0003E; 0 is a small constant preventing division by zero. The cumulative distribution function is:</p>
<disp-formula id="EQ14"><mml:math id="M33"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>N</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<p>For each selection, a random number <italic>r</italic> &#x0007E; <italic>U</italic>(0, 1) is generated, and chromosome <italic>b</italic><sub><italic>k</italic></sub> is selected where <inline-formula><mml:math id="M34"><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mo class="qopname">min</mml:mo><mml:mi>i</mml:mi><mml:mo>:</mml:mo><mml:msubsup><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">sel</mml:mtext></mml:mstyle></mml:mrow></mml:msubsup><mml:mo>&#x02265;</mml:mo><mml:mi>r</mml:mi></mml:math></inline-formula>.</p>
<p>With probability <italic>p</italic><sub><italic>c</italic></sub>, pairs of parent chromosomes undergo single-point crossover. For parents <italic>b</italic><sub><italic>p</italic></sub> and <italic>b</italic><sub><italic>q</italic></sub>, a crossover point <italic>c</italic> is randomly selected:</p>
<disp-formula id="EQ15"><mml:math id="M35"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>c</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mi>U</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p>Two offspring <inline-formula><mml:math id="M36"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M37"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> are generated as:</p>
<disp-formula id="EQ16"><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></m
ml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<p>If crossover is not applied (with probability 1 &#x02212; <italic>p</italic><sub><italic>c</italic></sub>), the offspring are exact copies of the parents.</p>
<p>Each gene in the offspring undergoes mutation with probability <italic>p</italic><sub><italic>m</italic></sub>. For gene <italic>b</italic><sub><italic>ij</italic></sub>:</p>
<disp-formula id="EQ17"><mml:math id="M39"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none none none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">with probability</mml:mtext></mml:mtd><mml:mtd><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">with probability</mml:mtext></mml:mtd><mml:mtd><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(17)</label></disp-formula>
<p>This operator maintains population diversity and enables exploration of new regions in the search space.</p>
<p>To guarantee monotonic improvement across generations, the algorithm employs an elitism strategy. The best chromosome <inline-formula><mml:math id="M40"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> from generation <italic>t</italic> is preserved by replacing the worst chromosome in the offspring population <inline-formula><mml:math id="M41"><mml:mrow><mml:msup><mml:mi mathvariant="script">P</mml:mi><mml:mrow><mml:mo>&#x02032;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>:</p>
<disp-formula id="EQ18"><mml:math id="M42"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi>w</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mo class="qopname">arg</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(18)</label></disp-formula>
<p>This ensures that:</p>
<disp-formula id="EQ19"><mml:math id="M43"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02264;</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x02200;</mml:mo><mml:mi>t</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(19)</label></disp-formula>
<p>The genetic algorithm for OS<sup>2</sup>FS begins with inputs including the feature matrix <italic>X</italic> &#x02208; &#x0211D;<sup><italic>m</italic> &#x000D7; <italic>n</italic></sup>, label vector <italic>y</italic> &#x02208; &#x0211D;<sup><italic>m</italic></sup>, population size <italic>N</italic>, maximum iterations <italic>T</italic><sub>max</sub>, crossover probability <italic>p</italic><sub><italic>c</italic></sub> (default: 0.8), mutation probability <italic>p</italic><sub><italic>m</italic></sub> (default: 0.05), mutation strength &#x003BC; (default: 0.01), and hold-out folds <italic>k</italic> (default: 0). Initially, it sets <italic>t</italic> &#x0003D; 0 and generates the population <inline-formula><mml:math id="M44"><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> via <xref ref-type="disp-formula" rid="EQ12">Equation 12</xref>, then evaluates each individual by computing <inline-formula><mml:math id="M45"><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mi>b</mml:mi><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> for <italic>i</italic> &#x0003D; 1, &#x02026;, <italic>N</italic>, and identifies the best individual as <inline-formula><mml:math id="M46"><mml:mi>b</mml:mi><mml:msup><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo class="qopname">arg</mml:mo><mml:mo class="qopname">min</mml:mo><mml:mi>b</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mi>f</mml:mi></mml:math></inline-formula> with fitness <inline-formula><mml:math id="M47"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mo class="qopname">min</mml:mo><mml:mi>f</mml:mi></mml:math></inline-formula>. 
While <italic>t</italic> &#x0003C; <italic>T</italic><sub>max</sub>, the algorithm performs selection to form the mating pool <inline-formula><mml:math id="M48"><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">M</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> using roulette wheel selection (<xref ref-type="disp-formula" rid="EQ13">Equations 13</xref>, <xref ref-type="disp-formula" rid="EQ14">14</xref>), generates offspring <inline-formula><mml:math id="M49"><mml:mrow><mml:msup><mml:mi mathvariant="script">P</mml:mi><mml:mrow><mml:mo>&#x02032;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> via single-point crossover (<xref ref-type="disp-formula" rid="EQ15">Equations 15</xref>, <xref ref-type="disp-formula" rid="EQ16">16</xref>) with probability <italic>p</italic><sub><italic>c</italic></sub>, applies bit-flip mutation (<xref ref-type="disp-formula" rid="EQ17">Equation 17</xref>) to <inline-formula><mml:math id="M50"><mml:mrow><mml:msup><mml:mi mathvariant="script">P</mml:mi><mml:mrow><mml:mo>&#x02032;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> with probability <italic>p</italic><sub><italic>m</italic></sub>, and evaluates the offspring by computing <inline-formula><mml:math id="M51"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mi>b</mml:mi><mml:msup><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>. It updates the best individual if <inline-formula><mml:math id="M52"><mml:mo class="qopname">min</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x0003C;</mml:mo><mml:mi>f</mml:mi><mml:msup><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula>, then applies elitism by replacing the worst individual in <inline-formula><mml:math id="M53"><mml:mrow><mml:msup><mml:mi mathvariant="script">P</mml:mi><mml:mrow><mml:mo>&#x02032;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> with <italic>b</italic>best<sup>(<italic>t</italic>)</sup> (<xref ref-type="disp-formula" rid="EQ18">Equation 18</xref>), followed by updating <inline-formula><mml:math id="M54"><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:msup><mml:mi mathvariant="script">P</mml:mi><mml:mrow><mml:mo>&#x02032;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>t</mml:mi><mml:mo 
stretchy='false'>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <italic>f</italic><sup>(<italic>t</italic> &#x0002B; 1)</sup> &#x0003D; <italic>f</italic>&#x02032;, and the convergence curve <inline-formula><mml:math id="M55"><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, and incrementing <italic>t</italic> &#x0003D; <italic>t</italic> &#x0002B; 1. After the loop, it extracts the results: <inline-formula><mml:math id="M56"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow><mml:mi>f</mml:mi><mml:mo>=</mml:mo><mml:mi>j</mml:mi><mml:mo>:</mml:mo><mml:mi>b</mml:mi><mml:msup><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math id="M57"><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>|</mml:mo><mml:mrow><mml:mi 
mathvariant="script">S</mml:mi></mml:mrow><mml:mi>f</mml:mi><mml:mo>|</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math id="M58"><mml:mi>X</mml:mi><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">selected</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mo>:</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and returns <inline-formula><mml:math id="M59"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">selected</mml:mtext></mml:mstyle><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> as output. Redundancy analysis is first performed on individual features using Markov blankets, and then on the selected features using <xref ref-type="disp-formula" rid="EQ1">Equation 1</xref>.</p>
<p>The algorithm&#x00027;s convergence is guaranteed by the elitism strategy, which ensures the best fitness value is non-increasing:</p>
<p><bold>Theorem 1 (Monotonic Convergence)</bold>. For the GA-OS<sup>2</sup>FS algorithm with elitism, the sequence of best fitness values <inline-formula><mml:math id="M60"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo class="qopname">max</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> is monotonically non-increasing.</p>
<p><italic>Proof</italic>. By construction, the elitism strategy preserves the best solution from generation <italic>t</italic> in generation <italic>t</italic> &#x0002B; 1. Therefore, <inline-formula><mml:math id="M61"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02264;</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">best</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> for all <italic>t</italic>.</p>
<p>The expected time complexity per iteration is <inline-formula><mml:math id="M62"><mml:mrow><mml:mi mathvariant="script">O</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="M63"><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula> is the cost of evaluating the fitness function for one chromosome. The overall complexity for <italic>T</italic><sub>max</sub> iterations is <inline-formula><mml:math id="M64"><mml:mrow><mml:mi mathvariant="script">O</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow></mml:msub><mml:mo>&#x000B7;</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p>
<p>The proposed GA-OS<sup>2</sup>FS algorithm provides an effective approach for streaming feature selection, combining the global search capability of genetic algorithms with direct performance evaluation using the target classifier.</p>
</sec>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Experiments</title>
<sec>
<label>5.1</label>
<title>General settings</title>
<sec>
<label>5.1.1</label>
<title>Datasets</title>
<p>This section presents the experimental evaluation conducted on six real-world datasets obtained from two key sources: DNA microarray repositories and the benchmark collection from the Neural Information Processing Systems (NIPS) 2003 conference. These datasets are widely recognized in the machine learning and bioinformatics communities for assessing feature selection and classification algorithms under high-dimensional, small-sample conditions. The inclusion of microarray data ensures the examination of genetic expression patterns, while the NIPS 2003 datasets provide a diverse range of problem domains and complexity levels, thereby enabling a comprehensive analysis of the proposed method&#x00027;s robustness and generalizability. A detailed summary of the datasets&#x02014;including the number of features, samples, and classes&#x02014;is provided in <xref ref-type="table" rid="T1">Table 1</xref> for reference.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Details of the datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Mark</bold></th>
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>Features</bold></th>
<th valign="top" align="center"><bold>Instances</bold></th>
<th valign="top" align="center"><bold>Class</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">D1</td>
<td valign="top" align="left">USPS</td>
<td valign="top" align="center">242</td>
<td valign="top" align="center">1,500</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">D2</td>
<td valign="top" align="left">Madelon</td>
<td valign="top" align="center">501</td>
<td valign="top" align="center">2,600</td>
<td valign="top" align="center">6</td>
</tr>
<tr>
<td valign="top" align="left">D3</td>
<td valign="top" align="left">COIL20</td>
<td valign="top" align="center">1,025</td>
<td valign="top" align="center">1,440</td>
<td valign="top" align="center">20</td>
</tr>
<tr>
<td valign="top" align="left">D4</td>
<td valign="top" align="left">Colon</td>
<td valign="top" align="center">2,001</td>
<td valign="top" align="center">62</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">D5</td>
<td valign="top" align="left">Lung</td>
<td valign="top" align="center">3,313</td>
<td valign="top" align="center">83</td>
<td valign="top" align="center">5</td>
</tr>
<tr>
<td valign="top" align="left">D6</td>
<td valign="top" align="left">DriveFace</td>
<td valign="top" align="center">6,401</td>
<td valign="top" align="center">606</td>
<td valign="top" align="center">3</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<label>5.1.2</label>
<title>Baselines</title>
<p>To comprehensively evaluate the efficacy of the proposed model, a rigorous comparative analysis is conducted against four state-of-the-art online sparse streaming feature selection (OS<sup>2</sup>FS) methods, which are recognized as established benchmarks in the field. The selected competitors include Fast-OSFS, SAOLA, SFS-FI, and LOSSA. This diverse set of algorithms encompasses various strategic approaches to handling feature streams&#x02014;such as leveraging pairwise feature relations, redundancy analysis, and sparsity-aware selection&#x02014;thereby ensuring a robust and multifaceted comparison. Furthermore, to objectively assess the quality of the feature subsets selected by each method, the evaluation employs three fundamental yet powerful classifiers: Support Vector Machine (SVM), k-Nearest Neighbors (KNN), and Random Forest (RF). These classifiers were chosen for their distinct learning mechanisms: SVM seeks optimal separating hyperplanes, KNN relies on local similarity, and RF utilizes ensemble decision-making. Their combined use helps verify whether the selected features generalize well across different inductive biases and are not tailored to a single classification model.</p>
<p>Detailed parameter configurations for all compared OS<sup>2</sup>FS algorithms and the three classifiers are systematically summarized in <xref ref-type="table" rid="T2">Tables 2</xref>, <xref ref-type="table" rid="T3">3</xref>, respectively, to ensure full reproducibility of the experiments. All algorithms are implemented in MATLAB to maintain a consistent computational environment. All experiments utilize five-fold cross-validation, meaning each dataset is randomly divided into an 80% training portion and a complementary 20% test portion. To account for randomness in data partitioning and algorithm initialization, each dataset is executed 10 times; the final reported result is the average predictive accuracy across these runs, along with its standard deviation where applicable.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Algorithm parameters.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Mark</bold></th>
<th valign="top" align="left"><bold>Algorithm</bold></th>
<th valign="top" align="left"><bold>Parameter</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">M1</td>
<td valign="top" align="left">GA-OS<sup>2</sup>FS</td>
<td valign="top" align="left">Z test, Alpha is 0.05.</td>
</tr>
<tr>
<td valign="top" align="left">M2</td>
<td valign="top" align="left">LOSSA</td>
<td valign="top" align="left">Z test, Alpha is 0.05. (TSMC, 2022)</td>
</tr>
<tr>
<td valign="top" align="left">M3</td>
<td valign="top" align="left">Fast-OSFS</td>
<td valign="top" align="left">Z test, Alpha is 0.05. (TPAMI, 2013)</td>
</tr>
<tr>
<td valign="top" align="left">M4</td>
<td valign="top" align="left">SAOLA</td>
<td valign="top" align="left">Z test, Alpha is 0.05. (TKDD, 2016)</td>
</tr>
<tr>
<td valign="top" align="left">M5</td>
<td valign="top" align="left">SFS-FI</td>
<td valign="top" align="left">Z test, Alpha is 0.05. (TNNLS, 2021)</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Details of the classifiers.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Classifier</bold></th>
<th valign="top" align="left"><bold>Parameter</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">The number of neighbors was set to 3.</td>
</tr>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="left">6 decision trees.</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left">Predefined parameter settings.</td>
</tr></tbody>
</table>
</table-wrap>
<p>All trials were conducted on a standard personal computer equipped with an Intel Core i7 processor running at 2.40 GHz and 16 GB of RAM, ensuring that the computational demands of the online feature selection and classification processes were feasibly met within a common research setup. This controlled hardware environment also aids in the fair comparison of runtime and efficiency where relevant.</p>
</sec>
<sec>
<label>5.1.3</label>
<title>Experimental configuration</title>
<p>The efficacy of the GA-OS<sup>2</sup>FS model is rigorously assessed by benchmarking it against the aforementioned suite of advanced algorithms, specifically within the challenging context of sparse streaming features. This scenario is deliberately chosen to simulate real-world conditions where data incompleteness and sequential feature arrival are prevalent, thereby testing the models&#x00027; robustness and adaptability. To ensure a fair and statistically grounded comparison of performance across all algorithms, a non-parametric Friedman test is conducted at a stringent 95% confidence level. This test is employed under the null hypothesis that all algorithms perform equivalently, providing a holistic view of performance rankings across multiple datasets.</p>
<p>Furthermore, to drill down into pairwise performance differences, a paired Wilcoxon signed-rank test is applied at a 0.1 significance level. This test is specifically designed to examine whether the observed performance differences between the GA-OS<sup>2</sup>FS model and each individual baseline algorithm are statistically significant, rather than attributable to random chance. The resulting p-values from this comprehensive statistical analysis are consistently below the significance threshold. This robust statistical evidence leads to the conclusive finding that the GA-OS<sup>2</sup>FS model significantly and consistently outperforms all competing algorithms in the evaluation, demonstrating its superior capability in selecting informative features from sparse, evolving data streams.</p>
</sec>
</sec>
<sec>
<label>5.2</label>
<title>Accuracy comparison</title>
<sec>
<label>5.2.1</label>
<title>Detailed analysis under 10% missing data rate</title>
<p>To investigate the impact of missing data on feature selection performance, a missing-at-random scenario with a 10% data loss rate is established as a representative and practically relevant case for detailed analysis. This specific rate is chosen to simulate a common yet challenging level of data incompleteness encountered in real-world streaming applications. As illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, the average number of features selected by each compared method under this sparse condition is presented. The bar chart reveals distinct strategies among the algorithms: some methods maintain a conservative, highly selective profile, while others retain a larger fraction of the feature stream, reflecting different trade-offs between redundancy elimination and information preservation.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>The mean number of selected features.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1782461-g0001.tif">
<alt-text content-type="machine-generated">Bar chart comparing the mean number of selected features across six datasets (D1 to D6) for five models (M1 to M5). M4 has the highest value at 102.2 in dataset D3. Different colors and patterns represent each model.</alt-text>
</graphic>
</fig>
<p>Correspondingly, <xref ref-type="table" rid="T4">Table 4</xref> documents the concrete predictive performance outcomes, quantified by classification accuracy, when applying three fundamentally different classifiers&#x02014;K-Nearest Neighbors (KNN), Support Vector Machine (SVM), and Random Forest (RF)&#x02014;to the feature subsets identified by each method. This multi-classifier evaluation is crucial, as it demonstrates whether the selected features provide robust discriminative power independent of a specific learning algorithm&#x00027;s bias. The results in the table allow for a direct, quantitative comparison of how the parsimony or comprehensiveness of a selected feature subset, as shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, ultimately translates into generalization accuracy across diverse classifiers. This integrated analysis of subset size (<xref ref-type="fig" rid="F1">Figure 1</xref>) and classification efficacy (<xref ref-type="table" rid="T4">Table 4</xref>) provides a comprehensive view of each algorithm&#x00027;s effectiveness in balancing feature reduction with predictive performance under the specified missing data condition.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>The accuracy when the missing rate is 0.1.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>M/D</bold></th>
<th valign="top" align="center"><bold>D1</bold></th>
<th valign="top" align="center"><bold>D2</bold></th>
<th valign="top" align="center"><bold>D3</bold></th>
<th valign="top" align="center"><bold>D4</bold></th>
<th valign="top" align="center"><bold>D5</bold></th>
<th valign="top" align="center"><bold>D6</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">M1</td>
<td valign="top" align="center">90.79 &#x000B1; 0.87</td>
<td valign="top" align="center">59.14 &#x000B1; 1.36</td>
<td valign="top" align="center">92.62 &#x000B1; 1.13</td>
<td valign="top" align="center">81.77 &#x000B1; 3.48</td>
<td valign="top" align="center">89.47 &#x000B1; 1.31</td>
<td valign="top" align="center">94.91 &#x000B1; 0.50</td>
</tr>
<tr>
<td valign="top" align="left">M2</td>
<td valign="top" align="center">84.33 &#x000B1; 0.64</td>
<td valign="top" align="center">54.97 &#x000B1; 0.69</td>
<td valign="top" align="center">84.87 &#x000B1; 2.63</td>
<td valign="top" align="center">80.45 &#x000B1; 2.59</td>
<td valign="top" align="center">84.79 &#x000B1; 2.77</td>
<td valign="top" align="center">92.29 &#x000B1; 0.58</td>
</tr>
<tr>
<td valign="top" align="left">M3</td>
<td valign="top" align="center">85.48 &#x000B1; 0.63</td>
<td valign="top" align="center">54.83 &#x000B1; 0.97</td>
<td valign="top" align="center">71.08 &#x000B1; 2.22</td>
<td valign="top" align="center">78.88 &#x000B1; 2.59</td>
<td valign="top" align="center">84.40 &#x000B1; 2.43</td>
<td valign="top" align="center">93.14 &#x000B1; 0.67</td>
</tr>
<tr>
<td valign="top" align="left">M4</td>
<td valign="top" align="center">80.18 &#x000B1; 0.54</td>
<td valign="top" align="center">53.79 &#x000B1; 0.80</td>
<td valign="top" align="center">88.59 &#x000B1; 0.49</td>
<td valign="top" align="center">77.65 &#x000B1; 3.19</td>
<td valign="top" align="center">83.38 &#x000B1; 2.32</td>
<td valign="top" align="center">93.34 &#x000B1; 0.71</td>
</tr>
<tr>
<td valign="top" align="left">M5</td>
<td valign="top" align="center">72.18 &#x000B1; 0.54</td>
<td valign="top" align="center">49.33 &#x000B1; 0.54</td>
<td valign="top" align="center">80.56 &#x000B1; 3.48</td>
<td valign="top" align="center">78.63 &#x000B1; 2.66</td>
<td valign="top" align="center">62.49 &#x000B1; 2.96</td>
<td valign="top" align="center">85.51 &#x000B1; 1.02</td>
</tr></tbody>
</table>
</table-wrap>
<sec>
<label>5.2.1.1</label>
<title>Statistical significance (Friedman test)</title>
<p>To statistically validate the performance differences observed among the compared algorithms under the 10% missing data scenario, a non-parametric Friedman test was conducted across all datasets. The test returned a <italic>P</italic>-value of 0.0011, which is substantially below the commonly adopted significance threshold of 0.05. This very small <italic>P</italic>-value allows us to firmly reject the null hypothesis that all algorithms perform equally. Therefore, the results provide strong statistical evidence that there are significant differences in the overall performance ranks of the evaluated methods. More specifically, the outcome underscores that the proposed GA-OS<sup>2</sup>FS model achieves a distinctly superior ranking compared to the alternative algorithms, confirming its enhanced robustness and effectiveness when handling incomplete streaming features with 10% missing values. Such a statistically significant finding further reinforces the practical relevance and reliability of the GA-OS<sup>2</sup>FS approach in real-world sparse data environments.</p>
</sec>
<sec>
<label>5.2.1.2</label>
<title>Analysis of feature selection quantity</title>
<p>The GA-OS<sup>2</sup>FS model demonstrates stable feature selection across different sparse datasets. In contrast, algorithms like SAOLA show considerable variation in the number of features selected depending on the dataset. Key observations include:</p>
<list list-type="order">
<list-item><p>An intriguing pattern observed in the experiments is that several compared algorithms tend to select a considerably larger set of features, yet consistently deliver lower classification accuracy compared to the GA-OS<sup>2</sup>FS model. This indicates that simply retaining more features does not guarantee better predictive performance, and often points to insufficient or less effective redundancy analysis in the feature selection process. When redundancy is not adequately assessed, many retained features may be non-informative, noisy, or highly correlated with one another, thereby adding little discriminative value while increasing model complexity and the risk of overfitting. In contrast, the GA-OS<sup>2</sup>FS model appears to implement a more refined mechanism for evaluating feature relevance and redundancy, enabling it to identify and retain a compact yet highly informative subset of features that better supports accurate classification.</p></list-item>
<list-item><p>Other algorithms, such as SFS-FI, occasionally select an extremely small number of features&#x02014;in some cases as few as only one&#x02014;on particular datasets. This behavior is likely attributable to their limited ability to comprehensively capture all essential features when processing incomplete data streams. Specifically, these methods may prematurely converge on the first few features that appear sufficiently relevant, while failing to adequately evaluate or retain subsequently arriving features that are equally or more informative. As a result, they miss critical feature interactions and discard valuable discriminative information, ultimately leading to suboptimal classification performance due to an oversimplified and incomplete feature subset.</p></list-item>
<list-item><p>The GA-OS<sup>2</sup>FS model performs comprehensive relevance and redundancy analysis through a structured genetic optimization process. By leveraging GA-based feature evaluation, it dynamically assesses each feature&#x00027;s discriminative power and mutual dependencies within the evolving stream. This enables the model to systematically identify and retain truly informative features while filtering out redundant or noisy ones. Consequently, it avoids the premature discarding of important predictive information&#x02014;a common pitfall in many streaming feature selection methods. As a result, the model consistently achieves high classification accuracy while maintaining a compact and efficient feature subset, effectively balancing model simplicity with representational completeness.</p></list-item>
</list>
</sec>
<sec>
<label>5.2.1.3</label>
<title>Classification performance</title>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the GA-OS<sup>2</sup>FS model exceeds the performance of its rivals on six datasets. Key observations include:</p>
<list list-type="order">
<list-item><p>GA-OS<sup>2</sup>FS vs. Fast-OSFS: the experimental results demonstrate that the GA-OS<sup>2</sup>FS model consistently delivers superior classification accuracy across a majority of the benchmark datasets. In contrast, the Fast-OSFS algorithm exhibits notable limitations. Its performance is constrained by a reliance on zero-imputation to handle incomplete data&#x02014;a method that simply fills missing values with zeros. While straightforward, this approach fails to capture any underlying data structure or relationships, potentially distorting the feature space. Furthermore, Fast-OSFS employs a less comprehensive analysis of feature relevance and redundancy. This dual shortcoming&#x02014;crude data imputation coupled with insufficient feature evaluation&#x02014;often results in the misclassification of features during the streaming selection process. Informative features may be incorrectly discarded, while redundant or noisy ones might be retained. Consequently, these limitations fundamentally undermine the quality of the final selected feature subset, leading to its comparatively poorer predictive performance.</p></list-item>
<list-item><p>GA-OS<sup>2</sup>FS vs. SAOLA: the SAOLA algorithm operates primarily by assessing pairwise relationships between features, evaluating them in isolation or through limited local comparisons. While efficient, this approach may overlook more complex, higher-order interactions among feature subsets, and its incremental update mechanism can be sensitive to the arrival order of features in a stream. In contrast, the proposed GA-OS<sup>2</sup>FS model integrates and fully leverages the complementary strengths of the LFA model and the GA framework. LFA assists in capturing underlying low-rank structures and global correlations even under sparse or missing data conditions, while GA performs robust, population-based search to dynamically evaluate and retain the most discriminative feature combinations. This hybrid strategy enables GA-OS<sup>2</sup>FS to consistently identify critical features in real-time from evolving data streams, without being constrained by purely local or pairwise evaluations.</p></list-item>
<list-item><p>GA-OS<sup>2</sup>FS vs. SFS-FI: sparse streaming data often loses critical feature interactions, which severely challenges methods like SFS-FI that rely on detecting these dependencies. Unable to accurately assess feature relevance under sparsity, SFS-FI tends to select redundant or omit informative features, resulting in the lowest classification accuracy in evaluations. This underscores its limited robustness with incomplete data and highlights the advantage of GA-OS<sup>2</sup>FS&#x00027;s more resilient design.</p></list-item>
<list-item><p>Among the evaluated models, LOSSA achieves the second-highest classification accuracy after GA-OS<sup>2</sup>FS when processing completed sparse streaming features, demonstrating the benefit of using Latent Factor Analysis (LFA) for data completion. However, LOSSA relies on conventional relevance and redundancy analyses, which lack adaptability to capture complex feature interactions or evolving stream characteristics, limiting its average accuracy. In contrast, GA-OS<sup>2</sup>FS integrates a Genetic Algorithm strategy, performing a global, population-based search that evaluates multiple feature subsets and iteratively refines them using crossover, mutation, and fitness feedback. This enables GA-OS<sup>2</sup>FS to discover more discriminative feature combinations, leading to superior predictive performance and offering a more adaptive solution for accurate feature selection in sparse streaming environments.</p></list-item>
</list>
</sec>
<sec>
<label>5.2.1.4</label>
<title>The Wilcoxon signed-ranks test</title>
<p>To rigorously substantiate the statistically significant superiority of the proposed GA-OS<sup>2</sup>FS algorithm, a non-parametric Wilcoxon signed-rank test was employed. This test was specifically chosen for its appropriateness in comparing the performance of two related samples&#x02014;in this case, the paired average classification accuracy values of the GA-OS<sup>2</sup>FS model against each of the benchmark methods across multiple datasets. The detailed outcomes of these pairwise comparisons, including the calculated test statistics and corresponding <italic>P</italic>-values, are comprehensively presented in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>The rank sum of the Wilcoxon signed-ranks.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>M1 vs. Others</bold></th>
<th valign="top" align="center"><bold>R&#x0002B;<sup>a</sup></bold></th>
<th valign="top" align="center"><bold>R-<sup>a</sup></bold></th>
<th valign="top" align="center"><bold><italic>P</italic>-values<sup>b</sup></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">M2</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.0156</td>
</tr>
<tr>
<td valign="top" align="left">M3</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.0156</td>
</tr>
<tr>
<td valign="top" align="left">M4</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.0156</td>
</tr>
<tr>
<td valign="top" align="left">M5</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.0156</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>a</sup>A larger value denotes a higher accuracy.</p>
<p><sup>b</sup>There is no significant difference when <italic>P</italic>-values &#x02208; [0.1, 0.9] at the 0.1 significance level.</p>
</table-wrap-foot>
</table-wrap>
<p>The statistical analysis yields a clear and robust conclusion: even at a missing data rate of 0.1&#x02014;representing a modest yet realistic level of data incompleteness&#x02014;the GA-OS<sup>2</sup>FS approach demonstrates a consistent and statistically significant performance advantage. It reliably outperforms the alternative algorithms on a substantial majority of the evaluated datasets. This early and significant lead established by GA-OS<sup>2</sup>FS under sparse conditions highlights its inherent robustness and effective design for handling incomplete data streams from the outset.</p>
<p>In summary, relative to traditional OS<sup>2</sup>FS models, completing sparse streaming features via the LFA model generally minimizes information loss and enhances overall results. Consequently, both GA-OS<sup>2</sup>FS and LOSSA deliver the strongest performance on sparse streaming data. Nevertheless, the feature subsets selected by GA-OS<sup>2</sup>FS yield higher classification accuracy than those from LOSSA, demonstrating that GA can improve the accuracy of feature selection.</p>
</sec>
</sec>
<sec>
<label>5.2.2</label>
<title>Accuracy analysis with higher missing rates</title>
<p>This study evaluates the effectiveness of the GA-OS<sup>2</sup>FS model by comparing it against four prominent OS<sup>2</sup>FS models&#x02014;Fast-OSFS, SAOLA, SFS-FI, and LOSSA&#x02014;across six datasets under missing data rates ranging from 0.5 to 0.9. While LOSSA is designed to handle missing values, the other three baseline algorithms are oriented toward complete feature streams. To adapt them for sparse data, zero-imputation is applied to fill missing entries for Fast-OSFS, SAOLA, and SFS-FI. Results are highlighted where any algorithm demonstrates superior performance compared to the others. <xref ref-type="table" rid="T6">Table 6</xref> provides a pairwise comparison between GA-OS<sup>2</sup>FS and each baseline using the Wilcoxon signed-ranks test. The average accuracy trends of all models on datasets D1&#x02013;D6 are visualized in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>The rank sum of the Wilcoxon signed-rank test on OSFS and OS<sup>2</sup>FS models.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>&#x003C1;</bold></th>
<th valign="top" align="center" colspan="2"><bold>M2</bold></th>
<th valign="top" align="center" colspan="2"><bold>M3</bold></th>
<th valign="top" align="center" colspan="2"><bold>M4</bold></th>
<th valign="top" align="center" colspan="2"><bold>M5</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>R&#x0002B;</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R-</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R&#x0002B;</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R-</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R&#x0002B;</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R-</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R&#x0002B;</bold><sup>a</sup></th>
<th valign="top" align="center"><bold>R-</bold><sup>a</sup></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="left">0.9</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">1</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup><italic>a</italic></sup>A larger value denotes a higher accuracy; &#x003C1; denotes the missing data rate.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>The accuracy analysis with higher missing rates. <bold>(a)</bold> Missing data rate is 0.5. <bold>(b)</bold> Missing data rate is 0.9.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1782461-g0002.tif">
<alt-text content-type="machine-generated">Radar charts labeled &#x0201C;a&#x0201D; and &#x0201C;b&#x0201D; compare metrics M1 to M5 across dimensions D1 to D6. Chart &#x0201C;a&#x0201D; shows M1 with a solid red line dominating D1, contrasting with dashed lines of other metrics. Chart &#x0201C;b&#x0201D; displays a similar pattern, with M1 leading, but varying line styles for M2 to M5.</alt-text>
</graphic>
</fig>
<sec>
<label>5.2.2.1</label>
<title>Overall accuracy of the GA-OS<sup>2</sup>FS model</title>
<p>Across all benchmark datasets examined, the average classification accuracy of the GA-OS<sup>2</sup>FS model demonstrates a gradual yet consistent decline as the missing data rate increases. This overall trend aligns with expectations, as higher rates of missing entries inevitably compromise the informational integrity of the feature stream, making it more challenging to reliably identify and retain discriminative features. Notably, however, on several specific datasets, the model&#x00027;s accuracy exhibits only minor fluctuations&#x02014;remaining relatively stable even as the missing rate rises. This suggests that the GA-OS<sup>2</sup>FS approach maintains a notable degree of robustness in certain data environments, likely due to its effective integration of latent factor completion and evolutionary search, which together help preserve critical predictive information under moderate to high sparsity conditions.</p>
</sec>
<sec>
<label>5.2.2.2</label>
<title>Wilcoxon signed-rank test results</title>
<p><xref ref-type="table" rid="T6">Table 6</xref> presents the Wilcoxon signed-rank test results comparing the average accuracy of GA-OS<sup>2</sup>FS against other methods. The findings indicate that as the missing data rate increases from 0.5 to 0.9, the proposed algorithm outperforms most baseline methods on the majority of datasets.</p>
</sec>
<sec>
<label>5.2.2.3</label>
<title>Performance on datasets</title>
<p>Observations from <xref ref-type="fig" rid="F2">Figure 2</xref> lead to the following conclusions:</p>
<list list-type="order">
<list-item><p>For the majority of the evaluated algorithms, classification accuracy exhibits a progressive decline as the rate of missing data increases. This decline can be attributed to the growing incompleteness of the feature stream, which hinders the reliable assessment of feature relevance and redundancy. In contrast, the proposed GA-OS<sup>2</sup>FS model consistently achieves superior accuracy across most benchmark datasets, maintaining a clear performance advantage even as the missing data rate escalates from 0.5 to 0.9. This robustness stems from its integrated use of latent factor analysis (LFA) for structured data completion and genetic algorithm (GA)-guided feature optimization, which together preserve discriminative information and adaptively select informative features under sparse conditions. By comparison, conventional methods such as Fast-OSFS, SAOLA, and SFS-FI rely primarily on zero-filling (zero-imputation) to handle incomplete streaming features. While computationally simple, this approach substitutes missing entries with zeros&#x02014;a strategy that distorts the original data distribution, disrupts inherent feature correlations, and often introduces artificial noise. Consequently, these methods are prone to selecting uninformative or redundant features, which undermines their classification performance and explains their significantly poorer results relative to the GA-OS<sup>2</sup>FS framework, especially under higher missing-rate scenarios.</p></list-item>
<list-item><p>For missing rates between 0.1 and 0.5, LOSSA generally achieves higher accuracy than baselines like Fast-OSFS, SAOLA, and SFS-FI, due to its LFA-based data completion providing better estimates than simple imputation (e.g., zero-filling). However, as missing data increases, limited known entries raise LFA&#x00027;s estimation error. This distorts the recovered feature space, causing relevant features to be misclassified as irrelevant and discarded, degrading selection quality. To address this, GA-OS<sup>2</sup>FS employs a genetic algorithm to partition and evaluate features more robustly. This enables a global, resilient importance assessment that is less sensitive to local completion errors. By reducing feature misclassification, it retains a more discriminative subset, yielding consistently higher accuracy than LOSSA and other baselines, especially as sparsity grows.</p></list-item>
</list>
<p>In summary, by pre-estimating missing data via the LFA and GA model, the GA-OS<sup>2</sup>FS model enhances the accuracy of traditional OS<sup>2</sup>FS approaches.</p>
</sec>
</sec>
</sec>
</sec>
<sec id="s6">
<label>6</label>
<title>Conclusions</title>
<p>This study introduces GA-OS<sup>2</sup>FS, a novel uncertainty-aware framework for Online Sparse Streaming Feature Selection, designed to address critical shortcomings in conventional approaches. The framework innovatively integrates Genetic Algorithms (GA) to navigate the complex search space of dynamic feature subsets. GA-OS<sup>2</sup>FS operates through a synergistic two-component architecture: firstly, a Latent Factor Analysis (LFA) model that performs robust, dynamic imputation and reconstruction of inherently sparse and incomplete data matrices in real-time; secondly, a GA-based optimization mechanism that drives an intelligent, evolutionary search for discriminative features, effectively evaluating feature importance and interactions under uncertainty. Extensive empirical evaluation conducted across 10 diverse real-world datasets&#x02014;spanning various domains and data characteristics&#x02014;demonstrates that GA-OS<sup>2</sup>FS consistently surpasses state-of-the-art OSFS and OS<sup>2</sup>FS benchmarks. It achieves superior performance not only in selection accuracy and robustness but also in maintaining operational stability, all while ensuring computational efficiency. These results collectively underscore the framework&#x00027;s strong potential and adaptability for reliable, real-time feature selection in challenging high-dimensional streaming data environments.</p>
<p>Looking ahead, future research will concentrate on advancing the theory and practice of feature quality assessment within non-stationary streaming contexts. A primary direction involves refining and extending the evolutionary computation core, leveraging advanced Genetic Algorithm strategies and other meta-heuristics to develop more adaptive feature evaluation criteria and dynamic fitness functions. These innovations will be specifically tailored to track and respond to shifting data distributions. Furthermore, we will investigate efficient, dedicated techniques to manage concept drift, such as sophisticated incremental model update protocols and adaptive sliding window mechanisms. Additional promising avenues include exploring ensemble-based feature selection tactics that combine multiple selectors, and developing dynamic feature weighting schemes to continuously prioritize the most relevant features. The overarching goal of these endeavors is to significantly enhance the framework&#x00027;s responsiveness, resilience, and scalability when confronted with the evolving patterns of complex, real-world streaming applications.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>GL: Methodology, Software, Writing &#x02013; original draft. JL: Validation, Writing &#x02013; review &#x00026; editing. GH: Visualization, Writing &#x02013; review &#x00026; editing. YL: Investigation, Writing &#x02013; review &#x00026; editing. HB: Writing &#x02013; review &#x00026; editing, Data curation. MZ: Writing &#x02013; review &#x00026; editing, Resources.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>GL was employed by PetroChina Qinghai Oilfield Company.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. AI-assisted tools were employed only for post-writing language polishing (grammar and style). The author(s) are solely responsible for the research content, accuracy, and integrity of this work.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ahmadian</surname> <given-names>S.</given-names></name> <name><surname>Berahmand</surname> <given-names>K.</given-names></name> <name><surname>Rostami</surname> <given-names>M.</given-names></name> <name><surname>Forouzandeh</surname> <given-names>S.</given-names></name> <name><surname>Moradi</surname> <given-names>P.</given-names></name> <name><surname>Jalili</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>Recommender systems based on nonnegative matrix factorization: a survey</article-title>. <source>IEEE Trans. Artif. Intell</source>. <volume>6</volume>, <fpage>2554</fpage>&#x02013;<lpage>2574</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TAI.2025.3559053</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Albattah</surname> <given-names>W.</given-names></name> <name><surname>Khan</surname> <given-names>R. U.</given-names></name></person-group> (<year>2025</year>). <article-title>Impact of imbalanced features on large datasets</article-title>. <source>Front. Big Data</source> <volume>8</volume>:<fpage>1455442</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdata.2025.1455442</pub-id><pub-id pub-id-type="pmid">40151465</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Badsha</surname> <given-names>M. B.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>B. X.</given-names></name> <name><surname>Li</surname> <given-names>Y. I.</given-names></name> <name><surname>Xian</surname> <given-names>M.</given-names></name> <name><surname>Banovich</surname> <given-names>N. E.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Imputation of single-cell gene expression with an autoencoder neural network</article-title>. <source>Quant. Biol</source>. <volume>8</volume>, <fpage>78</fpage>&#x02013;<lpage>94</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s40484-019-0192-7</pub-id><pub-id pub-id-type="pmid">32274259</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Casmiry</surname> <given-names>E.</given-names></name> <name><surname>Mduma</surname> <given-names>N.</given-names></name> <name><surname>Sinde</surname> <given-names>R.</given-names></name></person-group> (<year>2025</year>). <article-title>Enhanced SQL injection detection using chi-square feature selection and machine learning classifiers</article-title>. <source>Front. Big Data</source> <volume>8</volume>:<fpage>1686479</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdata.2025.1686479</pub-id><pub-id pub-id-type="pmid">41346567</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chandrashekar</surname> <given-names>G.</given-names></name> <name><surname>Sahin</surname> <given-names>F.</given-names></name></person-group> (<year>2014</year>). <article-title>A survey on feature selection methods</article-title>. <source>Comput. Electr. Eng</source>. <volume>40</volume>, <fpage>16</fpage>&#x02013;<lpage>28</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compeleceng.2013.11.024</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>K.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Yuan</surname> <given-names>Y.</given-names></name> <name><surname>Sedraoui</surname> <given-names>K.</given-names></name> <name><surname>Al-Turki</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>A state-migration particle swarm optimizer for adaptive latent factor analysis of high-dimensional and incomplete data</article-title>. <source>IEEE/CAA J. Autom. Sin</source>. <volume>11</volume>, <fpage>2220</fpage>&#x02013;<lpage>2235</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JAS.2024.124575</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>A differential evolution-enhanced position-transitional approach to latent factor analysis</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell</source>. <volume>7</volume>, <fpage>389</fpage>&#x02013;<lpage>401</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETCI.2022.3186673</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Zhuo</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>J.</given-names></name> <name><surname>Qiu</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>Q.</given-names></name> <name><surname>Xiong</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Federated graph learning via constructing and sharing feature spaces for cross-domain IOT</article-title>. <source>IEEE Internet Things J.</source> <volume>12</volume>, <fpage>26200</fpage>&#x02013;<lpage>26214</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2025.3560635</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Qiao</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>A generalized Nesterov&#x00027;s accelerated gradient-incorporated non-negative latent-factorization-of-tensors model for efficient representation to dynamic QoS data</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell</source>. <volume>8</volume>, <fpage>2386</fpage>&#x02013;<lpage>2400</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETCI.2024.3360338</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ditzler</surname> <given-names>G.</given-names></name> <name><surname>LaBarck</surname> <given-names>J.</given-names></name> <name><surname>Ritchie</surname> <given-names>J.</given-names></name> <name><surname>Rosen</surname> <given-names>G.</given-names></name> <name><surname>Polikar</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>Extensions to online feature selection using bagging and boosting</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>29</volume>, <fpage>4504</fpage>&#x02013;<lpage>4509</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2017.2746107</pub-id><pub-id pub-id-type="pmid">29028210</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>M.</given-names></name> <name><surname>Jiang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Tan</surname> <given-names>K. C.</given-names></name></person-group> (<year>2018</year>). <article-title>Multiobjective sparse non-negative matrix factorization</article-title>. <source>IEEE Trans. Cybern</source>. <volume>49</volume>, <fpage>4250</fpage>&#x02013;<lpage>4264</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCYB.2018.2834898</pub-id><pub-id pub-id-type="pmid">29994343</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hancer</surname> <given-names>E.</given-names></name> <name><surname>Xue</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Fuzzy filter cost-sensitive feature selection with differential evolution</article-title>. <source>Knowl.-Based Syst</source>. <volume>241</volume>:<fpage>108259</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.knosys.2022.108259</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hancer</surname> <given-names>E.</given-names></name> <name><surname>Xue</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>A many-objective diversity-guided differential evolution algorithm for multi-label feature selection in high-dimensional datasets</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell</source>. <volume>9</volume>, <fpage>1226</fpage>&#x02013;<lpage>1237</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETCI.2025.3529840</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Idri</surname> <given-names>A.</given-names></name> <name><surname>Benhar</surname> <given-names>H. J.</given-names></name> <name><surname>Fern&#x000E1;ndez-Alem&#x000E1;n</surname> <given-names>L.</given-names></name> <name><surname>Kadi</surname> <given-names>I.</given-names></name></person-group> (<year>2018</year>). <article-title>A systematic map of medical data preprocessing in knowledge discovery</article-title>. <source>Comput. Methods Programs Biomed</source>. <volume>162</volume>, <fpage>69</fpage>&#x02013;<lpage>85</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cmpb.2018.05.007</pub-id><pub-id pub-id-type="pmid">29903496</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kundu</surname> <given-names>P. P.</given-names></name> <name><surname>Mitra</surname> <given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>Feature selection through message passing</article-title>. <source>IEEE Trans. Cybern</source>. <volume>47</volume>, <fpage>4356</fpage>&#x02013;<lpage>4366</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCYB.2016.2609408</pub-id><pub-id pub-id-type="pmid">28114086</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Lei</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;PRSAMF: Personalized recommendation based on sentiment analysis and matrix factorization,&#x0201D;</article-title> in <source>Proceedings of the 2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</source> (<publisher-loc>Lisbon</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>6553</fpage>&#x02013;<lpage>6560</lpage>. doi: <pub-id pub-id-type="doi">10.1109/BIBM62325.2024.10822471</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Che</surname> <given-names>H.</given-names></name> <name><surname>Leung</surname> <given-names>M. F.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Yan</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Robust multi-view non-negative matrix factorization with adaptive graph and diversity constraints</article-title>. <source>Inf. Sci</source>. <volume>634</volume>, <fpage>587</fpage>&#x02013;<lpage>607</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2023.03.119</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Yuan</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025</year>). <article-title>Learning error refinement in stochastic gradient descent-based latent factor analysis via diversified PID controllers</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell.</source> <volume>9</volume>, <fpage>3582</fpage>&#x02013;<lpage>3597</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETCI.2025.3547854</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>T.</given-names></name> <name><surname>Qian</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Liang</surname> <given-names>X.</given-names></name> <name><surname>Zhan</surname> <given-names>Z. H.</given-names></name></person-group> (<year>2024</year>). <article-title>Feature subspace learning-based binary differential evolution algorithm for unsupervised feature selection</article-title>. <source>IEEE Trans. Big Data</source> <volume>11</volume>, <fpage>99</fpage>&#x02013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TBDATA.2024.3378090</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Zhuo</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>J.</given-names></name> <name><surname>Qiu</surname> <given-names>W.</given-names></name> <name><surname>Zheng</surname> <given-names>Z.</given-names></name> <name><surname>Chen</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Behavior enhanced representation learning for user behavior analysis</article-title>. <source>IEEE Trans. Inf. Forensics Secur.</source> <volume>20</volume>, <fpage>9275</fpage>&#x02013;<lpage>9288</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIFS.2025.3601358</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liao</surname> <given-names>X.</given-names></name> <name><surname>Wu</surname> <given-names>H.</given-names></name> <name><surname>He</surname> <given-names>T.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025</year>). <article-title>A proximal-ADMM-incorporated nonnegative latent-factorization-of-tensors model for representing dynamic cryptocurrency transaction network</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>55</volume>, <fpage>8387</fpage>&#x02013;<lpage>8401</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2025.3605054</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Lin</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025</year>). <article-title>Neural networks-incorporated latent factor analysis for high-dimensional and incomplete data</article-title>. <source>IEEE Trans. Syst. Man Cybernet. Syst.</source> <volume>55</volume>, <fpage>7302</fpage>&#x02013;<lpage>7314</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2025.3583919</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>X.</given-names></name> <name><surname>Yu</surname> <given-names>S.</given-names></name> <name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Lin</surname> <given-names>J.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name></person-group> (<year>2025</year>). <article-title>An incremental nonlinear co-latent factor analysis model for large-scale student performance prediction</article-title>. <source>IEEE Trans. Serv. Comput</source>. <volume>18</volume>, <fpage>3463</fpage>&#x02013;<lpage>3476</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSC.2025.3621687</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>T.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Lv</surname> <given-names>J.</given-names></name> <name><surname>Yi</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>RHDOFS: a distributed online algorithm towards scalable streaming feature selection</article-title>. <source>IEEE Trans. Parallel Distrib. Syst</source>. <volume>34</volume>, <fpage>1830</fpage>&#x02013;<lpage>1847</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPDS.2023.3265974</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Shang</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name></person-group> (<year>2018</year>). <article-title>A fast non-negative latent factor model based on generalized momentum method</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>50</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2018.2875452</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Zhou</surname> <given-names>M.</given-names></name> <name><surname>Yuan</surname> <given-names>H.</given-names></name></person-group> (<year>2021a</year>). <article-title>Latent factor-based recommenders relying on extended stochastic gradient descent algorithms</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>51</volume>, <fpage>916</fpage>&#x02013;<lpage>926</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2018.2884191</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Shang</surname> <given-names>M.</given-names></name></person-group> (<year>2021b</year>). <article-title>An instance-frequency-weighted regularization scheme for non-negative latent factor analysis on high-dimensional and sparse data</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>51</volume>, <fpage>3522</fpage>&#x02013;<lpage>3532</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2019.2930525</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lyu</surname> <given-names>C.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Shi</surname> <given-names>Y.</given-names></name></person-group> (<year>2026</year>). <article-title>Dynamic stochastic reorientation particle swarm optimization for adaptive latent factor analysis in high-dimensional sparse matrices</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>38</volume>, <fpage>222</fpage>&#x02013;<lpage>234</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2025.3621469</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ni</surname> <given-names>J.</given-names></name> <name><surname>Fei</surname> <given-names>H.</given-names></name> <name><surname>Fan</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Automated medical diagnosis by ranking clusters across the symptom-disease network,&#x0201D;</article-title> in <source>Proceedings of the 2017 IEEE International Conference on Data Mining</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1009</fpage>&#x02013;<lpage>1014</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICDM.2017.130</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Perkins</surname> <given-names>S.</given-names></name> <name><surname>Theiler</surname> <given-names>J.</given-names></name></person-group> (<year>2003</year>). <article-title>&#x0201C;Online feature selection using grafting,&#x0201D;</article-title> in <source>Proceedings of the 20th International Conference on Machine Learning</source> (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>AAAI Press</publisher-name>), <fpage>592</fpage>&#x02013;<lpage>599</lpage>.</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>W.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Parallel adaptive stochastic gradient descent algorithms for latent factor analysis of high-dimensional and incomplete industrial data</article-title>. <source>IEEE Trans. Autom. Sci. Eng</source>. <volume>21</volume>, <fpage>2716</fpage>&#x02013;<lpage>2729</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TASE.2023.3267609</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qiu</surname> <given-names>J.</given-names></name> <name><surname>Zhuo</surname> <given-names>S.</given-names></name> <name><surname>Yu</surname> <given-names>P. S.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Huang</surname> <given-names>S.</given-names></name></person-group> (<year>2025</year>). <article-title>Online learning for noisy labeled streams</article-title>. <source>ACM Trans. Knowl. Discov. Data</source>. <volume>19</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3734875</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ram&#x000ED;rez-Gallego</surname> <given-names>S.</given-names></name> <name><surname>Mouri&#x000F1;o-Tal&#x000ED;n</surname> <given-names>H.</given-names></name> <name><surname>Mart&#x000ED;nez-Rego</surname> <given-names>D.</given-names></name> <name><surname>Bol&#x000F3;n-Canedo</surname> <given-names>V.</given-names></name> <name><surname>Ben&#x000ED;tez</surname> <given-names>J. M.</given-names></name> <name><surname>Alonso-Betanzos</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>An information theory-based feature selection framework for big data under apache spark</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>48</volume>, <fpage>1441</fpage>&#x02013;<lpage>1453</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2017.2670926</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shu</surname> <given-names>T.</given-names></name> <name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Guo</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>Online hierarchical streaming feature selection based on adaptive neighborhood rough set</article-title>. <source>Appl. Soft Comput</source>. <volume>152</volume>:<fpage>111276</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.asoc.2024.111276</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>P.</given-names></name> <name><surname>Ruan</surname> <given-names>T.</given-names></name> <name><surname>Wu</surname> <given-names>H.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>Temporal pattern-aware QoS prediction by biased non-negative Tucker factorization of tensors</article-title>. <source>Neurocomputing</source> <volume>582</volume>:<fpage>127447</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neucom.2024.127447</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>F. L.</given-names></name> <name><surname>Zain</surname> <given-names>A. M.</given-names></name> <name><surname>Ren</surname> <given-names>Y.</given-names></name> <name><surname>Bahari</surname> <given-names>M.</given-names></name> <name><surname>Samah</surname> <given-names>A. A.</given-names></name> <name><surname>Ali Shah</surname> <given-names>Z. B.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Navigating the microarray landscape: a comprehensive review of feature selection techniques and their applications</article-title>. <source>Front. Big Data</source> <volume>8</volume>:<fpage>1624507</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fdata.2025.1624507</pub-id><pub-id pub-id-type="pmid">40708675</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Xue</surname> <given-names>B.</given-names></name> <name><surname>Liang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Feature clustering-assisted feature selection with differential evolution</article-title>. <source>Pattern Recognit</source>. <volume>140</volume>:<fpage>109523</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patcog.2023.109523</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>A graph-incorporated latent factor analysis model for high-dimensional and sparse data</article-title>. <source>IEEE Trans. Emerg. Top. Comput</source>. <volume>11</volume>, <fpage>907</fpage>&#x02013;<lpage>917</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETC.2023.3292866</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Zhou</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>A latent factor analysis-based approach to online sparse streaming feature selection</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>52</volume>, <fpage>6744</fpage>&#x02013;<lpage>6758</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2021.3096065</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Hu</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>K.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Deng</surname> <given-names>S.</given-names></name> <name><surname>Zheng</surname> <given-names>N.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025a</year>). <article-title>An outlier-resilient autoencoder for representing high-dimensional and incomplete data</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell</source>. <volume>9</volume>, <fpage>1379</fpage>&#x02013;<lpage>1391</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TETCI.2024.3437370</pub-id></mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025b</year>). <article-title>Robust low-rank latent feature analysis for spatiotemporal signal recovery</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>36</volume>, <fpage>2829</fpage>&#x02013;<lpage>2842</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2023.3339786</pub-id><pub-id pub-id-type="pmid">38100344</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Shang</surname> <given-names>M.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>G.</given-names></name> <name><surname>Zhou</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>A deep latent factor model for high-dimensional and sparse matrices in recommender systems</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>51</volume>, <fpage>4285</fpage>&#x02013;<lpage>4296</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2019.2931393</pub-id></mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>P.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>MMLF: Multi-metric latent feature analysis for high-dimensional and incomplete data</article-title>. <source>IEEE Trans. Serv. Comput</source>. <volume>17</volume>, <fpage>575</fpage>&#x02013;<lpage>588</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSC.2023.3331570</pub-id></mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name></person-group> (<year>2025</year>). <article-title>Learning accurate representation to nonstandard tensors via a mode-aware Tucker network</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>37</volume>, <fpage>7272</fpage>&#x02013;<lpage>7285</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2025.3617894</pub-id></mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Yu</surname> <given-names>K.</given-names></name> <name><surname>Ding</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Zhu</surname> <given-names>X.</given-names></name></person-group> (<year>2013</year>). <article-title>Online feature selection with streaming features</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>35</volume>, <fpage>1178</fpage>&#x02013;<lpage>1192</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2012.197</pub-id><pub-id pub-id-type="pmid">23520258</pub-id></mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>R.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025a</year>). <article-title>Recursion-and-fuzziness reinforced online sparse streaming feature selection</article-title>. <source>IEEE Trans. Fuzzy Syst</source>. <volume>33</volume>, <fpage>2574</fpage>&#x02013;<lpage>2586</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TFUZZ.2025.3569272</pub-id></mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>R.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025b</year>). <article-title>A highly-accurate three-way decision-incorporated online sparse streaming features selection model</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>55</volume>, <fpage>4258</fpage>&#x02013;<lpage>4272</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2025.3548648</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Lin</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Time-varying QoS estimation via non-negative latent factorization of tensors with extended linear biases,&#x0201D;</article-title> in <source>Proceedings of the 2023 IEEE International Conference on Big Data (BigData)</source> (<publisher-loc>Sorrento</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>86</fpage>&#x02013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1109/BigData59044.2023.10386709</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xue</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name> <name><surname>Browne</surname> <given-names>W. N.</given-names></name> <name><surname>Yao</surname> <given-names>X.</given-names></name></person-group> (<year>2016</year>). <article-title>A survey on evolutionary computation approaches to feature selection</article-title>. <source>IEEE Trans. Evol. Comput</source>. <volume>20</volume>, <fpage>606</fpage>&#x02013;<lpage>626</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TEVC.2015.2504420</pub-id></mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xue</surname> <given-names>X.</given-names></name> <name><surname>Yao</surname> <given-names>M.</given-names></name> <name><surname>Wu</surname> <given-names>Z.</given-names></name></person-group> (<year>2018</year>). <article-title>A novel ensemble-based wrapper method for feature selection using extreme learning machine and genetic algorithm</article-title>. <source>Knowl. Inf. Syst</source>. <volume>57</volume>, <fpage>389</fpage>&#x02013;<lpage>412</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10115-017-1131-4</pub-id></mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name></person-group> (<year>2018</year>). <article-title>Incremental perspective for feature selection based on fuzzy rough sets</article-title>. <source>IEEE Trans. Fuzzy Syst</source>. <volume>26</volume>, <fpage>1257</fpage>&#x02013;<lpage>1273</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TFUZZ.2017.2718492</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yao</surname> <given-names>F.</given-names></name> <name><surname>Ding</surname> <given-names>Y. L.</given-names></name> <name><surname>Hong</surname> <given-names>S. G.</given-names></name> <name><surname>Yang</surname> <given-names>S. H.</given-names></name> <name><surname>Ben&#x000ED;tez</surname> <given-names>J. M.</given-names></name> <name><surname>Alonso-Betanzos</surname> <given-names>A.</given-names></name> <name><surname>Herrera</surname> <given-names>F.</given-names></name></person-group> (<year>2022</year>). <article-title>A survey on evolved LoRa-based communication technologies for emerging Internet of Things applications</article-title>. <source>Int. J. Netw. Dyn. Intell</source>. <volume>1</volume>, <fpage>4</fpage>&#x02013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.53941/ijndi0101002</pub-id></mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>K.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Ding</surname> <given-names>W.</given-names></name> <name><surname>Pei</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Scalable and accurate online feature selection for big data</article-title>. <source>ACM Trans. Knowl. Discov. Data</source> <volume>11</volume>:<fpage>16</fpage>. doi: <pub-id pub-id-type="doi">10.1145/2976744</pub-id></mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname> <given-names>Y.</given-names></name> <name><surname>Lu</surname> <given-names>S.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name></person-group> (<year>2025</year>). <article-title>A proportional integral controller-enhanced non-negative latent factor analysis model</article-title>. <source>IEEE/CAA J. Autom. Sin</source>. <volume>12</volume>, <fpage>1246</fpage>&#x02013;<lpage>1259</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JAS.2024.125055</pub-id></mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J. D.</given-names></name> <name><surname>Chow</surname> <given-names>C. Y.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Enabling kernel-based attribute-aware matrix factorization for rating prediction</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>29</volume>, <fpage>798</fpage>&#x02013;<lpage>812</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2016.2641439</pub-id></mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name></person-group> (<year>2017</year>). <article-title>Structured latent label consistent dictionary learning for salient machine faults representation-based robust classification</article-title>. <source>IEEE Trans. Ind. Inform</source>. <volume>13</volume>, <fpage>644</fpage>&#x02013;<lpage>656</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TII.2017.2653184</pub-id></mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>J.</given-names></name> <name><surname>Foster</surname> <given-names>D. P.</given-names></name> <name><surname>Stine</surname> <given-names>R. A.</given-names></name> <name><surname>Ungar</surname> <given-names>L. H.</given-names></name></person-group> (<year>2006</year>). <article-title>Streamwise feature selection</article-title>. <source>J. Mach. Learn. Res</source>. <volume>7</volume>, <fpage>1861</fpage>&#x02013;<lpage>1885</lpage>.</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name></person-group> (<year>2019a</year>). <article-title>OFS-density: a novel online streaming feature selection method</article-title>. <source>Pattern Recognit</source>. <volume>86</volume>, <fpage>48</fpage>&#x02013;<lpage>61</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patcog.2018.08.009</pub-id></mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name></person-group> (<year>2019b</year>). <article-title>Online streaming feature selection using adapted neighborhood rough set</article-title>. <source>Inf. Sci</source>. <volume>481</volume>, <fpage>258</fpage>&#x02013;<lpage>279</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2018.12.074</pub-id></mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Li</surname> <given-names>P. P.</given-names></name> <name><surname>Zhao</surname> <given-names>S.</given-names></name> <name><surname>Wu</surname> <given-names>X. D.</given-names></name></person-group> (<year>2021a</year>). <article-title>Feature interaction for streaming feature selection</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>32</volume>, <fpage>4691</fpage>&#x02013;<lpage>4702</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2020.3025922</pub-id><pub-id pub-id-type="pmid">33021946</pub-id></mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>N.</given-names></name> <name><surname>Zhao</surname> <given-names>S.</given-names></name></person-group> (<year>2021b</year>). <article-title>Online group streaming feature selection considering feature interaction</article-title>. <source>Knowl. Based Syst</source>. <volume>226</volume>:<fpage>107157</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.knosys.2021.107157</pub-id></mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Zhao</surname> <given-names>S.</given-names></name> <name><surname>Yan</surname> <given-names>Y. T.</given-names></name> <name><surname>Wu</surname> <given-names>X. D.</given-names></name></person-group> (<year>2022</year>). <article-title>Online scalable streaming feature selection via dynamic decision</article-title>. <source>ACM Trans. Knowl. Discov. Data</source> <volume>16</volume>, <fpage>1</fpage>&#x02013;<lpage>20</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3502737</pub-id></mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhuo</surname> <given-names>S.-D.</given-names></name> <name><surname>Qiu</surname> <given-names>J.-J.</given-names></name> <name><surname>Wang</surname> <given-names>C.-D.</given-names></name> <name><surname>Huang</surname> <given-names>S.-Q.</given-names></name></person-group> (<year>2024</year>). <article-title>Online feature selection with varying feature spaces</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>36</volume>, <fpage>4806</fpage>&#x02013;<lpage>4819</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2024.3377243</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3117376/overview">Qingguo L&#x000FC;</ext-link>, Chongqing University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1716240/overview">Peng Zhou</ext-link>, Anhui University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2196862/overview">Shengda Zhuo</ext-link>, Jinan University, China</p>
</fn>
</fn-group>
</back>
</article>