<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1491602</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2024.1491602</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>BIMSSA: enhancing cancer prediction with salp swarm optimization and ensemble machine learning approaches</article-title>
<alt-title alt-title-type="left-running-head">Panda et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2024.1491602">10.3389/fgene.2024.1491602</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Panda</surname>
<given-names>Pinakshi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2916501/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bisoy</surname>
<given-names>Sukant Kishoro</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2928709/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Panigrahi</surname>
<given-names>Amrutanshu</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2683664/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pati</surname>
<given-names>Abhilash</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2607150/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sahu</surname>
<given-names>Bibhuprasad</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2916486/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Guo</surname>
<given-names>Zheshan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2677663/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Haipeng</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/502817/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jain</surname>
<given-names>Prince</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1275117/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Computer Science and Engineering</institution>, <institution>C. V. Raman Global University</institution>, <addr-line>Bhubaneswar</addr-line>, <addr-line>Odisha</addr-line>, <country>India</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Computer Science and Engineering</institution>, <institution>Siksha &#x2018;O&#x2019; Anusandhan (Deemed to be University)</institution>, <addr-line>Bhubaneswar</addr-line>, <addr-line>Odisha</addr-line>, <country>India</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Information Technology</institution>, <institution>Vardhaman College of Engineering (Autonomous)</institution>, <addr-line>Hyderabad</addr-line>, <addr-line>Telangana</addr-line>, <country>India</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Key Laboratory of Biomedical Engineering of Hainan Province</institution>, <institution>School of Biomedical Engineering</institution>, <institution>Hainan University</institution>, <addr-line>Sanya</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Centre for Intelligent Healthcare</institution>, <institution>Coventry University</institution>, <addr-line>Coventry</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Mechatronics Engineering</institution>, <institution>Parul Institute of Technology</institution>, <institution>Parul University</institution>, <addr-line>Vadodara</addr-line>, <addr-line>Gujarat</addr-line>, <country>India</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/827295/overview">Lei Chen</ext-link>, Shanghai Maritime University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2892363/overview">Puspanjali Mohapatra</ext-link>, International Institute of Information Technology, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2909699/overview">Abdelkader Benyettou</ext-link>, Centre Universitaire de Relizane, Algeria</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Zheshan Guo, <email>guozheshan@hainanu.edu.cn</email>; Prince Jain, <email>princeece48@gmail.com</email>
</corresp>
<fn fn-type="other" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>
<bold>ORCID ID:</bold> Haipeng Liu, <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-4212-2503">https://orcid.org/0000-0002-4212-2503</ext-link>; Prince Jain, <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-7950-7263">https://orcid.org/0000-0002-7950-7263</ext-link>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1491602</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Panda, Bisoy, Panigrahi, Pati, Sahu, Guo, Liu and Jain.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Panda, Bisoy, Panigrahi, Pati, Sahu, Guo, Liu and Jain</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Cancer rates are rising rapidly, causing global mortality. According to the World Health Organization (WHO), 9.9 million people died from cancer in 2020. Machine learning (ML) helps identify cancer early, reducing deaths. An ML-based cancer diagnostic model can use the patient&#x2019;s genetic information, such as microarray data. Microarray data are high dimensional, which can degrade the performance of the ML-based models. For this, feature selection becomes essential.</p>
</sec>
<sec>
<title>Methods</title>
<p>The Salp Swarm Optimization Algorithm (SSA), Improved Maximum Relevance and Minimum Redundancy (IMRMR), and Boruta form the basis of this work&#x2019;s ML-based model BIMSSA. The BIMSSA model implements a pipelined feature selection method to effectively handle high-dimensional microarray data. Initially, Boruta and IMRMR were applied to extract relevant gene expression aspects. Then, SSA was implemented to optimize feature size. To optimize feature space, five separate machine learning classifiers, Support Vector Machine (SVM), Random Forest (RF), Extreme Learning Machine (ELM), AdaBoost, and XGBoost, were applied as the base learners. Then, majority voting was used to build an ensemble of the top three algorithms. The ensemble ML-based model BIMSSA was evaluated using microarray data from four different cancer types: Adult acute lymphoblastic leukemia and Acute myelogenous leukemia (ALL-AML), Lymphoma, Mixed-lineage leukemia (MLL), and Small round blue cell tumors (SRBCT).</p>
</sec>
<sec>
<title>Results</title>
<p>In terms of accuracy, the proposed BIMSSA (Boruta &#x002B; IMRMR &#x002B; SSA) achieved 96.7&#x0025; for ALL-AML, 96.2&#x0025; for Lymphoma, 95.1&#x0025; for MLL, and 97.1&#x0025; for the SRBCT cancer datasets, according to the empirical evaluations.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The results show that the proposed approach can accurately predict different forms of cancer, which is useful for both physicians and researchers.</p>
</sec>
</abstract>
<kwd-group>
<kwd>cancer prediction</kwd>
<kwd>microarray data</kwd>
<kwd>feature selection</kwd>
<kwd>swarm intelligence</kwd>
<kwd>ensemble learning</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Cancer has been one of the main causes of mortality across the globe for many decades, making it a significant public health issue on a global scale. Cancer is an umbrella term for illnesses caused by the unchecked proliferation and metastasis of aberrant cells. It significantly affects public health and affects individuals of all ages and walks of life. Cancer mortality rates change both annually and geographically. WHO and the International Agency for Research on Cancer (IARC) frequently release statistics on cancer fatalities. It was predicted that there were around 9.9 million deaths from cancer globally in 2020 (<xref ref-type="bibr" rid="B33">Panigrahi et al., 2023</xref>). Deaths from cancer are not distributed in the same way all across the world. Developed nations often have higher incidence and survival rates because of better healthcare, earlier diagnosis, and more effective treatments. However, cancer may be a significant problem in locations with inadequate healthcare infrastructure (<xref ref-type="bibr" rid="B45">Venkatesan et al., 2022</xref>). Screening for cancer at an early stage is another way to boost treatment success. Many forms of cancer now have better survival rates because of advances in cancer research and therapy (<xref ref-type="bibr" rid="B23">Khalsan et al., 2022</xref>).</p>
<p>Infants with ALL rearranged to MLL have even worse prognoses, with survival rates of 40%&#x2013;50%; the overall death rate for ALL is 20%&#x2013;25% in children and up to 50% in adults. The death rate for lymphomas varies greatly depending on subtype. For example, aggressive non-Hodgkin lymphomas, such as diffuse large B-cell lymphoma, have a mortality rate of 30%&#x2013;40%, whereas Hodgkin lymphomas have a rate below 10%. In children, the survival rate for leukemias with MLL rearrangements is around 50%, but in adults, it is lower, especially in cases of acute myeloid leukemia (AML) (<xref ref-type="bibr" rid="B24">Lewis et al., 2020</xref>). The 5-year survival rate for SRBCTs drops to 15%&#x2013;30% in metastatic instances from over 70% in localized illness (<xref ref-type="bibr" rid="B13">de Leval and Jaffe, 2020</xref>).</p>
<p>Through enhancing early detection and diagnosis, treatment, and patient outcomes, machine learning has the potential to contribute significantly to reducing cancer mortality rates (<xref ref-type="bibr" rid="B11">Bol&#xf3;n-Canedo et al., 2014</xref>). Medical imaging like mammograms and CT scans may be analyzed using machine learning algorithms for the early detection of cancer. Earlier diagnosis is associated with better treatment results (<xref ref-type="bibr" rid="B26">Ma et al., 2020</xref>). It may ensure that patients get the best possible treatment promptly by lowering the percentage of erroneous diagnostic results. Machine learning may predict treatment outcomes by analyzing a patient&#x2019;s medical record, genetic data, and tumor features (<xref ref-type="bibr" rid="B2">Alghunaim and Al-Baity, 2019</xref>). By aiding in medication research and development, machine learning may help produce more effective and tailored medicines for cancer.</p>
<p>Several forms of medical data have been analyzed and interpreted using machine learning (ML) for cancer detection. Medical imaging data and genomic data account for the vast majority of ML-based cancer research. Mammograms, X-rays, CT scans, and MRI scans are all examples of medical imaging data (<xref ref-type="bibr" rid="B3">Almugren and Alshamlan, 2019</xref>). DNA sequencing data and gene expression profiling or microarray data are further examples of &#x201c;genomic data.&#x201d; Microarray data, or genetic information, is crucial for cancer diagnosis because it reveals important information about the disease itself and its genetic disorders. This knowledge facilitates personalized therapy, boosts treatment efficacy, and facilitates educated decision-making in cancer care and prevention (<xref ref-type="bibr" rid="B25">L&#xf3;pez-Garc&#xed;a et al., 2020</xref>). When processing microarray data, ML encounters several challenges. The most difficult aspects of the microarray data for ML to handle are the high dimensionality, small sample size, and class imbalance. The high dimensionality of microarray data results from the fact that thousands of characteristics (genes or probes) are often assessed for each sample or patient. Owing to the &#x201c;curse of dimensionality,&#x201d; conventional ML methods may become ineffective in required resources (both computational and cognitive) (<xref ref-type="bibr" rid="B39">Shukla et al., 2020a</xref>). There are often fewer samples available than there are characteristics to analyze. Creating reliable and generalizable models might be difficult when working with a small sample size.</p>
<p>The best possible solution is to overcome the above-stated challenges, employing strategies such as dimensionality reduction, feature selection, and optimization algorithms to effectively select genes for cancer diagnosis (<xref ref-type="bibr" rid="B41">Shukla et al., 2019</xref>). Deploying a single feature selection algorithm may reduce the number of features to some extent. However, if the feature space is larger, employing a single method to select the appropriate number of features may not be sufficient. Hence, the current work aims to deal with the high dimensionality issues of microarray data with the help of the pipelined feature selection algorithms followed by a nature-inspired optimization algorithm to reduce the feature space to the extent upon which the machine learning models can be used to develop a more effective ML model. The reported research includes the Boruta and IMRMR feature selection algorithms in a pipelined manner. The motivation behind using the Boruta feature selection algorithm is its ability to identify all relevant genes while considering the complex feature interaction with the target feature using Random Forest (RF). In addition, it also deals with the challenges, including the overfitting and interaction of the features with the target variable. Improved Maximum Relevance and Minimum Redundancy (IMRMR) improves traditional mRMR by selecting feature subsets highly related to the target class and mainly uncorrelated with one another. This makes features more informative and efficient. This balance is a significant benefit of the approach in complicated datasets where inter-feature relationships may not be evident and impair each feature&#x2019;s prediction ability. The Salp Swarm Optimization Algorithm (SSA) effectively lowers the dimensionality of datasets and enhances model performance by identifying the most influential features that contribute to the model&#x2019;s predictive capability.
Its innate simplicity and resilience make it highly suitable for managing intricate, high-dimensional data where conventional feature selection approaches may encounter difficulties. It utilizes the swarming characteristics of the salp in an ocean, which balances the exploration and exploitation with efficacy, which helps the SSA to converge on a globally optimal solution while avoiding the local optima. In the current work, BIMSSA, the Boruta is initially applied to eliminate the irrelevant features from the dataset, thus reducing the feature space. It creates the shadow features by shuffling the original features randomly. Using the base classifier RF, the importance score is calculated for original and shadowed features. The highest score of the shadowed feature set is termed the threshold value. The original features with a score greater than the threshold value are considered relevant. Then, the IMRMR is used to select the most relevant features from the feature selection by calculating the IMRMR score. Finally, to the selected features, the SSA is applied to select an optimal number through multiple iterations of features by having a stopping criterion, such as maximum iteration for current work. The algorithms mentioned above are dedicated to selecting the features from the entire feature space. Thus, these algorithms work in a vertical approach.</p>
<sec id="s1-1">
<title>1.1 Objective</title>
<p>This study aims to create an ensemble ML-based model using the Boruta IMRMR technique to select features and the SSA algorithm to optimize those features. The research&#x2019;s key contribution is summed up as follows.<list list-type="simple">
<list-item>
<p>&#x2022; To develop an ensemble ML model for efficient Cancer diagnosis.</p>
</list-item>
<list-item>
<p>&#x2022; To compare the performance of various feature selection approaches, specifically Boruta and IMRMR, in reducing the dimensionality of the microarray data.</p>
</list-item>
<list-item>
<p>&#x2022; To assess the impact of SSA in optimizing the selected features in the context of the Cancer gene expression data.</p>
</list-item>
<list-item>
<p>&#x2022; To analyze the performance of hybrid models with feature selection methods along with four conventional ML classifiers.</p>
</list-item>
<list-item>
<p>&#x2022; To analyze the efficacy of the ensemble learning model over the hybrid model.</p>
</list-item>
<list-item>
<p>&#x2022; Finally, four different cancer microarray data are considered to evaluate the proposed model.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s1-2">
<title>1.2 Literature survey</title>
<p>For the current work, 130 reports are initially identified. Several records are excluded from the study at different steps. Twenty duplicate records are removed before the screening process. The remaining 110 records are considered for the screening phase. In this phase, ten irrelevant records are excluded. From the screening phase, 100 records are processed for the retrieval phase, out of which 7 records are excluded as those records could not be retrieved. Hence, the remaining 93 full-text records are considered. From the considered full texts, 17 records are excluded as sufficient data for analysis are unavailable. In addition, 20 records are excluded as they deal with diseases other than cancer, and 7 records are found to be irrelevant to the study. Finally, the remaining 49 articles are considered for the current work. From the considered records, 20 articles are used in the literature survey part, and the remaining 29 records are considered in the rest of the manuscript. The figure in <xref ref-type="app" rid="app1">Appendix I</xref> shows the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) flow diagram for the current work.</p>
<p>
<xref ref-type="bibr" rid="B20">Ibrahim et al. (2017)</xref> introduced a novel hybrid approach, the Salp Swarm Algorithm in feature selection (SSA-FS), on the real datasets obtained from Iraqi hospitals for breast, bladder, and colon cancers. <xref ref-type="bibr" rid="B18">Hegazy et al. (2018a)</xref> introduced a novel method, the chaotic salp swarm algorithm (CSSA), to enhance the SSA on 28 datasets. <xref ref-type="bibr" rid="B19">Hegazy et al. (2018b)</xref> introduced a novel method, the improved salp swarm algorithm (ISSA), which is consolidated with the KNN classifier for feature selection on 23 UCI-ML datasets and claimed to achieve enhanced accuracies on Breast Cancer, Lung Cancer, and BreastEW disease datasets respectively. Using a neighborhood entropy-based uncertainty measures model, <xref ref-type="bibr" rid="B42">Sun et al. (2019)</xref> successfully applied machine learning (ML) methods, including k-nearest neighbor (KNN), C4.5, and Support Vector Machine (SVM), to the classification of the colon, diffuse large B-cell lymphoma (DLBCL), leukemia, lung, and small round blue cell tumor (SRBCT). <xref ref-type="bibr" rid="B16">Ghoniem (2020)</xref> introduced a novel bio-inspired liver cancer diagnosis model considering a deep learning (DL) approach, i.e., Convolutional Neural Network (CNN) along with SegNet and UNet, and the optimization technique, i.e., Artificial Bee Colony optimization (ABC) on Radiopaedia and LiTS datasets. <xref ref-type="bibr" rid="B40">Shukla et al. (2020b)</xref> introduced an adaptive inertia weight teaching-learning model considering machine learning approaches, i.e., Support Vector Machine (SVM), Extreme Learning Machine (ELM), and Na&#xef;ve Bayes (NB) on Breast Cancer, Colon Cancer, DLBCL, Leukaemia, SRBCT, Lung Cancer.</p>
<p>
<xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan (2020)</xref> introduced differential evolution and global optimal feature selection for cancer data classification model considering the machine learning (ML) approach, i.e., Decision Tree (DT) and optimization technique, i.e., Ant colony optimization (ACO) on five datasets, i.e., DLBCL, Breast Cancer, Leukemia, SRBCT, Gisette datasets. <xref ref-type="bibr" rid="B32">Nouri-Moghaddam et al. (2021)</xref> introduced a new hybrid solution based on a multi-filter and adaptive chaotic multi-objective forest optimization algorithm (AC-MOFOA) considering Forest optimization algorithm (FOA), Extreme learning machine (ELM), multi-objective optimization (MOO), and five filter methods, i.e., IG, mRMR, ReliefF, CFS, and Fisher-score, on nine datasets, i.e., SRBCT, Tumors_9, Leukaemia3, colon_prostate, Lung, GCM, Breast, Rsctc_5, Rsctc_6.</p>
<p>
<xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> introduced a novel feature selection model considering a machine learning (ML) approach, i.e., K-nearest neighbor (KNN) and a wrapper feature selection algorithm FS_SSA based on Salp swarm, on five datasets, i.e., ALL-AML-4, Colon Cancer, Lymphoma, MLL, SRBCT datasets. Sarala et al. (<xref ref-type="bibr" rid="B6">Arun Prabha et al., 2021</xref>) introduced a decision-based Salp Swarm Optimization (DT-SWO) algorithm considering machine learning (ML) approaches, i.e., Decision Tree (DT), Support Vector Machine (SVM), Na&#xef;ve Bayes (NB), Kernel Support Vector Machine (KSVM), and optimization technique, i.e., Salp Swarm Optimization (SWO) on four datasets, i.e., DLBCL, Leukemia, Lung Cancer and colon datasets. <xref ref-type="bibr" rid="B4">Alomari et al. (2021)</xref> introduced a hybrid filter-wrapper approach considering robust Minimum Redundancy Maximum Relevancy (rMRMR) as a filter approach, Modified Gray Wolf Optimization (MGWO) as a wrapper approach, and ML approaches including Random Forest (RF), Elastic Networks (EN) and Decision Tree (DT) on nine datasets. <xref ref-type="bibr" rid="B9">Balakrishnan et al. (2021)</xref> introduced an improved salp swarm algorithm (iSSA) based on the levy flight for feature selection model and Support Vector Machine (SVM) classifier on six datasets, i.e., Oral Squamous Cell Carcinoma (OSCC), Ovarian cancer, Breast Cancer, CNS, Colon Cancer, Leukemia datasets. <xref ref-type="bibr" rid="B17">Hameed et al. (2021)</xref> included the binary particle swarm optimization (BPSO), the genetic algorithm (GA), and the cuckoo search algorithm (CS) for selecting the features. ML approaches, including SVM, NB, KNN, and RF, are applied to twelve datasets.</p>
<p>Gene selection for microarray data categorization was developed by <xref ref-type="bibr" rid="B36">Rostami et al. (2022)</xref> using a multi-objective graph theoretic-based approach model that considers the idea of community detection with node centrality (CDNC). On six datasets, <xref ref-type="bibr" rid="B8">Aziz (2022)</xref> presented a metaheuristics model that was inspired by nature. The model utilized ML approaches such as Support Vector Machine (SVM), Na&#xef;ve Bayes (NB), Artificial Neural Network (ANN), cuckoo search (CS), genetic algorithm (GA), and artificial bee colony (ABC). <xref ref-type="bibr" rid="B5">Alromema et al. (2023)</xref> applied logistic regression, Support Vector Machine, K-Nearest Neighbours, Neural Networks, Na&#xef;ve Bayes, Decision Tree, and eXtreme Gradient Boosting to gene expression datasets. On 17 microarray expression datasets, including CNS, Colon, Leukemia_3C, Leukemia_4C, Leukaemia, Huntington Disease, DLBCL, Lymphoma66&#xd7;4026_3c, Lymphoma, Prostate, SRBCT, Lung Cancer, Breast Cancer, Sarcoma, Myeloma, and Ovarian, <xref ref-type="bibr" rid="B22">Ke et al. (2022)</xref> suggested a population initialization method based on ranking criteria (PIRC) using NB, C4.5, genetic algorithm (GA), and ant colony optimization (ACO). In their study on the Wisconsin Breast Cancer Dataset (WBCD), <xref ref-type="bibr" rid="B37">Rustagi et al. (2024)</xref> presented a method for breast cancer detection that relies on Salp Swarm and Grey Wolf Optimisation. They took SVM and KNN classifiers into account. <xref ref-type="bibr" rid="B44">&#xdc;nalan et al. (2024)</xref> highlighted ensemble learning methodologies&#x2019; efficacy in classifying breast cancer. It was revealed that performance improved over standalone classifiers. Classifiers AdaBoost, GBM, and RGF gave an impressive accuracy of 99.5%.
However, ensembles of this kind surpassed the respective individual algorithms LGBM for accuracy and gave an F1 score of 99.2% alongside an accuracy of 98.9%. Incorporating stratified shuffle split and k-fold cross-validation raises the question of the strict evaluation technique in obtaining credible and clinically relevant classification outputs. <xref ref-type="table" rid="T1">Table 1</xref> shows the analytical study of the literature mentioned above. <xref ref-type="bibr" rid="B10">Batool and Byun (2024)</xref> showed that ensemble learning applied to the task of breast cancer classification performed well by enhancing the general predictive accuracy through a set of multiple models. Several studies on the WBCD dataset showed that the ensemble method, for example, a voting classifier involving ETC, LightGBM, RC, and LDA models, performed better than individual models. The proposed model succeeded in surpassing all known state-of-the-art classifiers utilized in the detection and diagnosis cases of breast cancer with an average accuracy of 97.6% and an F1 value of 98.1%. <xref ref-type="bibr" rid="B27">Mahesh et al. (2022)</xref> proposed an early prediction of breast cancer through a blended ensemble learning approach with SVM, KNN, DT, RF, and LR as base classifiers. The model&#x2019;s performance was checked using a breast cancer dataset, which yielded considerable improvements in accuracy at 98.14%. Accuracy, recall, precision, and F1-score metrics validate the ensemble model&#x2019;s effectiveness over individual classifiers.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Analytical study of existing literature.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Ref</th>
<th align="center">Techniques employed</th>
<th align="center">Datasets employed</th>
<th align="center">Findings (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">
<xref ref-type="bibr" rid="B20">Ibrahim et al. (2017)</xref>
</td>
<td rowspan="3" align="left">SSA, SVM</td>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 98.75</td>
</tr>
<tr>
<td align="left">Bladder</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Colon Cancer</td>
<td align="left">Accuracy: 99.75</td>
</tr>
<tr>
<td rowspan="3" align="center">
<xref ref-type="bibr" rid="B18">Hegazy et al. (2018a)</xref>
</td>
<td rowspan="3" align="left">SSA, KNN</td>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 97.08</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 60</td>
</tr>
<tr>
<td align="left">Breast EW</td>
<td align="left">Accuracy: 97.08</td>
</tr>
<tr>
<td rowspan="7" align="center">
<xref ref-type="bibr" rid="B19">Hegazy et al. (2018b)</xref>
</td>
<td rowspan="7" align="left">ISSA, KNN</td>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 95.70</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 59.78</td>
</tr>
<tr>
<td align="left">Breast EW</td>
<td align="left">Accuracy: 96.10</td>
</tr>
<tr>
<td align="left">DLBCL</td>
<td align="left">Accuracy: 92.7</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 92.9</td>
</tr>
<tr>
<td align="left">Lung</td>
<td align="left">Accuracy: 98.8</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 93.6</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B42">Sun et al. (2019)</xref>
</td>
<td align="left">KNN, C4.5, SVM</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 93.6</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B16">Ghoniem (2020)</xref>
</td>
<td align="left">CNN along with SegNet and UNet, ABC</td>
<td align="left">Radiopaedia datasets</td>
<td align="left">Accuracy: 99.3, F1-Score: 99.0, Specificity: 99.0</td>
</tr>
<tr>
<td rowspan="5" align="center">
<xref ref-type="bibr" rid="B40">Shukla et al. (2020b)</xref>
</td>
<td rowspan="5" align="left">SVM, ELM, and NB</td>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 89.59</td>
</tr>
<tr>
<td align="left">Colon Cancer</td>
<td align="left">Accuracy: 98.03</td>
</tr>
<tr>
<td align="left">DLBCL</td>
<td align="left">Accuracy: 99.89</td>
</tr>
<tr>
<td align="left">Leukaemia</td>
<td align="left">Accuracy: 98.99</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 98.83</td>
</tr>
<tr>
<td rowspan="4" align="center">
<xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan (2020)</xref>
</td>
<td rowspan="4" align="left">DT and ACO</td>
<td align="left">DLBCL</td>
<td align="left">Accuracy: 92.65, Specificity: 98.4, Precision: 95.8, Recall: 95.4, F-Measure: 95.1</td>
</tr>
<tr>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 71.88, Specificity: 91.1, Precision: 59.3, Recall: 72.1, F-measure: 64.3</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 85.29, Specificity: 92.8, Precision: 89.6, Recall: 85.8, F-measure: 85.6</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 81.58, Specificity: 93.1, Precision: 82.5, Recall: 82.1, F-measure: 81.7</td>
</tr>
<tr>
<td rowspan="6" align="center">
<xref ref-type="bibr" rid="B32">Nouri-Moghaddam et al. (2021)</xref>
</td>
<td rowspan="6" align="left">FOA, ELM, MOO</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 90.72</td>
</tr>
<tr>
<td align="left">Tumors_9</td>
<td align="left">Accuracy: 84.41</td>
</tr>
<tr>
<td align="left">Leukaemia3</td>
<td align="left">Accuracy: 97.66</td>
</tr>
<tr>
<td align="left">Colon</td>
<td align="left">Accuracy: 97.89</td>
</tr>
<tr>
<td align="left">Lung</td>
<td align="left">Accuracy: 93.97</td>
</tr>
<tr>
<td align="left">Breast</td>
<td align="left">Accuracy: 86.53</td>
</tr>
<tr>
<td rowspan="5" align="center">
<xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref>
</td>
<td rowspan="5" align="left">KNN, FS_SSA, PSO, and GA</td>
<td align="left">ALL-AML-4</td>
<td align="left">Accuracy: 94.23</td>
</tr>
<tr>
<td align="left">Colon Tumor</td>
<td align="left">Accuracy: 82.09</td>
</tr>
<tr>
<td align="left">Lymphoma</td>
<td align="left">Accuracy: 88.57</td>
</tr>
<tr>
<td align="left">MLL</td>
<td align="left">Accuracy: 86.19</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 76.74</td>
</tr>
<tr>
<td rowspan="4" align="center">
<xref ref-type="bibr" rid="B6">Arun Prabha et al. (2021)</xref>
</td>
<td rowspan="4" align="left">DT, SVM, NB, KSVM, and SWO</td>
<td align="left">DLBCL</td>
<td align="left">Accuracy: 95</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 97</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 94</td>
</tr>
<tr>
<td align="left">Colon</td>
<td align="left">Accuracy: 98</td>
</tr>
<tr>
<td rowspan="8" align="center">
<xref ref-type="bibr" rid="B4">Alomari et al. (2021)</xref>
</td>
<td rowspan="8" align="left">rMRMR-MGWO, LASSO, RF, EN, and DT</td>
<td align="left">Colon Tumor</td>
<td align="left">Accuracy: 94.14, Precision: 95.33, Recall: 91.97, F1-Score: 95.46, Matthews Correlation Coefficient (MCC): 86.39</td>
</tr>
<tr>
<td align="left">CNS</td>
<td align="left">Accuracy: 100, Precision: 100, Recall: 100, F1-Score: 100, MCC: 100</td>
</tr>
<tr>
<td align="left">ALL-AML</td>
<td align="left">Accuracy: 100, Precision: 100, Recall: 100, F1-Score: 100, MCC: 100</td>
</tr>
<tr>
<td align="left">Ovarian Cancer</td>
<td align="left">Accuracy: 100, Precision: 100, Recall: 100, F1-Score: 100, MCC: 100</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 97.52, Precision: 94.45, Recall: 98.82, F1-score: 95.79, MCC: 92.0</td>
</tr>
<tr>
<td align="left">ALL-AML-3C</td>
<td align="left">Accuracy: 99.86, Precision: 99.82, Recall: 99.94, F1-Score: 99.77, MCC: 97</td>
</tr>
<tr>
<td align="left">ALL-AML-4C</td>
<td align="left">Accuracy: 98.84, Precision: 99.11, Recall: 99.62, F1-Score: 98.63, MCC: 93</td>
</tr>
<tr>
<td align="left">MLL</td>
<td align="left">Accuracy: 99.90, Precision: 99.89, Recall: 99.95, F1-Score: 99.9, MCC: 98.0</td>
</tr>
<tr>
<td rowspan="6" align="center">
<xref ref-type="bibr" rid="B9">Balakrishnan et al. (2021)</xref>
</td>
<td rowspan="6" align="left">SSA, Levy Flight, SVM</td>
<td align="left">OSCC</td>
<td align="left">Accuracy: 85.7, F1-score: 85.7, Recall: 90, Precision: 90.0</td>
</tr>
<tr>
<td align="left">Ovarian Cancer</td>
<td align="left">Accuracy: 83.33, F1-score: 84.3, Recall: 89.5, Precision: 88.0</td>
</tr>
<tr>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 50, F1-Score: 66.6, Recall: 100, Precision: 50</td>
</tr>
<tr>
<td align="left">CNS</td>
<td align="left">Accuracy: 66.6, F1-Score: 58.8, Recall: 45.4, Precision: 83.3</td>
</tr>
<tr>
<td align="left">Colon Cancer</td>
<td align="left">Accuracy: 86.9, F1-Score: 88.0, Recall: 100, Precision: 78.5</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 85.7, F1-score: 87.5, Recall: 100, Precision: 100</td>
</tr>
<tr>
<td rowspan="9" align="center">
<xref ref-type="bibr" rid="B17">Hameed et al. (2021)</xref>
</td>
<td rowspan="9" align="left">BPSO, GA, CS, KNN, SVM, NB, RF</td>
<td align="left">Brain</td>
<td align="left">Accuracy: 97.62</td>
</tr>
<tr>
<td align="left">Breast</td>
<td align="left">Accuracy: 86.60</td>
</tr>
<tr>
<td align="left">CNS</td>
<td align="left">Accuracy: 80.00</td>
</tr>
<tr>
<td align="left">Colon</td>
<td align="left">Accuracy: 93.55</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Lung</td>
<td align="left">Accuracy: 97.54</td>
</tr>
<tr>
<td align="left">Ovarian</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Prostate</td>
<td align="left">Accuracy: 96.08</td>
</tr>
<tr>
<td align="left">TCGA</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td rowspan="5" align="center">
<xref ref-type="bibr" rid="B36">Rostami et al. (2022)</xref>
</td>
<td rowspan="5" align="left">CDNC</td>
<td align="left">Colon</td>
<td align="left">Accuracy: 88.73</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 90.18</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 82.82</td>
</tr>
<tr>
<td align="left">Prostate Tumor</td>
<td align="left">Accuracy: 82.91</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 91.76</td>
</tr>
<tr>
<td rowspan="6" align="center">
<xref ref-type="bibr" rid="B8">Aziz (2022)</xref>
</td>
<td rowspan="6" align="left">SVM, NB, and ANN, along with CS, GA, and ABC</td>
<td align="left">Colon Cancer</td>
<td align="left">Accuracy: 93.01</td>
</tr>
<tr>
<td align="left">Acute Leukemia</td>
<td align="left">Accuracy: 93.35</td>
</tr>
<tr>
<td align="left">Prostate Tumor</td>
<td align="left">Accuracy: 89.14</td>
</tr>
<tr>
<td align="left">High-grade Glioma</td>
<td align="left">Accuracy: 90.32</td>
</tr>
<tr>
<td align="left">Lung Cancer II</td>
<td align="left">Accuracy: 87.71</td>
</tr>
<tr>
<td align="left">Leukemia-2</td>
<td align="left">Accuracy: 93.67</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B5">Alromema et al. (2023)</xref>
</td>
<td align="left">SVM, KNN, NN, NB, DT, XGBoost, LR</td>
<td align="left">Gene Expression Dataset</td>
<td align="left">Accuracy: 97.6, F1-Score: 97.4, AUC: 0.961</td>
</tr>
<tr>
<td rowspan="14" align="center">
<xref ref-type="bibr" rid="B22">Ke et al. (2022)</xref>
</td>
<td rowspan="14" align="left">NB, C4.5, GA, and ACO</td>
<td align="left">CNS</td>
<td align="left">Accuracy: 85.00</td>
</tr>
<tr>
<td align="left">Colon</td>
<td align="left">Accuracy: 91.90</td>
</tr>
<tr>
<td align="left">Leukemia_3C</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Leukemia_4C</td>
<td align="left">Accuracy: 97.50</td>
</tr>
<tr>
<td align="left">Leukemia</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">DLBCL</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Lymphoma66 &#xd7; 4,026_3c</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Lymphoma</td>
<td align="left">Accuracy: 93.21</td>
</tr>
<tr>
<td align="left">Prostate</td>
<td align="left">Accuracy: 95.00</td>
</tr>
<tr>
<td align="left">Lung Cancer</td>
<td align="left">Accuracy: 100</td>
</tr>
<tr>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 97.18</td>
</tr>
<tr>
<td align="left">Sarcoma</td>
<td align="left">Accuracy: 75.36</td>
</tr>
<tr>
<td align="left">Myeloma</td>
<td align="left">Accuracy: 90.20</td>
</tr>
<tr>
<td align="left">Ovarian</td>
<td align="left">Accuracy: 98.80</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B37">Rustagi et al. (2024)</xref>
</td>
<td align="left">SSA, SVM, KNN</td>
<td align="left">WBCD</td>
<td align="left">Accuracy: 99.42</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B44">&#xdc;nalan et al. (2024)</xref>
</td>
<td align="left">AdaBoost, GBM, and RGF, LGBM</td>
<td align="left">WBCD</td>
<td align="left">Accuracy: 99.5</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B10">Batool and Byun (2024)</xref>
</td>
<td align="left">Extra Tree Classifier, Ridge, LGBM</td>
<td align="left">WBCD</td>
<td align="left">Accuracy: 97.6, F-1 Score: 98.1</td>
</tr>
<tr>
<td align="center">
<xref ref-type="bibr" rid="B27">Mahesh et al. (2022)</xref>
</td>
<td align="left">SVM, KNN, DT, RF, and LR</td>
<td align="left">Breast Cancer</td>
<td align="left">Accuracy: 98.14, Precision: 96.18, Recall: 97.23, F1-Score: 96.43</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> shows the analytical study of the considered literature, in which it has been observed that the reported literature follows a two-stage feature selection process. In addition, the adopted approaches follow a hybrid approach for cancer classification. However, applying two-stage feature selection has only a limited impact on the datasets undertaken here, i.e., the ALL-AML, Lymphoma, MLL, and SRBCT cancer datasets. Thus, the current research aims to apply a pipelined feature selection method consisting of three different feature selection approaches, starting from Boruta and followed by IMRMR and SSA. To make the comparison more focused, <xref ref-type="table" rid="T2">Table 2</xref> has been extracted from <xref ref-type="table" rid="T1">Table 1</xref> and is dedicated to the literature that deals with the datasets considered for the current study, namely ALL-AML, Lymphoma, MLL, and SRBCT.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Literature survey summary for the ALL-AML, Lymphoma, MLL, and SRBCT datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Ref</th>
<th align="center">Techniques employed</th>
<th align="center">Datasets employed</th>
<th align="center">Findings (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B42">Sun et al. (2019)</xref>
</td>
<td align="left">KNN, C4.5, SVM</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 93.6</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan (2020)</xref>
</td>
<td align="left">DT and ACO</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 81.58</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B32">Nouri-Moghaddam et al. (2021)</xref>
</td>
<td align="left">FOA, ELM, MOO</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 90.72</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref>
</td>
<td rowspan="3" align="left">KNN, FS_SSA, PSO, and GA</td>
<td align="left">Lymphoma</td>
<td align="left">Accuracy: 88.57</td>
</tr>
<tr>
<td align="left">MLL</td>
<td align="left">Accuracy: 86.19</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 76.74</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B4">Alomari et al. (2021)</xref>
</td>
<td align="left">rMRMR-MGWO, LASSO, RF, EN, and DT</td>
<td align="left">ALL-AML-3C</td>
<td align="left">Accuracy: 99.86</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B36">Rostami et al. (2022)</xref>
</td>
<td align="left">CDNC</td>
<td align="left">SRBCT</td>
<td align="left">Accuracy: 82.82</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B22">Ke et al. (2022)</xref>
</td>
<td align="left">NB, C4.5, GA, and ACO</td>
<td align="left">Lymphoma</td>
<td align="left">Accuracy: 93.21</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s1-3">
<title>1.3 Paper structure</title>
<p>The remaining parts of the paper are organized as follows. <xref ref-type="sec" rid="s2">Section 2</xref> depicts the approach used in developing the suggested model and describes the utilized dataset. The core ideas behind the proposed paradigm are discussed in <xref ref-type="sec" rid="s3">Section 3</xref>. The empirical evaluation of the suggested model is presented in <xref ref-type="sec" rid="s4">Section 4</xref>. In <xref ref-type="sec" rid="s5">Section 5</xref>, we present a critical analysis of the proposed model. The conclusion is presented in <xref ref-type="sec" rid="s6">Section 6</xref>.</p>
</sec>
</sec>
<sec id="s2">
<title>2 Methodology of BIMSSA</title>
<p>This section details the methods used to create the reported model and the dataset it was built from. The BIMSSA model utilizes a pipelined feature selection technique to efficiently address the dimensionality problem in the microarray data. Initially, Boruta and IMRMR are used with the objective of extracting pertinent gene expression features. In the next step, the SSA method is used to optimize the size of the feature set acquired by Boruta and IMRMR. The Support Vector Machine (SVM), Extreme Learning Machine (ELM), Random Forest (RF), AdaBoost, and XGBoost are the foundation learners that are used in the construction of an ensemble model.</p>
<sec id="s2-1">
<title>2.1 Dataset description</title>
<p>Four publicly available cancer gene expression datasets, including ALL-AML (D1), Lymphoma (D2), MLL (D3), and SRBCT (D4), are considered for developing the current work (<xref ref-type="bibr" rid="B49">Zhu et al., 2007</xref>). Among the above datasets, D1, D2, and D3 have three classes, and D4 has four classes. The ALL-AML dataset contains three classes labeled B-Cell, T-Cell, and AML. The Lymphoma cancer dataset includes DLBCL, FL, and CLL classes. Considering the MLL cancer dataset, it contains three classes, including ALL, AML, and MLL. Similarly, the class names for the SRBCT cancer dataset are BL, EWS, NB, and RMS. <xref ref-type="table" rid="T3">Table 3</xref> shows the proposed model&#x2019;s dataset description and the class-wise distribution in each dataset.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Dataset description.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset</th>
<th align="center">Number of samples</th>
<th align="center">Number of features</th>
<th align="center">Number of classes</th>
<th align="center">Class distribution</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">ALL-AML</td>
<td align="left">72</td>
<td align="left">7,129</td>
<td align="left">3</td>
<td align="left">B-Cell- 38; T-cell- 9; AML- 25</td>
</tr>
<tr>
<td align="left">Lymphoma</td>
<td align="left">62</td>
<td align="left">4,026</td>
<td align="left">3</td>
<td align="left">DLBCL- 46; FL- 9; CLL-11</td>
</tr>
<tr>
<td align="left">MLL</td>
<td align="left">72</td>
<td align="left">12,582</td>
<td align="left">3</td>
<td align="left">ALL- 24; AML- 28; MLL- 20</td>
</tr>
<tr>
<td align="left">SRBCT</td>
<td align="left">83</td>
<td align="left">2,308</td>
<td align="left">4</td>
<td align="left">BL- 29; EWS- 11; NB- 18; RMS- 25</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<title>2.2 Boruta feature selection</title>
<p>Boruta is a wrapper-based feature selection method that aims to improve the efficiency and interpretability of machine learning models. Boruta finds and chooses important features from a dataset by combining a random forest classifier with a significance testing method. Shadow features, essentially randomized replicas of the original features, are first generated by the algorithm. After that, the original and shadow feature sets are pooled and used to train a random forest model. Based on how each feature affects the model&#x2019;s performance, Boruta ranks them and compares them to their corresponding shadow features to determine their relative relevance. Initially, the dataset (D) contains the full set of features <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, where N is the total samples present in that dataset, then the shadow of the actual features is calculated using <xref ref-type="disp-formula" rid="e1">Equation 1</xref> to form a shadow feature set (Fshadow):<disp-formula id="e1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi mathvariant="normal">&#x3a1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi mathvariant="normal">&#x3a1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi mathvariant="normal">&#x3a1;</mml:mi>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi mathvariant="normal">&#x3a1;</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Where Shuffle is a function that shuffles the values of the feature <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with a random permutation P with indices {1,2,&#x2026;,N}. After getting the shadow features, the dataset D is extended to include the shadow features. Hence, a new feature set (<inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) is derived by integrating F with Fshadow, as represented in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.<disp-formula id="e2">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mtext>shadow</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>To the integrated feature set, <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the random forest importance technique is applied to calculate each feature&#x2019;s importance score (I) using <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.<disp-formula id="e3">
<mml:math id="m7">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x394;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x39a;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>After obtaining I for each feature, the threshold value (Ith) is calculated as the maximum importance score among the shadow features, as represented by <xref ref-type="disp-formula" rid="e4">Equation 4</xref>. <xref ref-type="disp-formula" rid="e5">Equation 5</xref> shows the criterion by which the Boruta feature selection algorithm identifies the important features present in the dataset and discards the others to update the feature set (F).<disp-formula id="e4">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">Max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m9">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>Once Boruta determines if a feature is relevant, the algorithm iteratively confirms or rejects it until all features have been categorized. A refined collection of characteristics expected to improve model generalization and prediction accuracy is the final output, a subset of the relevant features. Boruta provides a strong method for feature selection in the ML process, making it ideal for dealing with high-dimensional datasets (<xref ref-type="bibr" rid="B28">Maurya et al., 2023</xref>).</p>
</sec>
<sec id="s2-3">
<title>2.3 Improved maximum relevance and minimum redundancy (IMRMR)</title>
<p>Maximum relevance and minimum redundancy (MRMR) is a filter-based feature selection technique used to select a subset of features from a more extensive set of features. The objective is to select the most useful features while minimizing redundancy. This method is often used in machine learning and data analysis for better model performance, reduced overfitting, and more interpretable models. The IMRMR is the modified version of the MRMR feature selection algorithm. Like MRMR, it also selects relevant features with low redundancy scores. MRMR adopts Mutual Information (MI) to select those features that depend on the target feature. In addition to MI, IMRMR employs Pearson correlation to focus on the linear relation between the features. The following shows the workings of MRMR and IMRMR to determine the difference between these two for selecting features (<xref ref-type="bibr" rid="B14">Ding and Peng, 2005</xref>; <xref ref-type="bibr" rid="B47">Yan and Jia, 2019</xref>; <xref ref-type="bibr" rid="B48">Zhao et al., 2019</xref>).</p>
<sec id="s2-3-1">
<title>2.3.1 Relevance calculation</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Calculating each feature&#x2019;s relevance score requires assessing its correlation with the target variable.</p>
</list-item>
<list-item>
<p>&#x2022; It is generally accepted that characteristics with high relevance scores should be weighed more for classification.</p>
</list-item>
<list-item>
<p>&#x2022; The Relevance score (<inline-formula id="inf5">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) of a feature is calculated based on Mutual Information (MI), as given in <xref ref-type="disp-formula" rid="e6">Equations 6</xref> and <xref ref-type="disp-formula" rid="e7">7</xref>.</p>
</list-item>
</list>
<disp-formula id="e6">
<mml:math id="m11">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>F is the total feature set, &#x7c;F&#x7c; is the total number of features, <inline-formula id="inf6">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is known as the selected feature, <inline-formula id="inf7">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the target variable of the feature set. <inline-formula id="inf8">
<mml:math id="m15">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the joint probability of the feature <inline-formula id="inf9">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf10">
<mml:math id="m17">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> are the marginal probabilities of the features <inline-formula id="inf11">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> respectively (<xref ref-type="bibr" rid="B22">Ke et al., 2022</xref>).</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Redundancy calculation</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Perform a pairwise redundancy analysis between the characteristics. The word &#x201c;redundancy&#x201d; refers to the degree to which two characteristics are comparable regarding the information they contain.</p>
</list-item>
<list-item>
<p>&#x2022; Metrics such as mutual information, correlation, and distance-based metrics are common redundancy measurements.</p>
</list-item>
<list-item>
<p>&#x2022; It is best to avoid using features in the final decision that are highly redundant with one another since they give comparable information. The redundancy score (<inline-formula id="inf12">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) can be calculated using <xref ref-type="disp-formula" rid="e8">Equations 8</xref> and <xref ref-type="disp-formula" rid="e9">9</xref>.</p>
</list-item>
</list>
<disp-formula id="e8">
<mml:math id="m20">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>The MRMR score of the considered feature <inline-formula id="inf13">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is calculated as <xref ref-type="disp-formula" rid="e10">Equation 10</xref>. The feature with the highest MRMR score is considered for classification (<xref ref-type="bibr" rid="B37">Rustagi et al., 2024</xref>).<disp-formula id="e10">
<mml:math id="m23">
<mml:mrow>
<mml:mtext mathvariant="italic">MRMR</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">Max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="italic">M</mml:mi>
<mml:mtext mathvariant="italic">re</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="italic">M</mml:mi>
<mml:mtext mathvariant="italic">rd</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>By determining the significance and duplication of each feature, the MRMR technique quantifies the contribution of features. It does not take into account the combined effect of several characteristics. Mutual information measures are the only basis for relevance and redundancy. To choose the best feature subset, IMRMR employs two metrics&#x2014;the Pearson correlation coefficient and mutual information&#x2014;to assess the subsets&#x2019; relevance and redundancy, with weight factors ranging over [0.1, 1] with step size 0.1.</p>
<p>Calculate the relevance (<inline-formula id="inf14">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and redundancy (<inline-formula id="inf15">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) of each feature (<inline-formula id="inf16">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) in the feature space F using <xref ref-type="disp-formula" rid="e11">Equations 11</xref> and <xref ref-type="disp-formula" rid="e13">13</xref>, which introduce the Pearson correlation (<xref ref-type="disp-formula" rid="e12">Equation 12</xref>) and a weight factor <italic>&#x3b1;</italic> ranging over [0.1, 1] with step size 0.1.<disp-formula id="e11">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
<disp-formula id="e12">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2211;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>The modified IMRMR(i) can be calculated using <xref ref-type="disp-formula" rid="e14">Equation 14</xref>.<disp-formula id="e14">
<mml:math id="m30">
<mml:mrow>
<mml:mtext mathvariant="italic">IMRMR</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">Max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="italic">M</mml:mi>
<mml:mtext mathvariant="italic">re</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="italic">M</mml:mi>
<mml:mtext mathvariant="italic">rd</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Salp swarm optimization algorithm (SSA)</title>
<p>After applying the IMRMR feature selection, the relevant features remain in the dataset. However, applying the IMRMR does not ensure that every selected feature has a role in the diagnosis process. Some features may be relevant, yet removing them does not impact the diagnosis model. The Salp Swarm Optimization Algorithm (SSA), a wrapper-based feature selection algorithm, is implemented to ensure that the optimal set of relevant features is in the selected feature set, through which the processing time of the developed model can be decreased.</p>
<p>The SSA has been implemented to solve optimization challenges. It mimics the group dynamics of salps, sea animals similar to jellyfish. When applied to an optimization issue, SSA seeks to identify the best possible outcome (<xref ref-type="bibr" rid="B30">Mirjalili et al., 2017</xref>; <xref ref-type="bibr" rid="B12">Castelli et al., 2022</xref>). There are more than 1.2 million known marine creature species. Most of these species have similar habits and traits, including ways of communication, speed of movement, and foraging strategies. The salp&#x2019;s habitats are notoriously tough to reach, yet scientists think this behavior aids the animals in movement and feeding. Inspired by the coordinated movement of salps (gelatinous sea organisms), the SSA uses natural selection to find optimal solutions. Collective behavior in these species serves as a paradigm for the optimization difficulties SSA seeks to solve (<xref ref-type="bibr" rid="B21">Ibrahim et al., 2018</xref>; <xref ref-type="bibr" rid="B43">Thawkar, 2021</xref>; <xref ref-type="bibr" rid="B38">Sayed et al., 2018</xref>).</p>
<sec id="s2-4-1">
<title>2.4.1 Mathematical model</title>
<p>Salps, members of the family Salpidae, are found in the ocean. Their cylindrical form and end apertures evoke images of jellyfish, which pump water through their gelatinous bodies using internal feeding filters to propel themselves and eat. Some aquatic creatures exhibit similar behaviors, such as swarming. In the case of fish, this group is known as a school, whereas in the case of salps, it is referred to as a salp chain.</p>
<p>The SSA starts with a swarm X of n salps. <xref ref-type="disp-formula" rid="e15">Equation 15</xref> shows the two-dimensional matrix of the salp positions.<disp-formula id="e15">
<mml:math id="m31">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>11</mml:mn>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22f1;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>The SSA begins by determining the fitness function (F ()), population size, maximum number of iterations, initial step size (<inline-formula id="inf17">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), and damping factor (<inline-formula id="inf18">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>). For each salp <inline-formula id="inf19">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> the social influence is calculated as <xref ref-type="disp-formula" rid="e16">Equation 16</xref> (<xref ref-type="bibr" rid="B43">Thawkar, 2021</xref>).<disp-formula id="e16">
<mml:math id="m35">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
<inline-formula id="inf20">
<mml:math id="m36">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the fitness function of the salp <inline-formula id="inf21">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf22">
<mml:math id="m38">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> is the Euclidean distance between two salps <inline-formula id="inf23">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. With the personal best solution (Xb), the personal component P is calculated as <xref ref-type="disp-formula" rid="e17">Equation 17</xref>. The exploration component (E) can be calculated as <xref ref-type="disp-formula" rid="e18">Equation 18</xref>.<disp-formula id="e17">
<mml:math id="m40">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="e18">
<mml:math id="m41">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3ba;</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf24">
<mml:math id="m42">
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a random vector in [0, 1], and R () is a function that generates a random number between 0 and 1. <inline-formula id="inf25">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the personal best of the selected salp. The position of the salp can be updated by using <xref ref-type="disp-formula" rid="e19">Equation 19</xref>.<disp-formula id="e19">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>The fitness of the salp is updated by comparing the fitness of the new position with the previous position. This can be determined by using <xref ref-type="disp-formula" rid="e20">Equation 20</xref>. Personal best is based on the fitness function obtained. The position of salp can be obtained by using <xref ref-type="disp-formula" rid="e21">Equation 21</xref>.<disp-formula id="e20">
<mml:math id="m45">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
<disp-formula id="e21">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>The exploitation components of salp aim to improve the current obtained solution to an optimized one. This process ensures that the obtained solution is the best. Exploitation is the process of leader-follower dynamics where the entire salp group is divided into two groups. The first salp is the leader salp, and the others are called the follower salp. The position of the leader salp and follower salp is updated using <xref ref-type="disp-formula" rid="e22">Equations 22</xref> and <xref ref-type="disp-formula" rid="e23">23</xref>, respectively. Let x, y be the leader and follower salp belonging to the salp community X.<disp-formula id="e22">
<mml:math id="m47">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>Where lb and ub are the lower and upper bounds of the exploring dimension, x(t) is the current position of the leader salp at the current time t, and c1 and c2 are random numbers in the interval [0,1].<disp-formula id="e23">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>
<inline-formula id="inf26">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the current position of a follower salp <inline-formula id="inf27">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf28">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the current position of the preceding follower salp.</p>
</sec>
<sec id="s2-4-2">
<title>2.4.2 Working of SSA</title>
<p>The selected features from MRMR are taken as input for the SSA algorithm. The workings of the SSA are described below. <xref ref-type="fig" rid="F1">Figure 1</xref> shows how the SSA algorithm works.<list list-type="simple">
<list-item>
<p>Step 1: Parameter initialization is the primary step of the algorithm. Various parameters are initialized, including the maximum number of iterations (Max_It), the number of salps (num_salps), etc. The parameter initialization is shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
</list-item>
<list-item>
<p>Step 2: To get the best solution, the proper fitness function needs to be calculated. For the current work, average accuracy (<xref ref-type="disp-formula" rid="e24">Equation 24</xref>) determines the fitness function of SSA. Across the cross-validation folds, the fitness function <inline-formula id="inf29">
<mml:math id="m52">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be calculated by using <xref ref-type="disp-formula" rid="e25">Equation 25</xref>.</p>
</list-item>
</list>
<disp-formula id="e24">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2a;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(24)</label>
</disp-formula>
<disp-formula id="e25">
<mml:math id="m54">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(25)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>Step 3: The best solution is identified based on the objective function.</p>
</list-item>
<list-item>
<p>Step 4: Update the salp position as per <xref ref-type="disp-formula" rid="e15">Equation 15</xref>.</p>
</list-item>
<list-item>
<p>Step 5: Local and global exploration is performed to find the local and global solution. This algorithm aims to strike a balance between global exploration, which searches the whole solution space, and local exploration, which exploits the neighborhood of specific solutions.</p>
</list-item>
<list-item>
<p>Step 6: Update the best solution.</p>
</list-item>
<list-item>
<p>Step 7: Repeat Step 3 to Step 6 for each feature until the number of iterations reaches Max_It.</p>
</list-item>
</list>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Working of SSA.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g001.tif"/>
</fig>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Parameter initialization of SSA.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">SSA parameter</th>
<th align="center">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Max_It</td>
<td align="center">100</td>
</tr>
<tr>
<td align="center">num_salps</td>
<td align="center">30</td>
</tr>
<tr>
<td align="center">Lb</td>
<td align="center">&#x2212;10</td>
</tr>
<tr>
<td align="center">Ub</td>
<td align="center">10</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf30">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf31">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.8</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-5">
<title>2.5 Voting ensemble classifier</title>
<p>The current work employs ML classifiers such as SVM (<xref ref-type="bibr" rid="B1">Alfian et al., 2022</xref>), RF (<xref ref-type="bibr" rid="B1">Alfian et al., 2022</xref>), ELM (<xref ref-type="bibr" rid="B15">Ding et al., 2013</xref>), AdaBoost (<xref ref-type="bibr" rid="B7">Asselman et al., 2021</xref>), and XGBoost (<xref ref-type="bibr" rid="B7">Asselman et al., 2021</xref>) to make the initial prediction. Then, the majority voting classifier is used as an ensemble learning approach to make the final prediction. It is a simple and powerful strategy for integrating the results of numerous models into a single prediction. When many models provide varying results and a group choice must be made based on those results, majority voting is an effective tool (<xref ref-type="bibr" rid="B31">Naji et al., 2021</xref>; <xref ref-type="bibr" rid="B34">Pati et al., 2023</xref>).</p>
</sec>
</sec>
<sec id="s3">
<title>3 Workflow of BIMSSA</title>
<p>Initially, the dataset is normalized to remove noisy data. The MRMR is then applied to the normalized data to choose the best features. As a last step, the SSA is used as an optimizer. <xref ref-type="statement" rid="algorithm_1">Algorithm 1</xref> shows the working description of BIMSSA, and the functionality of the suggested model is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. The suggested method&#x2019;s operation is described below.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Workflow of the Proposed BIMSSA for feature selection and classification.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g002.tif"/>
</fig>
<sec id="s3-1">
<title>3.1 Step-1: dataset considered for preprocessing</title>
<p>The dataset is first subjected to a preprocessing stage to ensure data quality and consistency. This involves several key steps.<list list-type="simple">
<list-item>
<p>&#x2022; Data Cleaning: Remove or correct any noisy data, missing values, or inconsistencies within the dataset.</p>
</list-item>
<list-item>
<p>&#x2022; Normalization: Apply normalization techniques to scale the features to a standard range, typically [0, 1] or [-1, 1]. This helps in improving the performance and convergence of machine learning algorithms.</p>
</list-item>
<list-item>
<p>&#x2022; Encoding: If applicable, convert categorical variables into numerical values using label encoding.</p>
</list-item>
<list-item>
<p>&#x2022; Balancing the Dataset: If the dataset is imbalanced, techniques like oversampling, undersampling, or synthetic data generation (e.g., SMOTE) are applied to ensure that the model does not become biased towards the majority class.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-2">
<title>3.2 Step 2: dataset splitting</title>
<p>The preprocessed dataset is then split into training and testing sets using a train&#x2013;test ratio of 85&#x2013;15. This helps in evaluating the robustness of the model.</p>
</sec>
<sec id="s3-3">
<title>3.3 Step 3: Feature selection</title>
<p>To enhance model performance, feature selection algorithms are applied.<list list-type="simple">
<list-item>
<p>&#x2022; Boruta Algorithm: An all-relevant feature selection method that identifies relevant features by comparing original attributes with shadow attributes.</p>
</list-item>
<list-item>
<p>&#x2022; Improved Minimum Redundancy Maximum Relevance (MRMR): Selects the features that have maximum relevance to the target variable and minimal redundancy among themselves.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-4">
<title>3.4 Step 4: optimization using salp swarm algorithm (SSA)</title>
<p>The SSA is used to optimize the feature set selected in the previous step.<list list-type="simple">
<list-item>
<p>i. Initiate Population</p>
</list-item>
<list-item>
<p>ii. Parameter Initialization</p>
</list-item>
<list-item>
<p>iii. Fitness Function Calculation</p>
</list-item>
<list-item>
<p>iv. Optimization Process Starts: Iteratively update the positions of salps to find the optimal feature set.</p>
</list-item>
<list-item>
<p>v. Check the Maximum Iteration</p>
</list-item>
<list-item>
<p>vi. If Not Exceeding, Return to iii</p>
</list-item>
<list-item>
<p>vii. If Exceeds, Obtain the Best Feature Set</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-5">
<title>3.5 Step 5: model training</title>
<p>Based on the training data and the optimized feature set, train the following five models.<list list-type="simple">
<list-item>
<p>&#x2022; Support Vector Machine (SVM)</p>
</list-item>
<list-item>
<p>&#x2022; Random Forest (RF)</p>
</list-item>
<list-item>
<p>&#x2022; Extreme Learning Machine (ELM)</p>
</list-item>
<list-item>
<p>&#x2022; AdaBoost</p>
</list-item>
<list-item>
<p>&#x2022; XGBoost</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-6">
<title>3.6 Step 6: Classifier Selection</title>
<p>Select the top three classifiers from the trained models based on their highest accuracy during training.</p>
</sec>
<sec id="s3-7">
<title>3.7 Step 7: Model Evaluation</title>
<p>Test the ensemble classifier using the testing dataset to obtain evaluative parameters such as accuracy, precision, recall, F1-score, and ROC-AUC.</p>
<p>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<p>Working of Proposed BIMSSA model.<list list-type="simple">
<list-item>
<p>Input: <bold>Dataset D&#x2190; {D</bold>
<sub>
<bold>1</bold>
</sub>
<bold>, D</bold>
<sub>
<bold>2</bold>
</sub>
<bold>, D</bold>
<sub>
<bold>3</bold>
</sub>
<bold>, D</bold>
<sub>
<bold>4</bold>
</sub>
<bold>}, Feature set F&#x2190; {f</bold>
<sub>
<bold>1</bold>
</sub>
<bold>, f</bold>
<sub>
<bold>2</bold>
</sub>
<bold>, &#x2026;&#x2026;f</bold>
<sub>
<bold>n</bold>
</sub>
<bold>}, Max Iteration, num_salps, Lb, Ub, &#x393;</bold>
<sub>
<bold>1</bold>
</sub>
<bold>, &#x393;</bold>
<sub>
<bold>2</bold>
</sub>
<bold>, BL: {SVM (BL</bold>
<sub>
<bold>1</bold>
</sub>
<bold>), RF (BL</bold>
<sub>
<bold>2</bold>
</sub>
<bold>), ELM (BL</bold>
<sub>
<bold>3</bold>
</sub>
<bold>), AdaBoost (BL</bold>
<sub>
<bold>4</bold>
</sub>
<bold>), XGBoost (BL</bold>
<sub>
<bold>5</bold>
</sub>
<bold>),</bold> <inline-formula id="inf32">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">D</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> <bold>is the feature set selected by Boruta,</bold> <inline-formula id="inf33">
<mml:math id="m58">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">D</mml:mi>
<mml:mo>&#x2033;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> <bold>is the feature selected by IMRMR,</bold> <inline-formula id="inf34">
<mml:math id="m59">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">D</mml:mi>
<mml:mo>&#x2034;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> <bold>is the feature selected by SSA}</bold>
</p>
</list-item>
<list-item>
<p>Output: <bold>Performance Measures</bold>
</p>
</list-item>
<list-item>
<p>For k &#x3d; 1 &#x2192; 4</p>
</list-item>
<list-item>
<p>&#x2003;Normalize Dk</p>
</list-item>
<list-item>
<p>End for</p>
</list-item>
<list-item>
<p>Split the dataset with a Train:Test ratio of 85:15</p>
</list-item>
<list-item>
<p>Invoke Boruta () on D and return the resulting feature subset <inline-formula id="inf35">
<mml:math id="m60">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>Invoke IMRMR () to <inline-formula id="inf36">
<mml:math id="m61">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and return the resulting subset <inline-formula id="inf37">
<mml:math id="m62">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mo>&#x2033;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>for k &#x3d; 1 &#x2192; 4</p>
</list-item>
<list-item>
<p>&#x2003;for i &#x3d; 1 &#x2192; n</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Find <inline-formula id="inf38">
<mml:math id="m63">
<mml:mrow>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e1">Equation 1</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf39">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mtext>re</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x221d;</mml:mo>
<mml:mo>&#x2a;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x221d;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mtext>Pear</mml:mtext>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for j &#x3d; i&#x2b;1 &#x2192; n</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Find <inline-formula id="inf40">
<mml:math id="m65">
<mml:mrow>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e3">Equation 3</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;<inline-formula id="inf41">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mtext>rd</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#x221d;</mml:mo>
<mml:mo>&#x2a;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>MI</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x221d;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mtext>Pear</mml:mtext>
<mml:mi mathvariant="normal">c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;End for</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;Find <inline-formula id="inf42">
<mml:math id="m67">
<mml:mrow>
<mml:mtext>IMRMR</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e9">Equation 9</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;<inline-formula id="inf43">
<mml:math id="m68">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mo>&#x2033;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x2190; <inline-formula id="inf44">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;End for</p>
</list-item>
<list-item>
<p>End For</p>
</list-item>
<list-item>
<p>Invoke SSA () to <inline-formula id="inf45">
<mml:math id="m70">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mo>&#x2033;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and return resulting feature subset <inline-formula id="inf46">
<mml:math id="m71">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mo>&#x2034;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>for k &#x3d; 1 &#x2192; 4</p>
</list-item>
<list-item>
<p>&#x2003;for i &#x3d; 1 &#x2192; n</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Find Fitness function F (<inline-formula id="inf47">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Calculate <inline-formula id="inf48">
<mml:math id="m73">
<mml:mrow>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e7">Equation 7</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Calculate <inline-formula id="inf49">
<mml:math id="m74">
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e8">Equation 8</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Calculate <inline-formula id="inf50">
<mml:math id="m75">
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e9">Equation 9</xref>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Find <inline-formula id="inf51">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>new</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2a;</mml:mo>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2a;</mml:mo>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Find <inline-formula id="inf52">
<mml:math id="m77">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>new</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Update position salp <inline-formula id="inf53">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;End for</p>
</list-item>
<list-item>
<p>&#x2003;Update <inline-formula id="inf54">
<mml:math id="m79">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mo>&#x2034;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x2190; <inline-formula id="inf55">
<mml:math id="m80">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>End for</p>
</list-item>
<list-item>
<p>Apply Majority Voting ensemble technique with the three best BL</p>
</list-item>
<list-item>
<p>Measure the performance of the proposed model</p>
</list-item>
<list-item>
<p>for k &#x3d; 1 &#x2192; 4</p>
</list-item>
<list-item>
<p>&#x2003;for i &#x3d; 1 &#x2192; 5</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Apply BLi to <inline-formula id="inf56">
<mml:math id="m81">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mo>&#x2034;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Calculate the performance measures</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;Find best three BLi</p>
</list-item>
<list-item>
<p>&#x2003;End for</p>
</list-item>
<list-item>
<p>End for</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
</sec>
<sec id="s4">
<title>4 Empirical analysis</title>
<p>The suggested model is implemented in Python 3.11 on Ubuntu 20.04 with 32&#xa0;GB of RAM, an Intel Core i7 CPU, and a 1&#xa0;TB SSD. The Boruta and IMRMR feature selection techniques extract the important and relevant features from the dataset. After identifying the relevant features, the SSA optimization technique selects a set of relevant features. The implemented Boruta feature selection algorithm selects 2,067, 1,043, 3,767, and 643 features for the ALL-AML, Lymphoma, MLL, and SRBCT datasets, respectively. The Boruta &#x2b; IMRMR hybrid feature selection model selects 413, 197, 656, and 118 features for the ALL-AML, Lymphoma, MLL, and SRBCT datasets, respectively. Finally, SSA is applied to the feature subset selected by IMRMR, resulting in 57, 29, 194, and 23 features selected by the hybrid model Boruta &#x2b; IMRMR &#x2b; SSA for the final classification. The empirical analysis has been carried out using three different approaches. Approach 1 shows the performance of all considered classifiers with the IMRMR feature selection algorithm. Approach 2 shows the performance of all considered classifiers with IMRMR and SSA optimizer. Approach 3 shows the performance of the proposed ensemble classifier with the three best classifiers from Approach 2 based on accuracy. The performance in all of the above-mentioned approaches is based on ten different parameters, including Accuracy (ACC), Precision (PRE), F&#x3b2;-score (F1-Score (F1-S) and F2 Score (F 2)), Specificity (SPE), Misclassification Rate (MCR), False Negative Rate (FNR), False Positive Rate (FPR), and MCC. Additionally, we employed confidence interval (CI) statistical analysis of the obtained results (<xref ref-type="bibr" rid="B7">Asselman et al., 2021</xref>; <xref ref-type="bibr" rid="B31">Naji et al., 2021</xref>). The relative importance of precision and recall can be adjusted using the beta parameter of the F-beta score. 
Precision and recall are of equal value when &#x3b2; &#x3d; 1 (F1 score). When &#x3b2; is higher than 1 (F 2 score), the model is more sensitive to positive cases since recall is given more weight. As beta decreases (to values of F0.5), the model becomes more concerned with delivering accurate positive predictions. In the proposed model, two values of &#x3b2; have been considered, namely F1 and F2. The above-said parameters can be calculated by using <xref ref-type="disp-formula" rid="e26">Equations 26</xref>&#x2013;<xref ref-type="disp-formula" rid="e35">35</xref>. The training set contains 61 samples, and the test set contains 11 samples for the ALL-AML dataset. The training and testing sets contain 52 and 10 samples, respectively, for the Lymphoma dataset. The training set contains 61 samples, and the test set contains 11 samples for the MLL dataset. For the SRBCT dataset, the training set includes 70 samples and the test set includes 13 samples. <xref ref-type="disp-formula" rid="e26">Equations 26</xref>&#x2013;<xref ref-type="disp-formula" rid="e35">35</xref> show the calculation of all evaluative parameters.<disp-formula id="e26">
<mml:math id="m82">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(26)</label>
</disp-formula>
<disp-formula id="e27">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(27)</label>
</disp-formula>
<disp-formula id="e28">
<mml:math id="m84">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(28)</label>
</disp-formula>
<disp-formula id="e29">
<mml:math id="m85">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(29)</label>
</disp-formula>
<disp-formula id="e30">
<mml:math id="m86">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(30)</label>
</disp-formula>
<disp-formula id="e31">
<mml:math id="m87">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
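<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>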
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(31)</label>
</disp-formula>
<disp-formula id="e32">
<mml:math id="m88">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mn>2</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(32)</label>
</disp-formula>
<disp-formula id="e33">
<mml:math id="m89">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(33)</label>
</disp-formula>
<disp-formula id="e34">
<mml:math id="m90">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(34)</label>
</disp-formula>
<disp-formula id="e35">
<mml:math id="m91">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2a;</mml:mo>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2a;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(35)</label>
</disp-formula>
<inline-formula id="inf57">
<mml:math id="m92">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold">T</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mtext>and&#x2009;</mml:mtext>
<mml:mover accent="true">
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are the true and false positives of the confusion matrix. <inline-formula id="inf58">
<mml:math id="m93">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold">T</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mtext>and&#x2009;</mml:mtext>
<mml:mover accent="true">
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mrow>
<mml:mo>&#x2550;</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are the true and false negatives of the confusion matrix.</p>
<sec id="s4-1">
<title>4.1 Analysis of hybrid models with Boruta, IMRMR and SSA</title>
<p>The performance analysis of the mentioned hybrid model, such as Boruta &#x2b; IMRMR &#x2b; SSA &#x2b; SVM, Boruta &#x2b; IMRMR &#x2b; SSA &#x2b; RF, Boruta &#x2b; IMRMR &#x2b; SSA &#x2b; ELM, Boruta &#x2b; IMRMR &#x2b; SSA &#x2b; AdaBoost, Boruta &#x2b; IMRMR &#x2b; SSA &#x2b; XGBoost for different cancer gene expression datasets is summarized as follows. <xref ref-type="fig" rid="F3">Figures 3</xref>&#x2013;<xref ref-type="fig" rid="F6">6</xref> show the performance of the hybrid mentioned above models in contrast to different datasets.<list list-type="simple">
<list-item>
<p>&#x2022; For the ALL-AML dataset, the Boruta (B) &#x2b; IMRMR (IM) &#x2b; XGBoost with SSA shows the highest accuracy at 0.917. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.929, 0.929, 0.929, 0.929, and 0.900, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.071, 0.100, 0.829, and 0.083, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; For the Lymphoma dataset, B &#x2b; IM &#x2b; SSA &#x2b; AdaBoost shows the best performance with an accuracy of 0.919. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.938, 0.957, 0.947, 0.953, and 0.800, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.043, 0.200, 0.776, and 0.081, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; Similarly, for the MLL dataset, B &#x2b; IM &#x2b; AdaBoost with SSA shows the highest performance with an accuracy of 0.917 compared to other hybrid models, as presented. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.939, 0.939, 0.939, 0.939, and 0.870, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.061, 0.130, 0.808, and 0.083, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; With the SRBCT gene expression dataset, the B &#x2b; IM &#x2b; SSA &#x2b; XGBoost performs best with an accuracy of 0.916. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.941, 0.923, 0.932, 0.927, and 0.903, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.077, 0.097, 0.821, and 0.084, respectively.</p>
</list-item>
</list>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Out-of-sample performance measure of the different classifiers with MRMR and SSA for the ALL-AML dataset.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Out-of-sample performance measure of the different classifiers with MRMR and SSA for the Lymphoma dataset.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g004.tif"/>
</fig>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Out-of-sample performance measure of different classifiers with MRMR and SSA for the MLL dataset.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g005.tif"/>
</fig>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Out-of-sample performance measure of the different classifiers with MRMR and SSA for the SRBCT dataset.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g006.tif"/>
</fig>
<p>The analysis demonstrates that the B &#x2b; IM &#x2b; SSA &#x2b; AdaBoost model attains the highest accuracy among the hybrid models, 0.919, on the Lymphoma dataset. In order to enhance the accuracy even further, it is possible to include an ensemble machine learning approach. The present study uses the majority voting approach to combine predictions from various models and make a final choice based on the majority of votes. This strategy may improve the accuracy by using the advantages of many models while reducing the limitations of each model. By using majority voting, the ultimate model may amalgamate these advantages, resulting in enhanced overall accuracy and more resilient categorization outcomes. The hybrid models mentioned demonstrate remarkable accuracy across several cancer gene expression datasets, reaching a maximum accuracy of 0.919. A majority voting ensemble classifier may help overcome the problems with the provided hybrid models, namely, their high computational cost and the possibility of overfitting caused by combining several algorithms. By integrating several models&#x2019; strengths, this method streamlines decision-making, leads to more accurate forecasts with less hyperparameter tweaking, and reduces the danger of overfitting. To further enhance accuracy, the current work integrates the prediction of the best three classifiers using the majority voting technique. This improves overall precision, minimizes mistakes, and results in more dependable and transferable results in cancer categorization assignments.</p>
</sec>
<sec id="s4-2">
<title>4.2 Analysis of the proposed model</title>
<p>The performance analysis of the proposed BIMSSA model is shown in <xref ref-type="table" rid="T5">Table 5</xref>. <xref ref-type="table" rid="T5">Table 5</xref> quantifies the ACC, PRE, REC, F-1S, F-2, and SPE measures with Clopper&#x2013;Pearson confidence interval (CI) (<xref ref-type="bibr" rid="B35">Puza and O&#x2019;neill, 2006</xref>).<list list-type="simple">
<list-item>
<p>&#x2022; For the ALL-AML dataset, the proposed BIMSSA model shows the accuracy as 0.967 with a CI of 0.887&#x2013;0.99. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.974, 0.974, 0.974, 0.974, and 0.955, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.026, 0.046, 0.929, and 0.033, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; For the Lymphoma dataset, the proposed BIMSSA shows an accuracy level of 0.962 with a CI of 0.868&#x2013;0.995. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.972, 0.972, 0.972, 0.972, and 0.938, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.028, 0.063, 0.910, and 0.039, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; Similarly, for the MLL dataset, the proposed BIMSSA shows the accuracy level as 0.951 with a CI of 0.887&#x2013;0.99. The other parameters, such as PRE, REC, F1-S, F 2, and SPE, are 0.949, 0.974, 0.961, 0.969, and 0.913, respectively. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.026, 0.087, 0.895, and 0.049, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; With SRBCT, the BIMSSA shows a 0.971 accuracy level with a CI of 0.901&#x2013;0.991. The other parameters, such as PRE, REC, F1-S, and F 2, are all 0.978. Specificity (SPE) is 0.958. The other parameters, such as FNR, FPR, MCC, and MCR, are 0.022, 0.042, 0.937, and 0.029, respectively.</p>
</list-item>
<list-item>
<p>&#x2022; The model maintains a good level of performance throughout all four datasets (D1, D2, D3, and D4) in regards to accuracy, recall, precision, F1-Score, F2-Score, and specificity. The Clopper-Pearson method&#x2019;s confidence intervals support the measures&#x2019; dependability; narrow intervals show little fluctuation and strong faith in the model&#x2019;s forecasts. As shown by its low false positive rate (FPR) of 0.042, high Matthews correlation coefficient (MCC) of 0.937, and low Misclassification Rate (MCR) of 0.029, dataset D4 has the best overall performance, especially in accuracy and specificity. These numbers show how well D4 can detect real positives and genuine negatives, which allows it to make strong and trustworthy predictions. D1 additionally demonstrates excellent outcomes; it is a balanced model with a little advantage in predictive accuracy, with a FNR of 0.026, FPR of 0.046, MCC of 0.929, and MCR of 0.033. The great predictive power for D2 is shown in its FNR of 0.028, FPR of 0.063, MCC of 0.910, and MCR of 0.039, which are somewhat lower than D1 and D4, but it still maintains a high level of efficiency. Having a greater FPR of 0.087 affects D3&#x2019;s MCC of 0.895 and MCR of 0.049, which in turn causes a little drop in specificity. Regardless, the model&#x2019;s great predictive power and balance between recall and accuracy make it quite useful across all datasets.</p>
</list-item>
<list-item>
<p>&#x2022; The developed BIMSSA model&#x2019;s ROC curve is shown in <xref ref-type="fig" rid="F7">Figure 7</xref> for ALL-AML, Lymphoma, MLL, and SRBCT datasets. The suggested model&#x2019;s AUC for the ALL-AML dataset is 0.971. The suggested model&#x2019;s AUC for the Lymphoma dataset is 0.961. The suggested model has AUC values of 0.950 and 0.985 for the MLL and SRBCT datasets.</p>
</list-item>
<list-item>
<p>&#x2022; <xref ref-type="fig" rid="F8">Figure 8</xref> shows the training and test time of the developed BIMSSA compared to cancer datasets.</p>
</list-item>
</list>
</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Performance Analysis of Proposed BIMSSA Model with Clopper&#x2013;Pearson confidence interval (CI).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset</th>
<th align="center">Metric</th>
<th align="center">Predicted value</th>
<th align="center">Clopper&#x2013;Pearson CI (95%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="left">D1</td>
<td align="left">ACC</td>
<td align="left">0.967</td>
<td align="left">(0.887, 0.99)</td>
</tr>
<tr>
<td align="left">PRE</td>
<td align="left">0.974</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">REC</td>
<td align="left">0.974</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">F1-S</td>
<td align="left">0.974</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">F-2</td>
<td align="left">0.974</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">SPE</td>
<td align="left">0.955</td>
<td align="left">(0.863, 0.99)</td>
</tr>
<tr>
<td rowspan="6" align="left">D2</td>
<td align="left">ACC</td>
<td align="left">0.962</td>
<td align="left">(0.868, 0.995)</td>
</tr>
<tr>
<td align="left">PRE</td>
<td align="left">0.972</td>
<td align="left">(0.897, 0.995)</td>
</tr>
<tr>
<td align="left">REC</td>
<td align="left">0.972</td>
<td align="left">(0.897, 0.995)</td>
</tr>
<tr>
<td align="left">F1-S</td>
<td align="left">0.972</td>
<td align="left">(0.897, 0.995)</td>
</tr>
<tr>
<td align="left">F-2</td>
<td align="left">0.972</td>
<td align="left">(0.897, 0.995)</td>
</tr>
<tr>
<td align="left">SPE</td>
<td align="left">0.938</td>
<td align="left">(0.841, 0.979)</td>
</tr>
<tr>
<td rowspan="6" align="left">D3</td>
<td align="left">ACC</td>
<td align="left">0.951</td>
<td align="left">(0.887, 0.99)</td>
</tr>
<tr>
<td align="left">PRE</td>
<td align="left">0.949</td>
<td align="left">(0.863, 0.982)</td>
</tr>
<tr>
<td align="left">REC</td>
<td align="left">0.974</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">F1-S</td>
<td align="left">0.961</td>
<td align="left">(0.887, 0.99)</td>
</tr>
<tr>
<td align="left">F-2</td>
<td align="left">0.969</td>
<td align="left">(0.912, 0.996)</td>
</tr>
<tr>
<td align="left">SPE</td>
<td align="left">0.913</td>
<td align="left">(0.819, 0.963)</td>
</tr>
<tr>
<td rowspan="6" align="left">D4</td>
<td align="left">ACC</td>
<td align="left">0.971</td>
<td align="left">(0.901, 0.991)</td>
</tr>
<tr>
<td align="left">PRE</td>
<td align="left">0.978</td>
<td align="left">(0.923, 0.997)</td>
</tr>
<tr>
<td align="left">REC</td>
<td align="left">0.978</td>
<td align="left">(0.923, 0.997)</td>
</tr>
<tr>
<td align="left">F1-S</td>
<td align="left">0.978</td>
<td align="left">(0.923, 0.997)</td>
</tr>
<tr>
<td align="left">F-2</td>
<td align="left">0.978</td>
<td align="left">(0.923, 0.997)</td>
</tr>
<tr>
<td align="left">SPE</td>
<td align="left">0.958</td>
<td align="left">(0.88, 0.991)</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Comparative ROC evaluation for ALL-AML, Lymphoma, MLL, and SRBCT using BIMSSA.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Comparative analysis of Training and Testing Time of BIMSSA over different datasets.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g008.tif"/>
</fig>
<p>The proposed BIMSSA includes Boruta, IMRMR, and SSA as the feature selection algorithms. In addition, BIMSSA includes SVM, RF, ELM, AdaBoost, and XGBoost as the base classifiers, with majority voting as the ensemble classifier. The model is evaluated on four different high-dimensional datasets. The computational complexity of the proposed BIMSSA model becomes O(T&#x22c5;F&#x22c5;log(F)) &#x2b; O(P&#x22c5;F&#x22c5;I) &#x2b; O(N<sup>2</sup>&#x22c5;F), with N &#x226a; F, where N is the number of samples present in the dataset, F is the number of features, T is the number of trees in the RF classifier, I is the number of iterations, and P is the population size for SSA.</p>
<p>Although the proposed model is efficient, it still retains several limitations that must be mitigated. The first is the increased computational complexity arising from the iterative structure of the Boruta algorithm, the added overhead of SSA, and the computation-intensive nature of the classifiers used, such as SVM and tree-based methods. Second, the hybrid approach increases the complexity of implementation and calls for careful coordination between the feature selection and classification phases for optimal performance. Finally, the pipeline&#x2019;s iterative nature may extend training time, making it cumbersome for situations that require rapid deployment and fast results.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Critical analysis</title>
<p>In this section the proposed BIMSSA is compared with some existing literature based on four cancer datasets including ALL-AML, Lymphoma, MLL, and SRBCT. The detailed comparison of BIMSSA with the existing works (<xref ref-type="bibr" rid="B42">Sun et al., 2019</xref>; <xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan, 2020</xref>; <xref ref-type="bibr" rid="B32">Nouri-Moghaddam et al., 2021</xref>; <xref ref-type="bibr" rid="B46">Yan et al., 2021</xref>; <xref ref-type="bibr" rid="B4">Alomari et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Rostami et al., 2022</xref>; <xref ref-type="bibr" rid="B22">Ke et al., 2022</xref>), emphasizing the performance differences across these datasets is given below.</p>
<sec id="s5-1">
<title>5.1 Lymphoma dataset</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; For the Lymphoma dataset, the proposed BIMSSA shows an accuracy of 96.2%; BIMSSA again shows strong performance in classifying Lymphoma, indicating the model&#x2019;s versatility.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> reports an accuracy of 88.57% for Lymphoma, which is significantly lower than that of BIMSSA. The proposed model outperforms <xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> by &#x223c;8.61%. This difference may highlight the superiority of the BIMSSA model in handling Lymphoma data, possibly due to better feature selection, model training, and data preprocessing techniques.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B22">Ke et al. (2022)</xref> reports an accuracy of 93.21% for Lymphoma, which is still lower than that of BIMSSA by &#x223c;3.21%. This further confirms BIMSSA&#x2019;s effectiveness in this context.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s5-2">
<title>5.2 MLL dataset</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; BIMSSA achieves an accuracy of 95.1%, showcasing its strong performance in this dataset as well.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> obtains an accuracy of 86.19%, which is lower than that of BIMSSA by a significant margin of &#x223c;10.33%. This difference further emphasizes BIMSSA&#x2019;s advantage in handling complex datasets.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s5-3">
<title>5.3 SRBCT dataset</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; BIMSSA with an accuracy of 97.1%, performs exceptionally well on the SRBCT dataset, indicating its robustness and effectiveness across different cancer types.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B42">Sun et al. (2019)</xref> reports a high accuracy of 93.6% for SRBCT, which is noteworthy but still lower than BIMSSA&#x2019;s accuracy by &#x223c;3.73%.</p>
</list-item>
<list-item>
<p>&#x2022; In <xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan (2020)</xref> the accuracy is 81.58%, which is lower than that of BIMSSA by a significant margin of &#x223c;19.02%, suggesting that the feature selection, classification and preprocessing techniques used in <xref ref-type="bibr" rid="B29">Meenachi and Ramakrishnan (2020)</xref> struggle with this dataset.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B32">Nouri-Moghaddam et al. (2021)</xref> reports an accuracy of 90.72% for SRBCT, which, while remarkable, is still outperformed by BIMSSA by &#x223c;6.92%.</p>
</list-item>
<list-item>
<p>&#x2022; The accuracy reported in <xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> is 76.74%, the lowest among all the compared literature, and BIMSSA outperforms it by &#x223c;26.53%. This indicates that the model in <xref ref-type="bibr" rid="B46">Yan et al. (2021)</xref> may not be as effective for SRBCT.</p>
</list-item>
<list-item>
<p>&#x2022; In <xref ref-type="bibr" rid="B36">Rostami et al. (2022)</xref> the accuracy of 82.82% is reported, which is again significantly low by &#x223c;17.24% when compared to BIMSSA.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s5-4">
<title>5.4 ALL-AML dataset</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; BIMSSA achieves an accuracy of 96.7%, which demonstrates strong performance and reliability in classifying this cancer type.</p>
</list-item>
<list-item>
<p>&#x2022; The work in <xref ref-type="bibr" rid="B4">Alomari et al. (2021)</xref> reports an exceptionally high accuracy of 99.86%, which outperforms the proposed BIMSSA model by &#x223c;3.27%.</p>
</list-item>
</list>
</p>
<p>BIMSSA proves to be a very efficient and dependable model for categorizing cancer, surpassing the accuracy of other models from the existing literature in all tested datasets except ALL-AML. Though the accuracy is low as compared to <xref ref-type="bibr" rid="B4">Alomari et al. (2021)</xref> in case of ALL-AML dataset, still it shows an accuracy of 96.7% that can be considered as a noteworthy performance. The model&#x2019;s constant and superior performance in classifying many forms of cancer, together with its excellent generalization, sets it out as an exceptional model in the field of cancer classification. Therefore, BIMSSA showcases the flexibility and applicability needed for various cancer datasets. <xref ref-type="fig" rid="F9">Figure 9</xref> shows the comparison of BIMSSA with other existing models in contrast to diverse datasets.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Out-of-sample performance evaluation of BIMSSA with Existing literature.</p>
</caption>
<graphic xlink:href="fgene-15-1491602-g009.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<title>6 Conclusion</title>
<p>The primary focus of this study is developing an ensemble machine learning-based model for cancer detection. Gene expression data, also known as microarray data, leaves its unique imprint when used in a cancer detection model. However, there are challenges unique to dealing with microarray data, such as a limited sample size, which diminishes the model&#x2019;s performance. To deal with this issue, the proposed BIMSSA considers a pipeline feature selection approach with the Boruta, IMRMR, and SSA feature selection algorithms to select relevant features. Classifiers such as SVM, RF, ELM, AdaBoost, and XGBoost are then applied to the optimized feature set as the base classifiers. Based on the performance, three classifiers, ELM, AdaBoost, and XGBoost with Boruta, IMRMR, and SSA feature selection, are considered for developing the ensemble model with a majority voting classifier. After selecting three classifiers, we use majority voting to create an ensemble ML-based cancer diagnostic model called BIMSSA. Empirical results from this study using the developed BIMSSA reveal an accuracy of 0.967, 0.962, 0.951, and 0.971 for ALL-AML, Lymphoma, MLL, and SRBCT datasets, respectively. The suggested model&#x2019;s AUC for the ALL-AML dataset is 0.973. The proposed model&#x2019;s AUC for the Lymphoma dataset is 0.969. The suggested model achieves an AUC of 0.951 for the MLL dataset and 0.979 for the SRBCT dataset. The proposed model does not outperform the existing literature on the ALL-AML dataset. There are also some limitations to the current work. In particular, the current research does not consider the concept of class imbalance.</p>
<p>As a future scope of this manuscript, the feature selection process will be reversed, starting with SSA to explore different combinations of features and identify the promising subset, followed by IMRMR to refine the feature subset further and prioritize the most informative features. Finally, Boruta feature selection is applied to validate the selected feature subset and provide additional insights into the importance of the feature. Testing the model&#x2019;s performance on more kinds of cancer gene expression datasets is planned for the future of this study. In addition, future plans for this study include using deep learning approaches to increase the model&#x2019;s capability for working with picture datasets.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://csse.szu.edu.cn/staff/zhuzx/Datasets.html">https://csse.szu.edu.cn/staff/zhuzx/Datasets.html</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>PP: Conceptualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. SB: Investigation, Writing&#x2013;original draft, Writing&#x2013;review and editing. AmP: Data curation, Investigation, Writing&#x2013;original draft, Writing&#x2013;review and editing. AbP: Conceptualization, Investigation, Writing&#x2013;original draft, Writing&#x2013;review and editing. BS: Software, Writing&#x2013;original draft, Writing&#x2013;review and editing. ZG: Conceptualization, Data curation, Investigation, Methodology, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. HL: Methodology, Validation, Writing&#x2013;original draft, Writing&#x2013;review and editing. PJ: Conceptualization, Software, Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was funded by the Innovational Fund for Scientific and Technological Personnel of Hainan Province (Grant No. KJRC 2023L01), and the South China Sea Rising Star Project of Hainan Province.</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that they were an editorial board member of Frontiers at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alfian</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Syafrudin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fahrurrozi</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Fitriyani</surname>
<given-names>N. L.</given-names>
</name>
<name>
<surname>Atmaji</surname>
<given-names>F. T. D.</given-names>
</name>
<name>
<surname>Widodo</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Predicting breast cancer from risk factors using SVM and extra-trees-based feature selection method</article-title>. <source>Computers</source> <volume>11</volume> (<issue>9</issue>), <fpage>136</fpage>. <pub-id pub-id-type="doi">10.3390/computers11090136</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alghunaim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Al-Baity</surname>
<given-names>H. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>On the scalability of machine-learning algorithms for breast cancer prediction in big data context</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>91535</fpage>&#x2013;<lpage>91546</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2927080</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Almugren</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alshamlan</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A survey on hybrid feature selection methods in microarray gene expression data for cancer classification</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>78533</fpage>&#x2013;<lpage>78548</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2922987</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alomari</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Makhadmeh</surname>
<given-names>S. N.</given-names>
</name>
<name>
<surname>Al-Betar</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Alyasseri</surname>
<given-names>Z. A. A.</given-names>
</name>
<name>
<surname>Doush</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Abasi</surname>
<given-names>A. K.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Gene selection for microarray data classification based on Gray Wolf Optimizer enhanced with TRIZ-inspired operators</article-title>. <source>Knowledge-Based Syst.</source> <volume>223</volume>, <fpage>107034</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2021.107034</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alromema</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Syed</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A hybrid machine learning approach to screen optimal predictors for the classification of primary breast tumors from gene expression microarray data</article-title>. <source>Diagnostics</source> <volume>13</volume> (<issue>4</issue>), <fpage>708</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics13040708</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arun Prabha</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mahesh</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Raja</surname>
<given-names>S. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Decision tree based salp swarm optimization for multi medical data classification with feature reduction technique</article-title>. <source>Braz. Archives Biol. Technol.</source> <volume>64</volume> (<issue>Jan</issue>). <pub-id pub-id-type="doi">10.1590/1678-4324-2021210240</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asselman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Khaldi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aammou</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Enhancing the prediction of student performance based on the machine learning XGBoost algorithm</article-title>. <source>Interact. Learn. Environ.</source> <volume>31</volume>, <fpage>3360</fpage>&#x2013;<lpage>3379</lpage>. <pub-id pub-id-type="doi">10.1080/10494820.2021.1928235</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aziz</surname>
<given-names>R. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Nature-inspired metaheuristics model for gene selection and classification of biomedical microarray data</article-title>. <source>Med. and Biol. Eng. and Comput.</source> <volume>60</volume> (<issue>6</issue>), <fpage>1627</fpage>&#x2013;<lpage>1646</lpage>. <pub-id pub-id-type="doi">10.1007/s11517-022-02555-7</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balakrishnan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dhanalakshmi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Khaire</surname>
<given-names>U. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Improved salp swarm algorithm based on the levy flight for feature selection</article-title>. <source>J. Supercomput.</source> <volume>77</volume> (<issue>11</issue>), <fpage>12399</fpage>&#x2013;<lpage>12419</lpage>. <pub-id pub-id-type="doi">10.1007/s11227-021-03773-w</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Batool</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Byun</surname>
<given-names>Y.-C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Toward improving breast cancer classification using an adaptive voting ensemble learning algorithm</article-title>. <source>IEEE Access</source> <volume>12</volume>, <fpage>12869</fpage>&#x2013;<lpage>12882</lpage>. <pub-id pub-id-type="doi">10.1109/access.2024.3356602</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Maro&#xf1;o</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ben&#xed;tez</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Herrera</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A review of microarray datasets and applied feature selection methods</article-title>. <source>Inf. Sci.</source> <volume>282</volume>, <fpage>111</fpage>&#x2013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2014.05.042</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castelli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Manzoni</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Mariot</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nobile</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Tangherloni</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Salp swarm optimization: a critical review</article-title>. <source>Expert Syst. Appl.</source> <volume>189</volume>, <fpage>116029</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2021.116029</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Leval</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jaffe</surname>
<given-names>E. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Lymphoma classification</article-title>. <source>Cancer J.</source> <volume>26</volume> (<issue>3</issue>), <fpage>176</fpage>&#x2013;<lpage>185</lpage>. <pub-id pub-id-type="doi">10.1097/PPO.0000000000000451</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Minimum redundancy feature selection from microarray gene expression data</article-title>. <source>J. Bioinforma. Comput. Biol.</source> <volume>03</volume> (<issue>02</issue>), <fpage>185</fpage>&#x2013;<lpage>205</lpage>. <pub-id pub-id-type="doi">10.1142/s0219720005001004</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Extreme learning machine and its applications</article-title>. <source>Neural Comput. Appl.</source> <volume>25</volume> (<issue>3&#x2013;4</issue>), <fpage>549</fpage>&#x2013;<lpage>556</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-013-1522-8</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghoniem</surname>
<given-names>R. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A novel bio-inspired deep learning approach for liver cancer diagnosis</article-title>. <source>Information</source> <volume>11</volume> (<issue>2</issue>), <fpage>80</fpage>. <pub-id pub-id-type="doi">10.3390/info11020080</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hameed</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Hassan</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Latiff</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Muhammadsharif</surname>
<given-names>F. F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A comparative study of nature-inspired metaheuristic algorithms using a three-phase hybrid approach for gene selection and classification in high-dimensional cancer datasets</article-title>. <source>Soft Comput.</source> <volume>25</volume> (<issue>13</issue>), <fpage>8683</fpage>&#x2013;<lpage>8701</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-021-05726-0</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hegazy</surname>
<given-names>Ah. E.</given-names>
</name>
<name>
<surname>Makhlouf</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>El-Tawel</surname>
<given-names>Gh. S.</given-names>
</name>
</person-group> (<year>2018a</year>). <article-title>Feature selection using chaotic salp swarm algorithm for data classification</article-title>. <source>Arabian J. Sci. Eng.</source> <volume>44</volume> (<issue>4</issue>), <fpage>3801</fpage>&#x2013;<lpage>3816</lpage>. <pub-id pub-id-type="doi">10.1007/s13369-018-3680-6</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hegazy</surname>
<given-names>Ah. E.</given-names>
</name>
<name>
<surname>Makhlouf</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>El-Tawel</surname>
<given-names>Gh. S.</given-names>
</name>
</person-group> (<year>2018b</year>). <article-title>Improved salp swarm algorithm for feature selection</article-title>. <source>J. King Saud Univ. - Comput. Inf. Sci.</source> <volume>32</volume>, <fpage>335</fpage>&#x2013;<lpage>344</lpage>. <pub-id pub-id-type="doi">10.1016/j.jksuci.2018.06.003</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ibrahim</surname>
<given-names>H. T.</given-names>
</name>
<name>
<surname>Mazher</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Ucan</surname>
<given-names>O. N.</given-names>
</name>
<name>
<surname>Bayat</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Feature selection using salp swarm algorithm for real biomedical datasets</article-title>. <source>Int. J. Comput. Netw. Inf. Secur.</source>, <fpage>1738</fpage>&#x2013;<lpage>7906</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ibrahim</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Ewees</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Oliva</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Abd Elaziz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Improved salp swarm algorithm based on particle swarm optimization for feature selection</article-title>. <source>J. Ambient Intell. Humaniz. Comput.</source> <volume>10</volume> (<issue>8</issue>), <fpage>3155</fpage>&#x2013;<lpage>3169</lpage>. <pub-id pub-id-type="doi">10.1007/s12652-018-1031-9</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ke</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improved swarm-optimization-based filter-wrapper gene selection from microarray data for gene expression tumor classification</article-title>. <source>Pattern Analysis Appl.</source> <volume>26</volume> (<issue>2</issue>), <fpage>455</fpage>&#x2013;<lpage>472</lpage>. <pub-id pub-id-type="doi">10.1007/s10044-022-01117-9</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khalsan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Machado</surname>
<given-names>L. R.</given-names>
</name>
<name>
<surname>Al-Shamery</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Ajit</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Anthony</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mu</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A survey of machine learning approaches applied to gene expression analysis for cancer prediction</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>27522</fpage>&#x2013;<lpage>27534</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3146312</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lewis</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Lilly</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>K. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Lymphoma: diagnosis and treatment</article-title>. <source>Am. Fam. Physician</source> <volume>101</volume> (<issue>1</issue>), <fpage>34</fpage>&#x2013;<lpage>41</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xf3;pez-Garc&#xed;a</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Jerez</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Franco</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Veredas</surname>
<given-names>F. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Transfer learning with convolutional neural networks for cancer survival prediction using gene-expression data</article-title>. <source>PLOS ONE</source> <volume>15</volume> (<issue>3</issue>), <fpage>e0230536</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0230536</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Diagnostic classification of cancers using extreme gradient boosting algorithm and multi-omics data</article-title>. <source>Comput. Biol. Med.</source> <volume>121</volume>, <fpage>103761</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.103761</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahesh</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Vinoth Kumar</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Vivek</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Karthick Raghunath</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Sindhu Madhuri</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Early predictive model for breast cancer classification using blended ensemble learning</article-title>. <source>Int. J. Syst. Assur. Eng. Manag.</source> <volume>15</volume>, <fpage>188</fpage>&#x2013;<lpage>197</lpage>. <pub-id pub-id-type="doi">10.1007/s13198-022-01696-0</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maurya</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Kushwah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kushwaha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chawade</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mani</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Prognostic model development for classification of colorectal adenocarcinoma by using machine learning model based on feature selection technique boruta</article-title>. <source>Sci. Rep.</source> <volume>13</volume> (<issue>1</issue>), <fpage>6413</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-023-33327-4</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meenachi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ramakrishnan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Differential evolution and ACO based global optimal feature selection with fuzzy rough set for cancer data classification</article-title>. <source>Soft Comput.</source> <volume>24</volume> (<issue>24</issue>), <fpage>18463</fpage>&#x2013;<lpage>18475</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-020-05070-9</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirjalili</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gandomi</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Mirjalili</surname>
<given-names>S. Z.</given-names>
</name>
<name>
<surname>Saremi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Faris</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mirjalili</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Salp Swarm Algorithm: a bio-inspired optimizer for engineering design problems</article-title>. <source>Adv. Eng. Softw.</source> <volume>114</volume>, <fpage>163</fpage>&#x2013;<lpage>191</lpage>. <pub-id pub-id-type="doi">10.1016/j.advengsoft.2017.07.002</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Naji</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Filali</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Bouhlal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Benlahmar</surname>
<given-names>E. H.</given-names>
</name>
<name>
<surname>Abdelouhahid</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Debauche</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Breast cancer prediction and diagnosis through a new approach based on majority voting ensemble classifier</article-title>. <source>Procedia Comput. Sci.</source> <volume>191</volume>, <fpage>481</fpage>&#x2013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1016/j.procs.2021.07.061</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nouri-Moghaddam</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ghazanfari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fathian</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A novel bio-inspired hybrid multi-filter wrapper gene selection method with ensemble classifier for microarray data</article-title>. <source>Neural Comput. Appl.</source> <volume>35</volume>, <fpage>11531</fpage>&#x2013;<lpage>11561</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-021-06459-9</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Panigrahi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pati</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sahu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Nayak</surname>
<given-names>D. S. K.</given-names>
</name>
<name>
<surname>Sahoo</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>En-MinWhale: an ensemble approach based on MRMR and whale optimization for cancer diagnosis</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>113526</fpage>&#x2013;<lpage>113542</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3318261</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pati</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Panigrahi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nayak</surname>
<given-names>D. S. K.</given-names>
</name>
<name>
<surname>Sahoo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Predicting pediatric appendicitis using ensemble learning techniques</article-title>. <source>Procedia Comput. Sci.</source> <volume>218</volume>, <fpage>1166</fpage>&#x2013;<lpage>1175</lpage>. <pub-id pub-id-type="doi">10.1016/j.procs.2023.01.095</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Puza</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>O&#x2019;Neill</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Generalised Clopper&#x2013;Pearson confidence intervals for the binomial proportion</article-title>. <source>J. Stat. Comput. Simul.</source> <volume>76</volume> (<issue>6</issue>), <fpage>489</fpage>&#x2013;<lpage>508</lpage>. <pub-id pub-id-type="doi">10.1080/10629360500107527</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rostami</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Forouzandeh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Berahmand</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Soltani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shahsavari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Oussalah</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Gene selection for microarray data classification via multi-objective graph theoretic-based method</article-title>. <source>Artif. Intell. Med.</source> <volume>123</volume>, <fpage>102228</fpage>. <pub-id pub-id-type="doi">10.1016/j.artmed.2021.102228</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rustagi</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bhatnagar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mathur</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Srinivasa</surname>
<given-names>K. G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Hybrid salp swarm and grey wolf optimizer algorithm based ensemble approach for breast cancer diagnosis</article-title>. <source>Multimedia Tools Appl.</source> <volume>83</volume>, <fpage>70117</fpage>&#x2013;<lpage>70141</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-023-18015-9</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sayed</surname>
<given-names>G. I.</given-names>
</name>
<name>
<surname>Khoriba</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Haggag</surname>
<given-names>M. H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A novel chaotic salp swarm algorithm for global optimization and feature selection</article-title>. <source>Appl. Intell.</source> <volume>48</volume> (<issue>10</issue>), <fpage>3462</fpage>&#x2013;<lpage>3481</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-018-1158-6</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shukla</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vardhan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>Gene selection for cancer types classification using novel hybrid metaheuristics approach</article-title>. <source>Swarm Evol. Comput.</source> <volume>54</volume>, <fpage>100661</fpage>. <pub-id pub-id-type="doi">10.1016/j.swevo.2020.100661</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shukla</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vardhan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>An adaptive inertia weight teaching-learning-based optimization algorithm and its applications</article-title>. <source>Appl. Math. Model.</source> <volume>77</volume>, <fpage>309</fpage>&#x2013;<lpage>326</lpage>. <pub-id pub-id-type="doi">10.1016/j.apm.2019.07.046</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shukla</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vardhan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A new hybrid wrapper TLBO and SA with SVM approach for gene expression data</article-title>. <source>Inf. Sci.</source> <volume>503</volume>, <fpage>238</fpage>&#x2013;<lpage>254</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2019.06.063</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Feature selection using neighborhood entropy-based uncertainty measures for gene expression data classification</article-title>. <source>Inf. Sci.</source> <volume>502</volume>, <fpage>18</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2019.05.072</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thawkar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A hybrid model using teaching&#x2013;learning-based optimization and Salp swarm algorithm for feature selection and classification in digital mammography</article-title>. <source>J. Ambient Intell. Humaniz. Comput.</source> <volume>12</volume> (<issue>9</issue>), <fpage>8793</fpage>&#x2013;<lpage>8808</lpage>. <pub-id pub-id-type="doi">10.1007/s12652-020-02662-z</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#xdc;nalan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>G&#xfc;nay</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Akkurt</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Gunoglu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tekin</surname>
<given-names>H. O.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A comparative study on breast cancer classification with stratified shuffle split and K-fold cross validation via ensembled machine learning</article-title>. <source>J. Radiat. Res. Appl. Sci.</source> <volume>17</volume> (<issue>4</issue>), <fpage>101080</fpage>. <pub-id pub-id-type="doi">10.1016/j.jrras.2024.101080</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Venkatesan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Balamurugan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Thamaraimanalan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ramkumar</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Efficient machine learning technique for tumor classification based on gene expression data</article-title>,&#x201d; in <source>2022 8th international conference on advanced computing and communication systems (ICACCS)</source>. <publisher-loc>Coimbatore, India</publisher-loc>, <fpage>1982</fpage>&#x2013;<lpage>1986</lpage>. <pub-id pub-id-type="doi">10.1109/ICACCS54159.2022.9785294</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Suo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>A novel feature selection method based on salp swarm algorithm</article-title>,&#x201d; in <source>2021 IEEE international conference on information communication and software engineering (ICICSE)</source>. <publisher-loc>Chengdu, China</publisher-loc>, <fpage>126</fpage>&#x2013;<lpage>130</lpage>. <pub-id pub-id-type="doi">10.1109/ICICSE52190.2021.9404129</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Intelligent fault diagnosis of rotating machinery using improved multiscale dispersion entropy and mRMR feature selection</article-title>. <source>Knowledge-Based Syst.</source> <volume>163</volume>, <fpage>450</fpage>&#x2013;<lpage>471</lpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2018.09.004</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Anand</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Maximum relevance and minimum redundancy feature selection methods for a marketing machine learning platform</article-title>,&#x201d; in <source>2019 IEEE international conference on data science and advanced analytics (DSAA)</source>. <publisher-loc>Washington, DC, USA</publisher-loc>, <fpage>442</fpage>&#x2013;<lpage>452</lpage>. <pub-id pub-id-type="doi">10.1109/DSAA.2019.00059</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ong</surname>
<given-names>Y.-S.</given-names>
</name>
<name>
<surname>Dash</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Markov blanket-embedded genetic algorithm for gene selection</article-title>. <source>Pattern Recognit.</source> <volume>40</volume> (<issue>11</issue>), <fpage>3236</fpage>&#x2013;<lpage>3248</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2007.02.007</pub-id>
</citation>
</ref>
</ref-list>
<app-group>
<app id="app1">
<title>Appendix-I</title>
<fig id="FA1" position="float">
<label>FIGURE A1</label>
<caption>
<p>Flow diagram for search strategies and selection of studies for BIMSSA.</p>
</caption>
<graphic xlink:href="FGENE_fgene-2024-1491602_wc_app1.tif"/>
</fig>
</app>
</app-group>
</back>
</article>