<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2025.1610856</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Medical pattern classification using a novel binary similarity approach based on an associative classifier</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Velazquez-Gonzalez</surname> <given-names>Osvaldo</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<uri xlink:href="https://loop.frontiersin.org/people/2681476"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Alarc&#x000F3;n-Paredes</surname> <given-names>Antonio</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ya&#x000F1;ez-Marquez</surname> <given-names>Cornelio</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<uri xlink:href="https://loop.frontiersin.org/people/2394290"/>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Centro de Investigaci&#x000F3;n en Computaci&#x000F3;n, Instituto Polit&#x000E9;cnico Nacional</institution>, <city>Mexico City</city>, <country>M&#x000E9;xico</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Cornelio Ya&#x000F1;ez-Marquez, <email xlink:href="mailto:cyanez@cic.ipn.mx">cyanez@cic.ipn.mx</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-14">
<day>14</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>8</volume>
<elocation-id>1610856</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>10</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Velazquez-Gonzalez, Alarc&#x000F3;n-Paredes and Ya&#x000F1;ez-Marquez.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Velazquez-Gonzalez, Alarc&#x000F3;n-Paredes and Ya&#x000F1;ez-Marquez</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-14">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Classification is a central task in machine learning, underpinning applications in domains such as finance, medicine, engineering, information technology, and biology. However, pattern classification can become a complex, even inexplicable, task for current robust models owing to the complexity of the target datasets, which is why there is strong interest in achieving high classification performance. In particular cases, moreover, such performance must be achieved while maintaining a certain level of explainability in the operation and decisions of the classification algorithm. For this reason, we propose an algorithm that is robust, simple, and highly explainable, and that is applicable primarily to medical datasets with complex class imbalance. The main contribution of this research is a novel machine learning classification algorithm based on binary string similarity that is competitive, simple, interpretable, and transparent, as it is clear why a pattern is classified into a given class. A comparative study of the performance of the best-known state-of-the-art classification algorithms and the proposed model is presented. The experimental results demonstrate the benefits of the proposal, which were validated through statistical hypothesis tests to assess significant performance differences.</p></abstract>
<kwd-group>
<kwd>binary similarity</kwd>
<kwd>classification algorithms</kwd>
<kwd>machine learning</kwd>
<kwd>medicine dataset</kwd>
<kwd>pattern classification</kwd>
<kwd>pattern recognition</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="9"/>
<equation-count count="31"/>
<ref-count count="67"/>
<page-count count="16"/>
<word-count count="11098"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Machine Learning and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>This study proposes a novel algorithm for pattern classification. The proposed algorithm is primarily based on binary string similarity and is called the N-Similarity Binary Classifier (n-SBC), as it uses the Hamming string similarity method and a binary-value encoder called the reflected binary code (RBC) or Gray code. The creation, design, implementation, and application of n-SBC address the supervised case of pattern classification.</p>
<p>Humans can recognize objects, actions, and everyday elements (patterns); however, what is simple for humans can be a very complex problem for a computational algorithm. The discipline whose field of study includes the modeling and programming of automatic object and action recognition tasks is Pattern Recognition (PR) (<xref ref-type="bibr" rid="B51">Sarker, 2021</xref>). There are four basic tasks in PR: classification, regression, retrieval, and clustering (<xref ref-type="bibr" rid="B15">De Sa, 2012</xref>; <xref ref-type="bibr" rid="B48">Rane et al., 2024</xref>). The first three belong to the supervised learning paradigm, while the last is the emblematic task of the unsupervised learning paradigm (<xref ref-type="bibr" rid="B33">Janani and Vijayarani, 2019</xref>). In this study, emphasis is placed on the first task of the supervised learning paradigm: intelligent pattern classification, or machine learning for pattern classification. In the state of the art, a wide variety of conceptual bases provide theoretical support for this task: Bayesian classifiers; distance-based models such as kNN; models based on decision trees (C4.5 or Random Forest); models based on and inspired by the neurons of the human brain (Multilayer Perceptron); and models based on the optimization of analytical functions, such as support vector machines (<xref ref-type="bibr" rid="B7">Bhargavi and Jyothi, 2009</xref>; <xref ref-type="bibr" rid="B14">Cover and Hart, 1967</xref>; <xref ref-type="bibr" rid="B39">McCulloch and Pitts, 1943</xref>; <xref ref-type="bibr" rid="B46">Quinlan, 1990</xref>; <xref ref-type="bibr" rid="B50">Rosenblatt, 1958</xref>; <xref ref-type="bibr" rid="B36">LeCun et al., 2015</xref>; <xref ref-type="bibr" rid="B12">Cortes, 1995</xref>). As important aids in the development of PR and related disciplines, there are dataset repositories (<xref ref-type="bibr" rid="B19">Dua and Graff, 2019</xref>) and platforms where research groups make valuable computational tools available to users, as well as implementations of algorithms and methods; such is the case of WEKA (<xref ref-type="bibr" rid="B27">Hall et al., 2009</xref>) and KEEL, two of the most useful, famous, and popular platforms.</p>
<p>Before 1997, when the No Free Lunch Theorem (<xref ref-type="bibr" rid="B15">De Sa, 2012</xref>; <xref ref-type="bibr" rid="B20">Duda et al., 2001</xref>) was published, a large number of research groups were trying to find the best classifier; this theorem, however, led researchers to conclude that such a search is futile, since no intelligent pattern classifier is the best in all cases. Researchers&#x00027; efforts are therefore currently directed toward finding alternatives to improve the performance of pattern classifiers, recognizing that there is no best one. One of the main recent achievements has been the development of a new pattern classification paradigm, Minimalist Machine Learning (MML) (<xref ref-type="bibr" rid="B63">Y&#x000E1;&#x000F1;ez-M&#x000E1;rquez, 2020</xref>). It is in this context that the central proposition of this work arises.</p>
<p>Recently, significant efforts have been devoted to finding alternatives to improve the performance of intelligent pattern classifiers, recognizing that there is no single best approach. These research efforts draw on a wide variety of tools and methodologies developed over the decades, and one recurring theme in the resulting algorithms is the use of associative models. Associative models are not designed for intelligent pattern classification but for pattern retrieval; however, if the designer adequately represents the output patterns, they can perform the classification task correctly. The first associative model on record is the Lernmatrix, created in 1961 by <xref ref-type="bibr" rid="B55">Steinbuch (1961)</xref>, followed by the associative model called the Correlograph, created and published 8 years later (<xref ref-type="bibr" rid="B59">Willshaw et al., 1969</xref>). The year 1972 saw the birth of one of the best-known associative models: the Linear Associator, which emerged as the fusion of two independent models (<xref ref-type="bibr" rid="B34">Kohonen, 1972</xref>; <xref ref-type="bibr" rid="B6">Anderson, 1972</xref>); since then, a considerable number of associative models have been developed worldwide, with successful applications in various areas of human activity (<xref ref-type="bibr" rid="B31">Hopfield, 1982</xref>; <xref ref-type="bibr" rid="B56">Talib, 2018</xref>; <xref ref-type="bibr" rid="B32">Ibrahim and Abdulazeez, 2021</xref>). It is pertinent to note that research on the subject is ongoing (<xref ref-type="bibr" rid="B29">Hoffmann, 2019</xref>; <xref ref-type="bibr" rid="B44">Nozari et al., 2024</xref>; <xref ref-type="bibr" rid="B67">Zhu et al., 2024</xref>; <xref ref-type="bibr" rid="B9">Bian and Priyadarshi, 2024</xref>).</p>
<p>Early detection of diseases has become increasingly relevant in recent years owing to its benefits for public health, such as increasing the chances of survival for patients suffering from severe respiratory diseases (<xref ref-type="bibr" rid="B57">Vayadande, 2024</xref>; <xref ref-type="bibr" rid="B49">Rasool et al., 2023</xref>) and enabling better recovery thanks to detection at an early stage of the disease. Research focused on the pre-diagnosis of respiratory diseases has recently gained momentum worldwide, with widespread interest in improving early detection. Currently, both invasive and non-invasive methods are applied. However, the use of machine learning classification algorithms for disease diagnosis has lately become an increasingly important area of research globally due to their ease of implementation and accessibility (<xref ref-type="bibr" rid="B47">Rana and Bhushan, 2023</xref>). This has prompted frequent research in the literature on the development of novel specialized models for the medical pre-diagnosis of all types of diseases (<xref ref-type="bibr" rid="B35">Kumar et al., 2023</xref>; <xref ref-type="bibr" rid="B4">Ahsan et al., 2022</xref>).</p>
<p>In this paper, elements of associative models are used to create and design the main algorithm of the proposal; in addition, the concept of string similarity is employed, along with the Hamming distance and a binary pattern encoder, the reflected binary code (RBC or Gray code). The rest of this paper is organized as follows: Section 2 details related work. Section 3 describes the proposed algorithm, with detailed examples of its operation in the training and classification phases. Section 4 presents the experimental phase and results, and, finally, Section 5 includes the conclusions and future research.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related research</title>
<p>As discussed above, assuming the existence of a universally optimal pattern classification algorithm is unjustified given the no free lunch theorem, forcing researchers in machine learning-related areas to focus on improving the performance of existing models and thereby reducing classification error (<xref ref-type="bibr" rid="B11">Bui et al., 2020</xref>; <xref ref-type="bibr" rid="B54">Shehadeh et al., 2021</xref>; <xref ref-type="bibr" rid="B40">Misra and Yadav, 2020</xref>). Alternatively, some studies propose entirely novel machine learning models for pattern classification, with the aim of exploring new possibilities, as demonstrated by <xref ref-type="bibr" rid="B5">Amygdalos et al. (2023)</xref> and <xref ref-type="bibr" rid="B28">Hissou et al. (2023)</xref>. Similarly, researchers have pioneered the development of new algorithms based on associative memories, including those by <xref ref-type="bibr" rid="B41">Moreno-Ibarra et al. (2021)</xref>, <xref ref-type="bibr" rid="B64">Yang and Ding (2020)</xref>, and <xref ref-type="bibr" rid="B38">Luna-Ortiz et al. (2023)</xref>.</p>
<p>Section 2 is divided into three parts. Section 2.1 describes the Hamming Distance algorithm, a fundamental concept for our novel pattern classification algorithm. Section 2.2 explores the RBC (Reflected Binary Code), another crucial element of our novel method for converting the original dataset into binary strings. Finally, Section 2.3 provides an overview of the state-of-the-art machine learning algorithms, including both well-known models and associative memories used for classification tasks, as well as a deep dive into current research on Hamming Distance and RBC in machine learning.</p>
<sec>
<label>2.1</label>
<title>Hamming distance</title>
<p>The Hamming distance, the most widely used metric for binary strings and a natural similarity measure on binary codes, can be computed with just a few machine instructions per comparison (<xref ref-type="bibr" rid="B45">Pappalardo et al., 2009</xref>). The computational effort required to calculate the Hamming distance depends linearly on the length of the string, and it is often used to quantify the extent to which two bit strings of the same dimension differ (<xref ref-type="bibr" rid="B43">Norouzi et al., 2012</xref>; <xref ref-type="bibr" rid="B10">Bookstein et al., 2002</xref>).</p>
<p>The distance is defined as the minimum number of errors that could transform a pattern <italic>A</italic> into a pattern <italic>B</italic>, i.e., it measures the minimum number of values that must be changed to transform a string into another target string (<xref ref-type="bibr" rid="B65">Zhang et al., 2013</xref>).</p>
<p>Equivalently, it can be defined as the number of positions at which the corresponding bits differ, expressed as follows (<xref ref-type="bibr" rid="B22">Gaitanis et al., 1993</xref>):</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mo>,</mml:mo><mml:mi>B</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mo>,</mml:mo><mml:mtext>&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where <italic>A</italic><sub><italic>i</italic></sub> and <italic>B</italic><sub><italic>i</italic></sub> are the bits at the <italic>i</italic>-th position of the respective strings; the subtraction is equivalent to the XOR logic operation. The Hamming distance has many applications, the most relevant being in coding theory, electronics, and term clustering (<xref ref-type="bibr" rid="B43">Norouzi et al., 2012</xref>). It has also been shown that exact nearest-neighbor search in Hamming space can be performed significantly faster than linear search, achieving sublinear run times.</p>
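<p>As an informal illustration of this bit-level computation (our sketch, not taken from the cited works), the following Python function computes the Hamming distance of two equal-length bit strings represented as integers; it assumes Python 3.10+ for <monospace>int.bit_count()</monospace>:</p>
<code language="python">def hamming_distance(a: int, b: int) -> int:
    """Number of bit positions at which a and b differ (Equation 1).

    XOR leaves a 1 exactly where the operands disagree, so the
    Hamming distance is the population count of a XOR b.
    """
    return (a ^ b).bit_count()

# Example: 0b1011 and 0b0010 differ in two positions.
assert hamming_distance(0b1011, 0b0010) == 2</code>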
</sec>
<sec>
<label>2.2</label>
<title>Reflected Binary Code (RBC)</title>
<p>The Gray encoder, also known as the Reflected Binary Code (RBC), was patented by Frank Gray of Bell Telephone Laboratories in 1953 (<xref ref-type="bibr" rid="B3">Agrell et al., 2004</xref>; <xref ref-type="bibr" rid="B18">Doran, 2007</xref>; <xref ref-type="bibr" rid="B26">Goodall, 1951</xref>). It is a binary numbering system whose main property is that two adjacent values differ by only a single bit. For example, in RBC the value 2 differs from both 1 and 3 by a single bit. <xref ref-type="table" rid="T1">Table 1</xref> is an illustrative example.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Example of the unit-distance property of the Gray binary code (RBC).</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Decimal</bold></th>
<th valign="top" align="center"><bold>Binary code</bold></th>
<th valign="top" align="center"><bold>RBC (Gray code)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">001</td>
<td valign="top" align="center">001</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">010</td>
<td valign="top" align="center">011</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">011</td>
<td valign="top" align="center">010</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">110</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="center">101</td>
<td valign="top" align="center">111</td>
</tr></tbody>
</table>
</table-wrap>
<p>In this case, unlike the classic binary encoder, adjacent decimal values differ by only one bit. This property helps preserve similarity between neighboring patterns, whereas standard binary encoding can cause adjacent values to differ across multiple bits, creating more complex relationships between close patterns. This, in turn, supports the performance of our proposed classifier, as explained in Section 3, since it is based on binary string similarities.</p>
<p>This binary code is the canonical example of a unit-distance code, and the name is commonly used to refer to any such code. Its unique characteristics make it very useful across different domains, especially for error correction, position encoders, genetic algorithms, and digital communication (<xref ref-type="bibr" rid="B3">Agrell et al., 2004</xref>; <xref ref-type="bibr" rid="B8">Bhat and Savage, 1996</xref>).</p>
<p>A binary string in RBC can be obtained as follows: first, convert the decimal value to classic binary code; then convert from binary code to RBC by copying the most significant bit (MSB) unchanged and XORing (exclusive OR) every other bit with the bit immediately to its left, i.e., its more significant neighbor. For example, to convert the number 5 into RBC: the binary value of 5 is 101, and the MSB is 1. The second bit (0) XORed with the first bit (1) gives 1; the third bit (1) XORed with the second bit (0) gives 1; finally, the MSB is kept as the first bit of the resulting string. Therefore, the RBC of the number 5 is 111 (<xref ref-type="bibr" rid="B8">Bhat and Savage, 1996</xref>).</p>
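<p>The procedure just described reduces to a single shift and XOR. The following minimal Python sketch (our illustration, not part of the cited references) converts a non-negative integer to its RBC representation and checks the unit-distance property:</p>
<code language="python">def rbc_encode(n: int) -> int:
    """Reflected binary (Gray) code of a non-negative integer.

    Each output bit is the XOR of a binary bit with its more
    significant neighbor, and the MSB is copied unchanged, which
    is equivalent to n XOR (n right-shifted by one).
    """
    return n ^ (n >> 1)

# The worked example from the text: 5 = 0b101 maps to 0b111.
assert rbc_encode(5) == 0b111
# Unit-distance property: adjacent integers differ in exactly one bit.
assert bin(rbc_encode(2) ^ rbc_encode(3)).count("1") == 1</code>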
<p>Before converting decimal values to binary strings using the RBC method, the dataset values are preprocessed: the minimum value per feature is computed and subtracted from every value of that feature; if required, the values are truncated to two decimal places; and finally, the values are scaled to integers. This aims to obtain only non-negative integer values.</p>
<p>To illustrate the conversion to integer values and truncation, the following example is provided. Consider a continuous numeric feature:</p>
<disp-formula id="EQ2"><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>131</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>010</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>351</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>110</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>660</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>411</mml:mn></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>In this case, to obtain only non-negative numbers, the minimum value of the feature, which here is &#x02212;0.11, is subtracted from every value, yielding the following result:</p>
<disp-formula id="EQ3"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>241</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>100</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>461</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>000</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>770</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>521</mml:mn></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>Then, the values are truncated to two decimals only:</p>
<disp-formula id="EQ4"><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>24</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>10</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>46</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>00</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>77</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>52</mml:mn></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<p>Subsequently, all the values of the feature are scaled to integers (here, multiplied by 100):</p>
<disp-formula id="EQ5"><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:mn>124</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>10</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>146</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>77</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>152</mml:mn></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>Finally, using these feature values, the RBC binary string is computed. <xref ref-type="table" rid="T2">Table 2</xref> shows an example of how the binary codes look after RBC encoding.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Illustration of RBC after preprocessing.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Decimal</bold></th>
<th valign="top" align="center"><bold>Binary code</bold></th>
<th valign="top" align="center"><bold>RBC (Gray code)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">124</td>
<td valign="top" align="center">01111100</td>
<td valign="top" align="center">01000010</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="center">00001010</td>
<td valign="top" align="center">00001111</td>
</tr>
<tr>
<td valign="top" align="left">146</td>
<td valign="top" align="center">10010010</td>
<td valign="top" align="center">11011011</td>
</tr>
<tr>
<td valign="top" align="left">0</td>
<td valign="top" align="center">00000000</td>
<td valign="top" align="center">00000000</td>
</tr>
<tr>
<td valign="top" align="left">77</td>
<td valign="top" align="center">01001101</td>
<td valign="top" align="center">01101011</td>
</tr>
<tr>
<td valign="top" align="left">152</td>
<td valign="top" align="center">10011000</td>
<td valign="top" align="center">11010100</td>
</tr></tbody>
</table>
</table-wrap>
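<p>The preprocessing of Equations 2 to 5 can be summarized in the following Python sketch (our reading of the steps above; <monospace>Decimal</monospace> is used only to avoid floating-point artifacts when truncating):</p>
<code language="python">from decimal import Decimal, ROUND_DOWN

def preprocess_feature(values):
    """Shift a feature to non-negative values (Equation 3), truncate
    to two decimals (Equation 4), and scale to integers (Equation 5)."""
    vmin = min(values)
    integers = []
    for v in values:
        shifted = Decimal(str(v)) - Decimal(str(vmin))
        truncated = shifted.quantize(Decimal("0.01"), rounding=ROUND_DOWN)
        integers.append(int(truncated * 100))
    return integers

feature = [1.131, -0.010, 1.351, -0.110, 0.660, 1.411]
print(preprocess_feature(feature))  # [124, 10, 146, 0, 77, 152]</code>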
</sec>
<sec>
<label>2.3</label>
<title>Pattern classification algorithms</title>
<p>In the current state of the art, many machine learning algorithms focus on classification tasks. Some of them are based on distance, such as the kNN (k-nearest neighbors) model (<xref ref-type="bibr" rid="B66">Zhang, 2021</xref>), while others are based on optimization, such as SVM (support vector machines) (<xref ref-type="bibr" rid="B1">Abdullah and Abdulazeez, 2021</xref>). Others are based on decision trees (<xref ref-type="bibr" rid="B13">Costa and Pedreira, 2023</xref>), such as C4.5, or bagging approaches such as the random forest algorithm. In more recent literature, models are inspired by biological concepts, such as the human brain. For instance, the multilayer perceptron (an artificial neural network) falls into this category. Currently, the most widely used are deep learning models (<xref ref-type="bibr" rid="B53">Sharifani and Amini, 2023</xref>), which are neural networks with many layers and additional specialized preprocessing stages, such as CNNs (convolutional neural networks) for image processing and transformers and embedding approaches for natural language processing tasks (<xref ref-type="bibr" rid="B23">Galli et al., 2024</xref>).</p>
<sec>
<label>2.3.1</label>
<title>Associative memories</title>
<p>An associative memory <bold><italic>M</italic></bold> is a pattern input/output system whose primary purpose is to learn to correctly retrieve complete patterns from inputs that may be corrupted by several sources of noise. It can be expressed as <italic>x</italic> &#x02192; <bold><italic>M</italic></bold> &#x02192; <italic>y</italic>. The input and output patterns are represented by the column vectors <italic>x</italic> and <italic>y</italic>, respectively. Each input is associated with a corresponding output pattern; such an association is expressed as (<italic>x, y</italic>). The memory <bold><italic>M</italic></bold> is represented by a matrix, formed from a finite set of previously known associations called the fundamental set; this construction constitutes the learning stage.</p>
<p>Finally, the retrieval process (which becomes a classification stage if the designer makes adequate changes) consists of operating the memory <italic>M</italic> according to the steps defined for that phase, with the aim of establishing sufficient conditions to recover the fundamental output pattern <italic>y</italic> from the pattern <italic>x</italic> (<xref ref-type="bibr" rid="B44">Nozari et al., 2024</xref>).</p>
<p>In the state of the art, there are pioneering associative memories whose original purpose was pattern retrieval, such as Steinbuch&#x00027;s Lernmatrix and the Linear Associator (<xref ref-type="bibr" rid="B55">Steinbuch, 1961</xref>; <xref ref-type="bibr" rid="B44">Nozari et al., 2024</xref>). In these models, learning is typically implemented by updating a memory matrix with a set of rules. For example, in a Lernmatrix, each association (<italic>x, y</italic>) contributes an update of the form <italic>M</italic> &#x02190; <italic>M</italic> &#x0002B; <italic>yx</italic><sup><italic>T</italic></sup>. During inference, the unknown pattern <italic>x</italic> is projected through the learned memory, <italic>y</italic> &#x0003D; <italic>Mx</italic>, and a non-linear function (e.g., thresholding) produces the retrieved output pattern. If class labels are encoded as output vectors (e.g., one-hot) and the decision is taken from <italic>y</italic> by a winner-take-all rule, the same associative mechanism can be used as a classifier. This idea is exploited in modern associative classifiers (<xref ref-type="bibr" rid="B58">Velazquez-Rodriguez et al., 2020</xref>), which extend the classical Lernmatrix with a novel mathematical transformation that makes the matrix updates and recall rule suitable for supervised pattern classification rather than only for pattern completion.</p>
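<p>To make the preceding description concrete, the following minimal NumPy sketch (a generic illustration, not the extended classifier of Velazquez-Rodriguez et al., 2020) shows the classical learning and recall mechanism used as a classifier:</p>
<code language="python">import numpy as np

# Fundamental set: three input patterns (rows of X) with one-hot outputs.
X = np.array([[1, 0, 1],
              [0, 1, 1],
              [1, 1, 0]])
Y = np.eye(3, dtype=int)

# Learning: accumulate the outer product of each association (x, y) into M.
M = np.zeros((3, 3), dtype=int)
for x, y in zip(X, Y):
    M += np.outer(y, x)

# Recall/classification: project the unknown pattern through the memory
# and take the winner-take-all maximum as the predicted class.
x_unknown = np.array([1, 0, 1])
scores = M @ x_unknown
print(int(np.argmax(scores)))  # prints 0: the first class responds most strongly</code>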
<p>Associative memories are relevant because the proposed n-SBC classifier was inspired by them. In n-SBC, training patterns are stored as rows of a memory-like matrix. Then, given a test or unknown pattern, similarity is computed as the bitwise overlap under bipolar coding, which amounts to an affine transformation of an inner product, unlike classic associative memories, which recall through linear projections of a learned matrix. Thus, our novel model is conceptually linked to associative memories but implements a different representation (RBC codes) and a Hamming-based decision rule tailored to pattern classification.</p>
</sec>
<sec>
<label>2.3.2</label>
<title>Hamming distance and RBC in pattern classification</title>
<p>After an extensive literature search, it was found that there have been very few attempts to create intelligent pattern classification algorithms based on the Hamming distance. Regarding the RBC code, no impactful work was found; this proposal therefore uses the Hamming distance and the RBC code simultaneously within the same pattern classifier algorithm, which ensures its novelty and originality. The closest related work, published in 2017 (<xref ref-type="bibr" rid="B52">&#x00160;arkovskis et al., 2017</xref>), uses RBC codes: the authors describe a real-time classifier useful for computing statistical parameters of data streams, detecting symbols of different modulation types, and other applications where the fastest possible association of a sample of input signals with one of the predefined categories is required.</p>
<p>While explicit RBC and Hamming classifiers are rare in the literature, some frameworks and methodologies encode data into binary codes and perform comparisons, mainly in hashing and ECOC-style multiclass reduction. In the ECOC (error-correcting output codes) methodology, each class is assigned a binary codeword, and a bank of binary base learners (e.g., C4.5, SVM) is trained, one per code column, as described by <xref ref-type="bibr" rid="B17">Dietterich and Bakiri (1994)</xref>. Then, to perform classification, the column outputs are concatenated, and the label is chosen as the nearest class codeword, typically via the Hamming distance. Thus, ECOC is an ensemble framework, not a classifier <italic>per se</italic>: it improves the underlying learners but does not replace them (<xref ref-type="bibr" rid="B17">Dietterich and Bakiri, 1994</xref>).</p>
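<p>For illustration, the ECOC decoding step can be sketched as follows (our generic example, not the exact procedure of the cited work): given the concatenated outputs of the column classifiers, the predicted label is the class whose codeword is nearest in Hamming distance.</p>
<code language="python">def ecoc_decode(output_bits, codewords):
    """Return the class whose codeword is nearest (in Hamming
    distance) to the concatenated outputs of the base learners."""
    def hamming(a, b):
        return sum(x != y for x, y in zip(a, b))
    return min(codewords, key=lambda c: hamming(output_bits, codewords[c]))

# Three classes encoded with 5-bit codewords (illustrative values).
codewords = {"A": "00000", "B": "10101", "C": "01110"}
print(ecoc_decode("10100", codewords))  # 'B' is the nearest codeword</code>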
<p>On the other hand, in learning-to-hash or supervised hashing, features are transformed into compact binary codes by a learned encoder, and classification is commonly implemented in Hamming space (e.g., <italic>k-</italic>NN or ranking codes); this establishes that the Hamming distance is an effective similarity measure for large-scale prediction when inputs are binary encoded (<xref ref-type="bibr" rid="B43">Norouzi et al., 2012</xref>). Thus, supervised hashing is not a classifier <italic>per se</italic> but a representation-learning method whose downstream machine-learning models operate on the learned bits. This approach offers fast lookups once trained, but it introduces training complexity, and results depend on the codebook.</p>
<p>Our contribution, n-SBC, differs from these strands in two ways. First, unlike supervised hashing, which learns codebooks and then delegates prediction to a classic classifier (e.g., <italic>k-</italic>NN), n-SBC uses a deterministic RBC mapping per feature and classifies by Hamming distance, removing the encoder learning stage while preserving fast bit-wise comparisons. In supervised hashing, performance depends on the learned encoder; in n-SBC, performance hinges on the RBC representation and Hamming aggregation. Second, whereas ECOC emphasizes maximizing inter-class Hamming separations between class codewords and requires training a bank of binary base learners (e.g., C4.5, SVM), thus acting as a framework rather than a classifier <italic>per se</italic>, n-SBC treats the entire RBC binary string instance as the object of comparison, performing instance-level Hamming matching rather than decoding to a fixed class codeword. In short, both approaches are enabling methods that rely on baseline models (C4.5, SVM, <italic>k-</italic>NN, etc.), whereas n-SBC is the classifier itself. Together with our operational unification of RBC and Hamming, these distinctions place n-SBC at a different point in the design space (<xref ref-type="bibr" rid="B62">Xiao et al., 2022</xref>). A summary of the main differences is shown in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Comparison between n-SBC and related research.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="left"><bold>Representation</bold></th>
<th valign="top" align="left"><bold>Decision rule</bold></th>
<th valign="top" align="left"><bold>Distance metric</bold></th>
<th valign="top" align="left"><bold>Explicability</bold></th>
<th valign="top" align="left"><bold>Key distinction</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">ECOC</td>
<td valign="top" align="left">Class codewords (binary) &#x0002B; any base classifier model</td>
<td valign="top" align="left">Nearest class codeword from trained base models</td>
<td valign="top" align="left">Hamming</td>
<td valign="top" align="left">Bits reflect learned columns, not original features</td>
<td valign="top" align="left">Framework to improve baseline classifiers with Hamming and encoders</td>
</tr>
<tr>
<td valign="top" align="left">Supervised hashing</td>
<td valign="top" align="left">Learned binary codes</td>
<td valign="top" align="left">k-NN, ranking in Hamming space</td>
<td valign="top" align="left">Hamming</td>
<td valign="top" align="left">Hash bits are opaque codes, so individual features are not accessible</td>
<td valign="top" align="left">Methodology to improve baseline classifiers with Hamming and encoders</td>
</tr>
<tr>
<td valign="top" align="left">Associative Memories</td>
<td valign="top" align="left">Learned binary codes</td>
<td valign="top" align="left">Linear projection</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Shows which stored patterns are recalled, but not which features influenced the decision</td>
<td valign="top" align="left">Classical associative retrieval</td>
</tr>
<tr>
<td valign="top" align="left">k-NN</td>
<td valign="top" align="left">Raw or normalized real values</td>
<td valign="top" align="left">k-NN in real space</td>
<td valign="top" align="left">Euclidean, Chebyshev, Manhattan, Minkowski</td>
<td valign="top" align="left">Shows the neighbors that influenced the decision</td>
<td valign="top" align="left">A classifier supporting different distance metrics</td>
</tr>
<tr>
<td valign="top" align="left">n-SBC</td>
<td valign="top" align="left">Deterministic RBC per feature, then full binary string</td>
<td valign="top" align="left">Instance-level Hamming matching over RBC encodings</td>
<td valign="top" align="left">Hamming</td>
<td valign="top" align="left">Bits map to feature segments and show the similar patterns that influenced the decision</td>
<td valign="top" align="left">Classifier model using simple RBC and Hamming</td>
</tr></tbody>
</table>
</table-wrap>
<p>To make these differences concrete, <xref ref-type="table" rid="T3">Table 3</xref> provides a compact comparison covering code construction and decision rules; in addition, a small ablation replacing RBC with a fixed-width standard binary encoding can isolate RBC&#x00027;s contribution. As discussed, RBC benefits n-SBC because adjacent codes differ by only one bit, preserving similarity between neighboring values; in contrast, standard binary encodings may flip multiple bits between consecutive values, distorting local neighborhoods and weakening bit-wise interpretability.</p>
<p>Therefore, unlike ECOC and supervised hashing, and unlike classical <italic>k-</italic>NN in feature space, n-SBC is an associative classifier whose decision rule operates directly in Hamming space.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Our proposal model</title>
<p>In this section, the main idea of the N-Similarity Binary Classifier (n-SBC) algorithm is explained, along with its operation; the learning phase and, finally, the classification phase of the proposed algorithm are addressed. The proposed algorithm is primarily based on the Hamming string similarity method and the reflected binary code (RBC) encoder, also known as the Gray code, both of which are fundamental components of the model. The purpose of this study is to improve the performance of associative classifiers across several medical datasets to enhance disease detection.</p>
<p>Our proposed method requires preprocessing the dataset beforehand to deal with missing values and categorical data. To handle missing values, the classic imputation method was applied, replacing missing values with the mean for numerical features and the mode for categorical features; this resulted in datasets without missing values. Finally, the categorical variables were converted using the classic label encoding method, which assigns each category a unique numeric value.</p>
<p>Then, the RBC method is applied to the entire dataset. Every feature of the input patterns <italic>x</italic><sup>&#x003BC;</sup> is converted to its equivalent binary RBC code to obtain a p-dimensional binary string, where p is the bit length of the largest converted value, denoted as <inline-formula><mml:math id="M6"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p>
<p>In order to obtain a single binary string, we concatenate each transformed feature together, expressed as follows:</p>
<disp-formula id="EQ6"><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>Let us assume that there is a dataset <italic>D</italic>, divided into two subsets: <italic>L</italic> and <italic>T</italic>, for learning and testing, respectively.</p>
<sec>
<label>3.1</label>
<title>Learning phase for the proposed approach</title>
<p>The learning phase of the n-SBC model has only one step. It consists of creating a memory matrix, denoted by <italic>M</italic>, which contains every transposed binary string pattern of the learning dataset <italic>L</italic>, generated previously by applying the RBC code to each pattern. Each row of the matrix <italic>M</italic> thus corresponds to the entire binary string representation <italic>b</italic><sup>&#x003BC;</sup> of one learning pattern, expressed as follows:</p>
<disp-formula id="EQ7"><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x022EE;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<title>Training of n-SBC with RBC coding.</title>
<p>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1610856-i0001.tif"/>
</p>
</statement>
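<p>Under this description, the training phase reduces to encoding and stacking the learning patterns; the following Python sketch is our compact illustration of Algorithm 1, reusing <monospace>encode_pattern</monospace> from the previous sketch:</p>
<code language="python">def train(patterns, labels, widths):
    """Build the memory M of Equation 7: one RBC-encoded binary
    string per learning pattern, stored together with its class."""
    return [(encode_pattern(x, widths), label)
            for x, label in zip(patterns, labels)]</code>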
</sec>
<sec>
<label>3.2</label>
<title>Classification phase for the proposed approach</title>
<p>The classification phase of the n-SBC model has four stages. The first is the calculation of the Hamming distance between the unknown pattern <italic>x</italic><sup>&#x003C9;</sup> and each pattern of the dataset <italic>L</italic>. To calculate it, let us first assume that the unknown pattern has already undergone the RBC transformation, yielding <italic>b</italic><sup>&#x003C9;</sup>. The Hamming distance, <italic>H</italic>(<italic>b</italic><sup>&#x003C9;</sup>, <italic>b</italic><sup>&#x003BC;</sup>), then represents the number of positions at which the corresponding bits differ, expressed as follows:</p>
<disp-formula id="EQ8"><mml:math id="M14"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>H</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo stretchy="true">|</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup><mml:mo stretchy="true">|</mml:mo><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>where <italic>u</italic> is the dimensionality of the patterns. <inline-formula><mml:math id="M15"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup><mml:mtext>&#x000A0;</mml:mtext><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> represent the <italic>j</italic>-th elements of the pattern <italic>b</italic><sup>&#x003C9;</sup> and the training dataset pattern <italic>b</italic><sup>&#x003BC;</sup>.</p>
<p>This first stage yields a similarity vector, denoted <italic>Z</italic><sup>&#x003C9;</sup>, whose entries are the pattern dimensionality <italic>u</italic> minus the Hamming distance between <italic>b</italic><sup>&#x003C9;</sup> and each pattern of the dataset <italic>L</italic>; thus, larger entries indicate greater similarity. The dimension of <italic>Z</italic><sup>&#x003C9;</sup> is equal to the cardinality of the dataset <italic>L</italic>, and we can represent it with the following expression:</p>
<disp-formula id="EQ9"><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mi>u</mml:mi><mml:mo>-</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>u</mml:mi><mml:mo>-</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x022EE;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>u</mml:mi><mml:mo>-</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>The second stage of the classification phase consists of handling the generated vector <italic>Z</italic><sup>&#x003C9;</sup> to determine the class.</p>
<p>First, let <italic>C</italic> be the set of all the classes, such that <italic>C</italic> &#x0003D; {<italic>k</italic><sub>1</sub>, <italic>k</italic><sub>2</sub>, &#x02026;, <italic>k</italic><sub><italic>c</italic></sub>}, where <italic>c</italic> is the number of classes. Then, let us introduce <italic>K</italic><sub><italic>i</italic></sub> to denote the number of patterns present within the <italic>i</italic>-th class, expressed as <italic>K</italic><sub><italic>i</italic></sub> &#x0003D; |<italic>k</italic><sub><italic>i</italic></sub>|, &#x02200;<italic>i</italic> &#x02208; {1, &#x02026;, <italic>c</italic>}. Now, we determine the smallest pattern count across all the classes, termed <italic>K</italic><sub>min</sub>, as follows:</p>
<disp-formula id="EQ10"><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mrow><mml:mi>min</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<p>Subsequently, for any integer <italic>n</italic> satisfying 1 &#x02264; <italic>n</italic> &#x02264; <italic>K</italic><sub>min</sub>, we extract the <italic>n</italic>-th largest component of the vector <italic>Z</italic><sup>&#x003C9;</sup> within each class <italic>i</italic>, represented as <inline-formula><mml:math id="M18"><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. The hyperparameter <italic>n</italic> controls how many of the largest components are aggregated, and different values of <italic>n</italic> correspond to different versions of the classifier. Finally, a vector <italic>y</italic><sup><italic>n</italic></sup> is created by summing, per class, the selected <italic>n</italic> largest components; therefore, <italic>y</italic><sup><italic>n</italic></sup> is calculated by the following expression:</p>
<disp-formula id="EQ11"><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x022EE;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p>where the subscript (<italic>i</italic>) denotes the <italic>i</italic>-th largest component of <italic>Z</italic><sup>&#x003C9;</sup> among the samples of the corresponding class, and the superscript indexes the class; hence each row of <italic>y</italic><sup><italic>n</italic></sup> aggregates the <italic>n</italic> most similar samples of one class, with <italic>n</italic> &#x02264; <italic>K</italic><sub>min</sub> guaranteed by <xref ref-type="disp-formula" rid="EQ10">Equation 10</xref>. The third step consists of assigning to the unknown pattern <italic>x</italic><sup>&#x003C9;</sup> its corresponding class <italic>y</italic><sup>&#x003C9;</sup>. For that, we update the vector <italic>y</italic><sup><italic>n</italic></sup> with the following rule:</p>
<disp-formula id="EQ12"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">{</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02265;</mml:mo><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02228;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<p>Finally, the fourth stage consists of calculating the predicted class of the unknown pattern <italic>x</italic><sup>&#x003C9;</sup> using the one-hot vector created in stage three. The class is assigned based on the position of the hot value, since each row of the vector corresponds to a class in the dataset, satisfying the expression <inline-formula><mml:math id="M21"><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:mi>i</mml:mi><mml:mo>&#x022C5;</mml:mo><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>.</p>
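<p>To make the aggregation and decision rule of Equations 10&#x02013;12 concrete, the following minimal Python sketch takes one similarity vector per class and returns the index of the hot value. It is an illustrative reconstruction with hypothetical names, not the authors&#x00027; reference implementation.</p>
<p>
<preformat>
# Equations 10-12 in miniature: sum the n largest similarity components
# of each class, then pick the class whose sum is largest.
def predict_from_similarities(Z_per_class, n):
    # Z_per_class: one list of similarities per class (the vector Z^w
    # restricted to the samples of that class); n must not exceed the
    # smallest class size K_min.
    y_n = [sum(sorted(Z, reverse=True)[:n]) for Z in Z_per_class]
    return y_n.index(max(y_n))        # position of the hot value

# With the example of Section 3.3: Z_A = [2, 4], Z_B = [3, 4, 5], n = 2
# gives sums (6, 9) and predicted index 1, i.e., class B.
print(predict_from_similarities([[2, 4], [3, 4, 5]], 2))
</preformat>
</p>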
<p>One of the advantages of the n-SBC is that it aggregates only the top <italic>n</italic> most similar components per class, so additional majority-class samples cannot grow a class&#x00027;s evidence unboundedly. Besides, the RBC encoding preserves similarity structure (adjacent numeric values differ by one bit), so compact minority clusters remain coherent in Hamming space and can dominate the selected top <italic>n</italic> samples. Consequently, decisions are driven by local match quality rather than by class prevalence, thereby mitigating the typical bias toward the majority class. This can enhance model performance on datasets with class imbalance.</p>
<p>Regarding the scope of applicability, n-SBC tends to perform well when classes exhibit locally coherent neighborhoods in feature space and when similarity is meaningfully captured by RBC. It may underperform when features are highly non-monotonic or noisy, when classes strongly overlap, or when <italic>B</italic> is inflated by many irrelevant bits.</p>
<statement content-type="algorithm" id="algorithm_2">
<label>Algorithm 2</label>
<title>Classification of n-SBC.</title>
<p>
<preformat>
<bold>Input:</bold> Query <italic>x</italic><sup>&#x003C9;</sup>; memory <italic>M</italic>; encoding parameters (<italic>Q</italic>, {<italic>b</italic><sub><italic>i</italic></sub>}, {<italic>G</italic><sub><italic>k</italic></sub>}); top-<italic>n</italic> policy (global <italic>n</italic> or per-class {<italic>n</italic><sub><italic>c</italic></sub>}).
<bold>Output:</bold> Predicted class <italic>y</italic>.
1 : <italic>b</italic><sup>&#x003C9;</sup> &#x02190; <italic>encode x</italic><sup>&#x003C9;</sup>
2 : <bold>For</bold> <italic>each class c in</italic> {1..<italic>C</italic>} :
3 :    <bold>For</bold> <italic>each pattern &#x003BC; in L</italic><sub><italic>c</italic></sub> :
4 :       <italic>s</italic><sub><italic>c</italic>,&#x003BC;</sub> &#x02190; 0
5 :       <bold>For</bold> <italic>k</italic> &#x0003D; 1..<italic>K</italic> :
6 :          <italic>u</italic> &#x02190; <italic>bits of b</italic><sup>&#x003C9;</sup> <italic>in G<sub>k</sub></italic>
7 :          <italic>v</italic> &#x02190; <italic>bits of b</italic><sup>&#x003BC;</sup> <italic>in G</italic><sub><italic>k</italic></sub>
8 :          <italic>s</italic><sub><italic>c</italic>,&#x003BC;</sub> &#x02190; <italic>s</italic><sub><italic>c</italic>,&#x003BC;</sub> &#x0002B; ( |<italic>G</italic><sub><italic>k</italic></sub>| &#x02212; <italic>Hamming</italic> (<italic>u, v</italic>) )
9 :    <italic>T</italic><sub><italic>c</italic></sub> &#x02190; <italic>indices of the n</italic> (<italic>or n</italic><sub><italic>c</italic></sub>) <italic>largest values in</italic> {<italic>s</italic><sub><italic>c</italic>,&#x003BC;</sub> : &#x003BC; &#x02208; <italic>L</italic><sub><italic>c</italic></sub>}
10 :   <italic>S</italic><sub><italic>c</italic></sub> &#x02190; &#x02211; <italic>s</italic><sub><italic>c</italic>,&#x003BC;</sub> <italic>for</italic> &#x003BC; &#x02208; <italic>T</italic><sub><italic>c</italic></sub>
11 : <italic>y</italic> &#x02190; <italic>argmax</italic><sub><italic>c</italic></sub> <italic>S</italic><sub><italic>c</italic></sub>
12 : <bold>Return</bold> <italic>y, along with</italic> {<italic>T</italic><sub><italic>c</italic></sub>} <italic>for explanation</italic>
</preformat>
</p>
</statement>
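<p>To make Algorithm 2 concrete, the following Python sketch reproduces its steps under the notation above. It is an illustrative reconstruction rather than the authors&#x00027; reference implementation: the helper names, the representation of the memory <italic>M</italic> as a per-class dictionary of bit strings, and the list of bit-position groups are assumptions.</p>
<p>
<preformat>
# Sketch of Algorithm 2 (illustrative names, not the reference code).
# class_patterns maps each class label to its stored RBC bit strings;
# groups lists the bit positions G_k of each encoded feature.
def n_sbc_classify(b_query, class_patterns, groups, n):
    def hamming(u, v):
        return sum(a != b for a, b in zip(u, v))
    S, T = {}, {}
    for c, patterns in class_patterns.items():
        # Per-sample similarity: sum over groups of |G_k| - Hamming(u, v).
        scores = [sum(len(G) - hamming([b_mu[k] for k in G],
                                       [b_query[k] for k in G])
                      for G in groups)
                  for b_mu in patterns]
        order = sorted(range(len(scores)), key=scores.__getitem__,
                       reverse=True)
        T[c] = order[:n]                # top-n samples, kept for explanation
        S[c] = sum(scores[i] for i in T[c])
    return max(S, key=S.get), T         # argmax over classes of S_c

# Usage with the Section 3.3 example (two 3-bit features):
label, top = n_sbc_classify("110010",
                            {"A": ["101111", "111110"],
                             "B": ["010001", "000010", "110000"]},
                            [(0, 1, 2), (3, 4, 5)], 2)
print(label, top)                       # B {'A': [1, 0], 'B': [2, 1]}
</preformat>
</p>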
</sec>
<sec>
<label>3.3</label>
<title>Example of the train and classification phase for n-SBC</title>
<p>Below, we present a simplified, step-by-step example of the learning and classification phases of the proposed n-SBC classifier. The patterns used in this practical example are detailed next, where <italic>x</italic><sup>1</sup> and <italic>x</italic><sup>2</sup> belong to class <italic>A</italic>, while patterns <italic>x</italic><sup>3</sup>, <italic>x</italic><sup>4</sup> and <italic>x</italic><sup>5</sup> belong to class <italic>B</italic>.</p>
<disp-formula id="EQ13"><mml:math id="M23"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>28</mml:mn></mml:mrow><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>17</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>21</mml:mn></mml:mrow><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>09</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>06</mml:mn></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>15</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>24</mml:mn></mml:mrow><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>01</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>07</mml:mn></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>28</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<p>After applying the reflected binary code (RBC) to each feature and concatenating the resulting binary strings into a column vector, the following patterns are obtained:</p>
<disp-formula id="EQ14"><mml:math id="M25"><mml:mtable columnalign="right"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mtext>&#x000A0;</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mrow><mml:mtable columnalign="left"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mfrac></mml:mrow><mml:mo 
stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<p>Following <xref ref-type="disp-formula" rid="EQ2">Equation 2</xref>, the matrix <italic>M</italic> is created; it contains the transposed binary-string representation of every training pattern to be handled in the classification phase and, in this case, is expressed as follows:</p>
<disp-formula id="EQ15"><mml:math id="M27"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p>At this point, the learning phase is complete: all the binary strings from the learning dataset are ready for the subsequent inference steps. Then, for the classification phase, the vector <italic>Z</italic><sup>&#x003C9;</sup> is created from the Hamming distances between each stored pattern and the unknown pattern <italic>x</italic><sup>&#x003C9;</sup>. However, before obtaining these distances, we define <italic>x</italic><sup>&#x003C9;</sup> as follows:</p>
<disp-formula id="EQ16"><mml:math id="M28"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>16</mml:mn></mml:mrow><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>05</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>&#x02192;</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<p>Therefore, <italic>Z</italic><sup>&#x003C9;</sup> is computed as follows; in this case, <italic>u</italic> &#x0003D; 6 because each binary pattern has six bits.</p>
<disp-formula id="EQ17"><mml:math id="M29"><mml:mrow><mml:msup><mml:mi>Z</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>b</mml:mi><mml:mn>1</mml:mn></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>6</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>b</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>6</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>b</mml:mi><mml:mn>3</mml:mn></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>6</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>b</mml:mi><mml:mn>4</mml:mn></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>6</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mi>&#x003C9;</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>b</mml:mi><mml:mn>5</mml:mn></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>6</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>2</mml:mn></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>4</mml:mn></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>3</mml:mn></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>4</mml:mn></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>5</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math><label>(17)</label></disp-formula>
<p>At this stage, we must define the value of <italic>n</italic>, which in this example we define as <italic>n</italic> &#x0003D; 2. Having established the necessary parameters, we instantiate the vector <italic>y</italic><sup><italic>n</italic></sup> following <xref ref-type="disp-formula" rid="EQ11">Equation 11</xref>.</p>
<disp-formula id="EQ18"><mml:math id="M30"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>4</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>2</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>6</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>5</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>4</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>9</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(18)</label></disp-formula>
<p>where the <italic>n</italic> largest components of each class are summed to create the column vector <italic>y</italic><sup><italic>n</italic></sup>. Positions 1 and 2 of <italic>Z</italic><sup>&#x003C9;</sup> correspond to class A, and positions 3, 4, and 5 to class B. The components with the highest values in <italic>Z</italic><sup>&#x003C9;</sup>, i.e., those with the greatest similarity to the unknown pattern, belong to samples of class B. This information can be used to support the model&#x00027;s explainability. Finally, based on the rule defined previously in <xref ref-type="disp-formula" rid="EQ12">Equation 12</xref>, we update the vector <italic>y</italic><sup><italic>n</italic></sup>, obtaining <italic>y</italic><sup>&#x003C9;</sup>.</p>
<disp-formula id="EQ19"><mml:math id="M31"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C9;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(19)</label></disp-formula>
<p>In this example, the resulting one-hot vector has the value 1 in its second position, indicating that the pattern <italic>x</italic><sup>&#x003C9;</sup> belongs to the second class, <bold>B</bold>.</p>
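<p>The worked example can be verified numerically. The short Python sketch below (with hypothetical helper names) recomputes Equations 17&#x02013;19 directly from the bit strings of Equations 14 and 16:</p>
<p>
<preformat>
# Recomputes the worked example: Z^w = (2, 4, 3, 4, 5), y^n = (6, 9),
# and predicted class B. Bit strings taken from Equations 14 and 16.
train = {"A": ["101111", "111110"],
         "B": ["010001", "000010", "110000"]}
b_w = "110010"                                   # RBC of x^w (Equation 16)

def similarity(u, v):                            # u = 6 minus Hamming distance
    return sum(a == b for a, b in zip(u, v))

Z = {c: [similarity(b_w, b) for b in bs] for c, bs in train.items()}
print(Z)                                         # {'A': [2, 4], 'B': [3, 4, 5]}
y_n = {c: sum(sorted(z, reverse=True)[:2]) for c, z in Z.items()}   # n = 2
print(y_n)                                       # {'A': 6, 'B': 9}
print(max(y_n, key=y_n.get))                     # 'B'
</preformat>
</p>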
<p>To understand why the unknown pattern <italic>x</italic><sup>&#x003C9;</sup> was classified as class <bold>B</bold>, consider its representation after RBC conversion: {110010}. <xref ref-type="table" rid="T4">Table 4</xref> illustrates the samples and features that influenced the decision of the n-SBC model.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Example of the explainability of n-SBC.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Pattern number</bold></th>
<th valign="top" align="center"><bold>Class</bold></th>
<th valign="top" align="center"><bold>Patterns in Train b<sup>L</sup></bold></th>
<th valign="top" align="center"><bold>Hamming difference vector against b<sup>&#x003C9;</sup></bold></th>
<th valign="top" align="center"><bold>Value of Z<sup>&#x003C9;</sup></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>b</italic><sup>1</sup></td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">101111</td>
<td valign="top" align="center">100010</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left"><italic>b</italic><sup>2</sup></td>
<td valign="top" align="center">A</td>
<td valign="top" align="center">111110</td>
<td valign="top" align="center">110011</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left"><italic>b</italic><sup>3</sup></td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">010001</td>
<td valign="top" align="center">011100</td>
<td valign="top" align="center">3</td>
</tr>
<tr>
<td valign="top" align="left"><italic>b</italic><sup>4</sup></td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">000010</td>
<td valign="top" align="center">001111</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left"><italic>b</italic><sup>5</sup></td>
<td valign="top" align="center">B</td>
<td valign="top" align="center">110000</td>
<td valign="top" align="center">111101</td>
<td valign="top" align="center">5</td>
</tr></tbody>
</table>
</table-wrap>
<p>Since <italic>n</italic> &#x0003D; 2, the two closest samples from each class are selected; <italic>b</italic><sup>&#x003C9;</sup> is classified as class B because it is most similar to patterns <italic>b</italic><sup>4</sup> and <italic>b</italic><sup>5</sup>, which belong to class B. Moreover, since each feature of the dataset is represented by 3 bits in the strings <italic>b</italic><sup>&#x003C9;</sup> and <italic>b</italic><sup><italic>i</italic></sup>, it can be observed that the pattern <italic>b</italic><sup>&#x003C9;</sup> is similar to <italic>b</italic><sup>4</sup> because its second feature matches completely, and similar to <italic>b</italic><sup>5</sup> because its first feature matches in its entirety. In this way, we can understand why the model decided to classify this pattern into its corresponding class.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Results and discussion</title>
<p>In this section, we present a detailed analysis of the experimental comparison between our proposed algorithm and well-known state-of-the-art classification models. Subsection 4.1 describes the datasets selected for the experimental stage. Subsection 4.2 explains the validation method used, while subsection 4.3 describes the performance measures. Subsection 4.4 shows the results obtained using the experimental methods and metrics described, and subsection 4.5 discusses the statistical significance of the comparison.</p>
<sec>
<label>4.1</label>
<title>Datasets</title>
<p>For the experimental phase of the present work, 20 datasets were selected, covering a variety of diseases with a focus on chronic conditions.</p>
<p>These datasets were mainly obtained from three widely known repositories: the KEEL repository (available at <ext-link ext-link-type="uri" xlink:href="https://sci2s.ugr.es/keel/index.php">https://sci2s.ugr.es/keel/index.php</ext-link>), the UCI Machine Learning repository (accessible at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/index.php">https://archive.ics.uci.edu/ml/index.php</ext-link>), and the Kaggle repository (found at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets">https://www.kaggle.com/datasets</ext-link>). To facilitate a deeper understanding, a complete description of each selected dataset has been compiled. This compilation is summarized in <xref ref-type="table" rid="T5">Table 5</xref>, which provides information on each dataset&#x00027;s characteristics, including the nature of the disease it represents, the data structure, and the class imbalance index.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Datasets description.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Datasets</bold></th>
<th valign="top" align="center" colspan="2"><bold>Features</bold></th>
<th valign="top" align="center"><bold>Patterns</bold></th>
<th valign="top" align="center"><bold>IR</bold></th>
<th valign="top" align="center"><bold>Classes</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Categorical</bold></th>
<th valign="top" align="center"><bold>Numerical</bold></th>
<th/>
<th/>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Appendicitis</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">106</td>
<td valign="top" align="center">4.04</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Exasens COPD</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">80</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Acute Inflammations D1</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">120</td>
<td valign="top" align="center">1.03</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Acute Inflammations D2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">120</td>
<td valign="top" align="center">1.40</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">ACPs Lung Cancer</td>
<td valign="top" align="center">38</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">901</td>
<td valign="top" align="center">31.25</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Vertical Column 2C</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">310</td>
<td valign="top" align="center">2.1</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Contraceptive</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1,473</td>
<td valign="top" align="center">1.88</td>
<td valign="top" align="center">3</td>
</tr>
<tr>
<td valign="top" align="left">Cryotherapy</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">90</td>
<td valign="top" align="center">1.14</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Dermatology</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">33</td>
<td valign="top" align="center">366</td>
<td valign="top" align="center">5.6</td>
<td valign="top" align="center">6</td>
</tr>
<tr>
<td valign="top" align="left">Hepatitis</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">155</td>
<td valign="top" align="center">3.84</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Mammographic Masses</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">961</td>
<td valign="top" align="center">1.15</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Wisconsin</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">683</td>
<td valign="top" align="center">1.85</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">HCC Survival</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">50</td>
<td valign="top" align="center">165</td>
<td valign="top" align="center">1.61</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Autism Adolescent</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">104</td>
<td valign="top" align="center">1.29</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Autism Child</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">292</td>
<td valign="top" align="center">1.07</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Survey Lung Cancer</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">309</td>
<td valign="top" align="center">6.90</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Breast Cancer Coimbra</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">116</td>
<td valign="top" align="center">1.23</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Saheart</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">462</td>
<td valign="top" align="center">1.88</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Cirrhosis</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">44</td>
<td valign="top" align="center">267</td>
<td valign="top" align="center">3.85</td>
<td valign="top" align="center">3</td>
</tr>
<tr>
<td valign="top" align="left">Multiple Sclerosis</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">273</td>
<td valign="top" align="center">1.18</td>
<td valign="top" align="center">2</td>
</tr></tbody>
</table>
</table-wrap>
<p>The imbalance index for each dataset was calculated as follows:</p>
<disp-formula id="EQ20"><mml:math id="M32"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>I</mml:mi><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>p</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>p</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(20)</label></disp-formula>
<p>In the following, we provide a brief description of the selected datasets.</p>
<p><bold>Appendicitis</bold>: This dataset was collected at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/timrie/appendicitis">https://www.kaggle.com/datasets/timrie/appendicitis</ext-link> from the Kaggle repository. The dataset comprises seven medical measures for 106 patients, with classes indicating whether each patient has appendicitis. Kaggle Snapshot: appendicitis/timrie, downloaded 2024-09-18.</p>
<p><bold>Exasens COPD</bold>: This dataset aims to classify patients, based on demographic and saliva-sample information, into four classes: chronic obstructive pulmonary disease (COPD), asthma, respiratory infections, and completely healthy patients. The dataset was collected from the UCI Machine Learning Repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/Exasens">https://archive.ics.uci.edu/ml/datasets/Exasens</ext-link>. Downloaded 2024-06-14.</p>
<p><bold>Acute Inflammations D1</bold> and <bold>Acute Inflammations D2</bold>: These datasets are from a study aimed at detecting two urinary system diseases. Both datasets were obtained from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/Acute&#x0002B;Inflammations">https://archive.ics.uci.edu/ml/datasets/Acute&#x0002B;Inflammations</ext-link>. Downloaded 2024-03-14.</p>
<p><bold>ACPs Lung Cancer</bold>: This dataset was obtained from the UCI repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/Anticancer&#x0002B;peptides">https://archive.ics.uci.edu/ml/datasets/Anticancer&#x0002B;peptides</ext-link>, which contains information on peptides (amino acid codes) and their anticancer activity in lung cancer cell lines. Downloaded 2024-03-14.</p>
<p><bold>Vertical Column</bold>: This dataset aims to detect whether a patient has a vertebral column disease. It was recovered from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="http://archive.ics.uci.edu/ml/datasets/vertebral&#x0002B;column">http://archive.ics.uci.edu/ml/datasets/vertebral&#x0002B;column</ext-link>. In Vertical Column 2C, the classes Disk Hernia and Spondylolisthesis were merged into a single class, labeled Abnormal. Downloaded 2024-03-14.</p>
<p><bold>Contraceptive</bold>: This dataset was collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="http://archive.ics.uci.edu/dataset/30/contraceptive&#x0002B;method&#x0002B;choice">http://archive.ics.uci.edu/dataset/30/contraceptive&#x0002B;method&#x0002B;choice</ext-link>. It is used to predict the current contraceptive method from demographic and socioeconomic information. Downloaded 2024-03-14.</p>
<p><bold>Cryotherapy</bold>: This dataset was collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/429/cryotherapy&#x0002B;dataset">https://archive.ics.uci.edu/dataset/429/cryotherapy&#x0002B;dataset</ext-link>, which contains treatment outcomes for 90 patients who underwent cryotherapy. It has two classes: successful and unsuccessful. Downloaded 2024-03-14.</p>
<p><bold>Dermatology</bold>: This dataset was obtained from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/33/dermatology">https://archive.ics.uci.edu/dataset/33/dermatology</ext-link>; its main aim is to determine the type of erythemato-squamous disease based on 34 patient attributes. Downloaded 2024-03-14.</p>
<p><bold>Hepatitis</bold>: This dataset aims to detect hepatitis using simple tabular patient data, most of which is categorical. The dataset has two classes and was collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="http://archive.ics.uci.edu/dataset/46/hepatitis">http://archive.ics.uci.edu/dataset/46/hepatitis</ext-link>. Downloaded 2024-02-03.</p>
<p><bold>Mammographic Masses</bold>: This dataset aims to distinguish between benign and malignant mammographic masses using BI-RADS attributes and patient age. The dataset was collected from <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/161/mammographic&#x0002B;mass">https://archive.ics.uci.edu/dataset/161/mammographic&#x0002B;mass</ext-link>, in the UCI Machine Learning repository. Downloaded 2024-01-21.</p>
<p><bold>Wisconsin</bold>: This dataset was collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/breast&#x0002B;cancer&#x0002B;wisconsin&#x0002B;(diagnostic)">https://archive.ics.uci.edu/ml/datasets/breast&#x0002B;cancer&#x0002B;wisconsin&#x0002B;(diagnostic)</ext-link>, which describes cases from a study conducted at the University of Wisconsin Hospitals in Madison involving patients who had undergone surgery for breast cancer. The classification task is to determine if the detected tumor is benign or malignant. Downloaded 2024-02-24.</p>
<p><bold>HCC Survival</bold>: This dataset was obtained from <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/423/hcc&#x0002B;survival">https://archive.ics.uci.edu/dataset/423/hcc&#x0002B;survival</ext-link>, in the UCI Machine Learning repository. It contains real clinical data from 165 patients diagnosed with HCC, with the aim of predicting 1-year survival after diagnosis. Downloaded 2024-03-28.</p>
<p><bold>Autism Adolescent and Child</bold>: These datasets were collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/420/autistic&#x0002B;spectrum&#x0002B;disorder&#x0002B;screening&#x0002B;data&#x0002B;for&#x0002B;adolescent">https://archive.ics.uci.edu/dataset/420/autistic&#x0002B;spectrum&#x0002B;disorder&#x0002B;screening&#x0002B;data&#x0002B;for&#x0002B;adolescent</ext-link> and <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/419/autistic&#x0002B;spectrum&#x0002B;disorder&#x0002B;screening&#x0002B;data&#x0002B;for&#x0002B;children">https://archive.ics.uci.edu/dataset/419/autistic&#x0002B;spectrum&#x0002B;disorder&#x0002B;screening&#x0002B;data&#x0002B;for&#x0002B;children</ext-link>, respectively. Both datasets aim to detect Autistic Spectrum Disorder. Downloaded 2024-04-02.</p>
<p><bold>Survey Lung Cancer</bold>: The classification task in this dataset is to determine whether a given patient has lung cancer, based on variables collected via a survey. The set was obtained from the Kaggle repository at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/mysarahmadbhat/lung-cancer">https://www.kaggle.com/mysarahmadbhat/lung-cancer</ext-link>. Kaggle Snapshot: Lung Cancer/Mysar Ahmad Bhat, downloaded 2024-04-13.</p>
<p><bold>Breast Cancer Coimbra</bold>: This dataset was collected from <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/451/breast&#x0002B;cancer&#x0002B;coimbra">https://archive.ics.uci.edu/dataset/451/breast&#x0002B;cancer&#x0002B;coimbra</ext-link>, in the UCI Machine Learning Repository. The dataset comprises clinical features from 116 participants (64 patients with breast cancer and 52 healthy controls). Downloaded 2024-02-22.</p>
<p><bold>Saheart</bold>: This dataset aims to detect patients with heart disease; it is hosted by Stanford University and was collected at <ext-link ext-link-type="uri" xlink:href="https://web.stanford.edu/&#x0007E;hastie/ElemStatLearn//datasets/SAheart.data">https://web.stanford.edu/&#x0007E;hastie/ElemStatLearn//datasets/SAheart.data</ext-link>. Downloaded 2024-02-24.</p>
<p><bold>Cirrhosis</bold>: This dataset comprises 17 clinical features for predicting survival in patients with liver cirrhosis; it was collected from the UCI Machine Learning repository at <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/878/cirrhosis&#x0002B;patient&#x0002B;survival&#x0002B;prediction&#x0002B;dataset-1">https://archive.ics.uci.edu/dataset/878/cirrhosis&#x0002B;patient&#x0002B;survival&#x0002B;prediction&#x0002B;dataset-1</ext-link>. Downloaded 2024-02-22.</p>
<p><bold>Multiple Sclerosis</bold>: The classification task in this dataset is to detect multiple sclerosis using patient information, such as personal data, symptoms, and metrics from medical tests. The dataset was collected from <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/desalegngeb/conversion-predictors-of-cis-to-multiple-sclerosis/data">https://www.kaggle.com/datasets/desalegngeb/conversion-predictors-of-cis-to-multiple-sclerosis/data</ext-link> in the Kaggle repository. Kaggle Snapshot: Multiple Sclerosis Disease/A Legacy Grandmaster!, downloaded 2024-04-11.</p>
</sec>
<sec>
<label>4.2</label>
<title>Validation methods</title>
<p>In this section, we describe the validation method used in the experimentation stage. To obtain reliable results when measuring classifier performance, it is necessary to first apply a validation method that divides the original dataset into two sets: a test set and a learning set.</p>
<p>One of the most widely used methods is <italic>k</italic>-fold cross-validation, which randomly divides the original set into k equal-sized subsets (folds), using one fold as the test set and the remaining folds as the training set. This process is repeated k times so that every fold is used exactly once as the test set (<xref ref-type="bibr" rid="B61">Wong, 2015</xref>; <xref ref-type="bibr" rid="B51">Sarker, 2021</xref>). There is also a stratified version of this method, called stratified <italic>k</italic>-fold cross-validation, which is highly recommended for datasets with class imbalance, since it preserves approximate class proportions within each fold. In this way, the test set created in each iteration reflects, as closely as possible, the class distribution of the original set, which helps mitigate errors caused by class bias (<xref ref-type="bibr" rid="B16">Derrac et al., 2015</xref>; <xref ref-type="bibr" rid="B42">Nakatsu, 2020</xref>). <xref ref-type="fig" rid="F1">Figure 1</xref> shows the operation of stratified <italic>k</italic>-fold cross-validation for <italic>k</italic> = 5.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Stratified five-fold cross-validation method.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1610856-g0001.tif">
<alt-text content-type="machine-generated">Diagram illustrating stratified five-fold cross-validation of a dataset with classes distributed as 80% Class 1 and 20% Class 2. The original dataset is split into five iterations. Each iteration has a test set (T) and learning sets (L), showing distribution with bar graphs. The test set is outlined in orange, and learning sets are outlined in blue for each iteration, ensuring class proportions remain consistent.</alt-text>
</graphic>
</fig>
<p>Given the class-imbalanced datasets used in the current study, stratified <italic>k-</italic>fold cross-validation with <italic>k</italic> = 10 has been employed to maintain approximately equal proportions of patterns per class across folds.</p>
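<p>A minimal sketch of this validation setup with scikit-learn&#x00027;s StratifiedKFold is shown below; the feature matrix, the labels, and the random seed are placeholders.</p>
<p>
<preformat>
import numpy as np
from sklearn.model_selection import StratifiedKFold

# Placeholder data: 100 patterns, 7 features, imbalanced labels (IR = 4.0).
X = np.random.rand(100, 7)
y = np.array([0] * 80 + [1] * 20)

# Stratified 10-fold CV: each fold keeps the ~80/20 class proportions.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    print(fold, np.bincount(y[test_idx]))   # roughly [8 2] per test fold
</preformat>
</p>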
</sec>
<sec>
<label>4.3</label>
<title>Performance measures</title>
<p>The evaluation of classifier performance is a crucial area of interest in the specialized literature. The most popular and simplest way to measure performance is the accuracy metric, which calculates the percentage of patterns in the test set that are correctly classified; that is, it counts the number of correctly classified patterns with respect to the total number of patterns. However, the classifier&#x00027;s performance can be represented more completely by means of a confusion matrix, shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, which consists of four possible cases within a two-class classification problem (<xref ref-type="bibr" rid="B24">Garc&#x000ED;a et al., 2010a</xref>): TP (true positive), TN (true negative), FP (false positive), and FN (false negative).</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Confusion matrix for a bi-class dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1610856-g0002.tif">
<alt-text content-type="machine-generated">Confusion matrix chart showing predictions versus true values. Rows represent true values: true positives (TP) and false positives (FP). Columns represent predictions: true negatives (TN) and false negatives (FN).</alt-text>
</graphic>
</fig>
<p>As mentioned above, one of the most popular metrics for measuring classifier performance is accuracy. In the case of bi-class problems, and using the confusion matrix as a basis, the metric can be expressed as in the equation:</p>
<disp-formula id="EQ21"><mml:math id="M33"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(21)</label></disp-formula>
<p>However, more robust metrics have emerged in the literature to mitigate the limitations of accuracy, which is not suitable for class-imbalanced datasets, a data complexity commonly found in medical datasets. This complexity distorts the evaluation of the classifier&#x00027;s performance, yielding metric values that do not truly reflect the algorithm&#x00027;s capability (<xref ref-type="bibr" rid="B37">L&#x000F3;pez et al., 2013</xref>).</p>
<p>First, we describe the sensitivity metric, which measures the probability that the classifier returns a positive result when the instance is actually positive. The sensitivity metric can be expressed as follows (<xref ref-type="bibr" rid="B25">Garc&#x000ED;a et al., 2010b</xref>).</p>
<disp-formula id="EQ22"><mml:math id="M34"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(22)</label></disp-formula>
<p>On the other hand, there is another crucial metric, the counterpart of the sensitivity metric: the specificity metric. This metric estimates the probability that the classifier will return a negative result when the instance is actually negative (<xref ref-type="bibr" rid="B25">Garc&#x000ED;a et al., 2010b</xref>).</p>
<disp-formula id="EQ23"><mml:math id="M35"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(23)</label></disp-formula>
<p>There are different metrics for different purposes, such as the area under the ROC curve (AUC), precision, F1 score, and balanced accuracy (BA), but the majority of them are calculated from the confusion matrix (<xref ref-type="bibr" rid="B25">Garc&#x000ED;a et al., 2010b</xref>). Because the datasets selected for this study exhibit class imbalance, we use the Balanced Accuracy (BA) performance metric, which is recommended for such cases (<xref ref-type="bibr" rid="B37">L&#x000F3;pez et al., 2013</xref>; <xref ref-type="bibr" rid="B25">Garc&#x000ED;a et al., 2010b</xref>). The BA metric is defined as the average of the Sensitivity and Specificity metrics.</p>
<disp-formula id="EQ24"><mml:math id="M36"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>B</mml:mi><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(24)</label></disp-formula>
<p>On the other hand, the value of BA in multi-class datasets, for k classes, is calculated as follows:</p>
<disp-formula id="EQ25"><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>B</mml:mi><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(25)</label></disp-formula>
<p>where <italic>T</italic><sub><italic>i</italic></sub> is the number of patterns correctly classified in class <italic>i</italic>, and <italic>N</italic><sub><italic>i</italic></sub> represents the total number of patterns within the dataset of class <italic>i</italic>.</p>
<p>Example. <xref ref-type="fig" rid="F3">Figure 3</xref> shows a confusion matrix for an imbalanced three-class dataset, with 52 patterns in class A, 90 in class B, and 60 in class C, giving an imbalance ratio of IR = 90/52 &#x02248; 1.73.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Example of a confusion matrix for a multi-class dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1610856-g0003.tif">
<alt-text content-type="machine-generated">Confusion matrix table showing predictions versus true values for three classes. True Class A: 50 predicted as A, 1 as B, 1 as C. True Class B: 8 as A, 70 as B, 12 as C. True Class C: 10 as A, 26 as B, 24 as C.</alt-text>
</graphic>
</fig>
<p>For each class <italic>i</italic> &#x02208; {<italic>A, B, C</italic>}, the per-class sensitivity (<italic>T</italic><sub><italic>i</italic></sub>/<italic>N</italic><sub><italic>i</italic></sub>) is:</p>
<disp-formula id="EQ26"><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>50</mml:mn></mml:mrow><mml:mrow><mml:mn>50</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>96</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(26)</label></disp-formula>
<disp-formula id="EQ27"><mml:math id="M39"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>70</mml:mn></mml:mrow><mml:mrow><mml:mn>70</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>8</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mn>12</mml:mn></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>77</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(27)</label></disp-formula>
<disp-formula id="EQ28"><mml:math id="M40"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>24</mml:mn></mml:mrow><mml:mrow><mml:mn>24</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>10</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>26</mml:mn></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>40</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(28)</label></disp-formula>
<p>In this example, the Balanced Accuracy (BA) value of the confusion matrix is as follows:</p>
<disp-formula id="EQ29"><mml:math id="M41"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>B</mml:mi><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:mfrac><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>96</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>77</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>40</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>713</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(29)</label></disp-formula>
</sec>
<sec>
<label>4.4</label>
<title>Time complexity analysis</title>
<p><xref ref-type="table" rid="T6">Table 6</xref> compares the time complexities of the classification algorithms used in the present study and of the proposed n-SBC model.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Comparison of time complexities between algorithms.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Algorithm</bold></th>
<th valign="top" align="center"><bold>Spatial</bold></th>
<th valign="top" align="center" colspan="2"><bold>Time</bold></th>
</tr>
<tr>
<th/>
<th/>
<th valign="top" align="center"><bold>Training</bold></th>
<th valign="top" align="center"><bold>Inference</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">n-SBC</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)<sup>a</sup></td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">k-NN</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(1)</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">SMO</td>
<td valign="top" align="center"><italic>O</italic>(<italic>n</italic><sub><italic>sv</italic></sub>&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|<sup>2</sup>&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>n</italic><sub><italic>sv</italic></sub>&#x0002A;<italic>B</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">Na&#x000EF;ve Bayes</td>
<td valign="top" align="center"><italic>O</italic>(<italic>C</italic>&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>C</italic>&#x0002A;<italic>B</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">C4.5</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|)</td>
<td valign="top" align="center"><italic>O</italic>(|<italic>L</italic>|&#x0002A;<italic>B</italic>&#x0002A;log|<italic>L</italic>|)</td>
<td valign="top" align="center"><italic>O</italic>(log|<italic>L</italic>|)</td>
</tr>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center"><italic>O</italic>(<italic>T</italic>&#x0002A;|<italic>L</italic>|)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>T</italic>&#x0002A;|<italic>L</italic>|&#x0002A;<italic>B</italic>&#x0002A;log|<italic>L</italic>|)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>T</italic>&#x0002A;log|<italic>L</italic>|)</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="center"><italic>O</italic>(<italic>B</italic>&#x0002A;<italic>H</italic> &#x0002B; <italic>H</italic>&#x0002A;<italic>C</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>I</italic>&#x0002A;|<italic>L</italic>|&#x0002A;(<italic>B</italic>&#x0002A;<italic>H</italic> &#x0002B; <italic>H</italic>&#x0002A;<italic>C</italic>))</td>
<td valign="top" align="center"><italic>O</italic>(<italic>B</italic>&#x0002A;<italic>H</italic> &#x0002B; <italic>H</italic>&#x0002A;<italic>C</italic>)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>a</sup>Time complexity for converting all dataset features to their RBC binary strings and concatenating them to create the memory <italic>M</italic> (preparing the training process in the n-SBC classifier).</p>
</table-wrap-foot>
</table-wrap>
<p><bold>Notation</bold>. <bold>n</bold><sub><bold>sv</bold></sub>: Number of support vectors in SVM; <bold>T:</bold> Number of trees in Random Forest; <bold>H:</bold> Number of hidden units in MLP; <bold>I:</bold> Number of epochs in MLP; |<bold>L</bold>|<bold>:</bold> Total number of patterns (instances) in the training dataset; <bold>C:</bold> Number of classes in the dataset; <bold>X</bold><sup>&#x003C9;</sup><bold>:</bold> Unknown (test) pattern to be classified; <bold>d:</bold> Number of features; <bold>b</bold><sub><bold>i</bold></sub><bold>:</bold> RBC bit-length of feature <italic>i</italic>; <bold>B:</bold> Length in bits of each pattern in the binary string generated by the RBC encoder, <inline-formula><mml:math id="M42"><mml:mi>B</mml:mi><mml:mo>=</mml:mo><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p>
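<p>To illustrate the O(|L|*B) inference cost reported for n-SBC in <xref ref-type="table" rid="T6">Table 6</xref>, the following Python sketch scans a memory of |L| stored RBC bit strings and scores an unknown pattern against each one by bit-wise (Hamming) similarity. It is a schematic approximation for complexity purposes only, not the exact n-SBC decision rule; the bit-array representation and helper names are assumptions.</p>
<preformat>
# Schematic O(|L| * B) similarity scan over stored binary patterns
# (illustrative only; not the exact n-SBC decision rule).
import numpy as np

def hamming_similarity(a, b):
    # Number of matching bits between two {0,1} vectors of length B: O(B).
    return int(np.sum(a == b))

def classify(memory, labels, x):
    # memory: |L| x B matrix of RBC bit strings, one row per pattern.
    # One O(B) comparison per row gives O(|L| * B) overall.
    scores = [hamming_similarity(row, x) for row in memory]
    return labels[int(np.argmax(scores))]  # class of the most similar row

# Toy usage: three stored 8-bit patterns from two classes.
M = np.array([[1, 0, 1, 1, 0, 0, 1, 0],
              [1, 1, 1, 1, 0, 0, 1, 0],
              [0, 0, 0, 1, 1, 1, 0, 1]])
y = np.array([0, 0, 1])
print(classify(M, y, np.array([1, 0, 1, 1, 0, 0, 1, 1])))  # -> 0
</preformat>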
</sec>
<sec>
<label>4.5</label>
<title>Classification results</title>
<p><xref ref-type="table" rid="T7">Table 7</xref> compares the performance of the proposed algorithm with that of different classifiers across the 20 datasets described earlier. The algorithms used for comparison were run in <italic>Weka version 3.8.2</italic>, using the tool&#x00027;s default hyperparameters. The results of the n-SBC algorithm were obtained using <italic>MATLAB R2021b</italic> with a random seed of 1. For the experimental process, we evaluated two pre-specified SBC variants with <italic>n</italic> &#x02208; {3, 5}. These values were pre-selected once from a preliminary sweep <italic>n</italic> &#x02208; {1, 2, 3, 4, 5} using training-only validation and were then held fixed across all datasets. To ensure a fair comparison, the study does not cherry-pick the best <italic>n</italic> values in the classification results; instead, those are explicitly excluded from the Friedman and Holm statistical tests to avoid inflating the number of comparisons.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Results of the balanced accuracy measurement obtained by the classifiers.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>Na&#x000EF;ve Bayes</bold></th>
<th valign="top" align="center"><bold>IB1</bold></th>
<th valign="top" align="center"><bold>IB3</bold></th>
<th valign="top" align="center"><bold>MLP</bold></th>
<th valign="top" align="center"><bold>SMO</bold></th>
<th valign="top" align="center"><bold>C4.5</bold></th>
<th valign="top" align="center"><bold>Random Forest</bold></th>
<th valign="top" align="center"><bold>3-SBC</bold></th>
<th valign="top" align="center"><bold>5-SBC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Appendicitis</td>
<td valign="top" align="center"><bold>0.786</bold></td>
<td valign="top" align="center">0.745</td>
<td valign="top" align="center">0.738</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.744</td>
<td valign="top" align="center">0.732</td>
<td valign="top" align="center">0.744</td>
<td valign="top" align="center">0.703</td>
<td valign="top" align="center">0.746</td>
</tr>
<tr>
<td valign="top" align="left">Exasens_copd</td>
<td valign="top" align="center">0.9</td>
<td valign="top" align="center">0.937</td>
<td valign="top" align="center">0.875</td>
<td valign="top" align="center"><bold>0.95</bold></td>
<td valign="top" align="center">0.887</td>
<td valign="top" align="center">0.887</td>
<td valign="top" align="center">0.912</td>
<td valign="top" align="center">0.898</td>
<td valign="top" align="center">0.9</td>
</tr>
<tr>
<td valign="top" align="left">Acute inflammation d1</td>
<td valign="top" align="center">0.992</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center">0.933</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
</tr>
<tr>
<td valign="top" align="left">Acute inflammation d2</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center">0.958</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
</tr>
<tr>
<td valign="top" align="left">ACPs lung cancer</td>
<td valign="top" align="center">0.695</td>
<td valign="top" align="center">0.683</td>
<td valign="top" align="center">0.648</td>
<td valign="top" align="center">0.707</td>
<td valign="top" align="center">0.707</td>
<td valign="top" align="center">0.559</td>
<td valign="top" align="center">0.645</td>
<td valign="top" align="center"><bold>0.984</bold></td>
<td valign="top" align="center"><bold>0.984</bold></td>
</tr>
<tr>
<td valign="top" align="left">Column 2c</td>
<td valign="top" align="center">0.801</td>
<td valign="top" align="center">0.809</td>
<td valign="top" align="center">0.751</td>
<td valign="top" align="center">0.807</td>
<td valign="top" align="center">0.704</td>
<td valign="top" align="center">0.77</td>
<td valign="top" align="center"><bold>0.82</bold></td>
<td valign="top" align="center">0.712</td>
<td valign="top" align="center">0.735</td>
</tr>
<tr>
<td valign="top" align="left">Contraceptive</td>
<td valign="top" align="center">0.514</td>
<td valign="top" align="center">0.417</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.507</td>
<td valign="top" align="center"><bold>0.637</bold></td>
<td valign="top" align="center">0.636</td>
</tr>
<tr>
<td valign="top" align="left">Cryotherapy</td>
<td valign="top" align="center">0.841</td>
<td valign="top" align="center">0.9</td>
<td valign="top" align="center">0.911</td>
<td valign="top" align="center">0.879</td>
<td valign="top" align="center">0.879</td>
<td valign="top" align="center">0.936</td>
<td valign="top" align="center">0.936</td>
<td valign="top" align="center">0.934</td>
<td valign="top" align="center"><bold>0.941</bold></td>
</tr>
<tr>
<td valign="top" align="left">Dermatology</td>
<td valign="top" align="center"><bold>0.976</bold></td>
<td valign="top" align="center">0.952</td>
<td valign="top" align="center">0.969</td>
<td valign="top" align="center">0.968</td>
<td valign="top" align="center">0.971</td>
<td valign="top" align="center">0.955</td>
<td valign="top" align="center">0.958</td>
<td valign="top" align="center">0.967</td>
<td valign="top" align="center">0.971</td>
</tr>
<tr>
<td valign="top" align="left">Hepatitis</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.736</td>
<td valign="top" align="center">0.763</td>
<td valign="top" align="center">0.755</td>
<td valign="top" align="center">0.807</td>
<td valign="top" align="center"><bold>0.85</bold></td>
<td valign="top" align="center">0.835</td>
<td valign="top" align="center">0.818</td>
<td valign="top" align="center">0.835</td>
</tr>
<tr>
<td valign="top" align="left">Mammographic Masses</td>
<td valign="top" align="center">0.828</td>
<td valign="top" align="center">0.754</td>
<td valign="top" align="center">0.763</td>
<td valign="top" align="center">0.822</td>
<td valign="top" align="center">0.796</td>
<td valign="top" align="center">0.822</td>
<td valign="top" align="center">0.797</td>
<td valign="top" align="center"><bold>0.84</bold></td>
<td valign="top" align="center">0.834</td>
</tr>
<tr>
<td valign="top" align="left">Wisconsin</td>
<td valign="top" align="center">0.964</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.964</td>
<td valign="top" align="center">0.939</td>
<td valign="top" align="center"><bold>0.965</bold></td>
<td valign="top" align="center">0.937</td>
<td valign="top" align="center">0.963</td>
<td valign="top" align="center">0.939</td>
<td valign="top" align="center">0.941</td>
</tr>
<tr>
<td valign="top" align="left">HCC Survival</td>
<td valign="top" align="center">0.677</td>
<td valign="top" align="center">0.6</td>
<td valign="top" align="center">0.584</td>
<td valign="top" align="center">0.6</td>
<td valign="top" align="center">0.711</td>
<td valign="top" align="center">0.546</td>
<td valign="top" align="center">0.668</td>
<td valign="top" align="center"><bold>0.828</bold></td>
<td valign="top" align="center">0.822</td>
</tr>
<tr>
<td valign="top" align="left">Autism Adolescent</td>
<td valign="top" align="center">0.959</td>
<td valign="top" align="center">0.882</td>
<td valign="top" align="center">0.841</td>
<td valign="top" align="center">0.887</td>
<td valign="top" align="center">0.891</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center">0.923</td>
<td valign="top" align="center">0.902</td>
</tr>
<tr>
<td valign="top" align="left">Autism Child</td>
<td valign="top" align="center">0.827</td>
<td valign="top" align="center">0.748</td>
<td valign="top" align="center">0.784</td>
<td valign="top" align="center">0.798</td>
<td valign="top" align="center">0.829</td>
<td valign="top" align="center">0.819</td>
<td valign="top" align="center">0.812</td>
<td valign="top" align="center">0.969</td>
<td valign="top" align="center"><bold>0.976</bold></td>
</tr>
<tr>
<td valign="top" align="left">Survey lung cancer</td>
<td valign="top" align="center">0.688</td>
<td valign="top" align="center">0.758</td>
<td valign="top" align="center">0.745</td>
<td valign="top" align="center"><bold>0.802</bold></td>
<td valign="top" align="center">0.782</td>
<td valign="top" align="center">0.747</td>
<td valign="top" align="center">0.754</td>
<td valign="top" align="center">0.792</td>
<td valign="top" align="center">0.765</td>
</tr>
<tr>
<td valign="top" align="left">Breast Cancer Coimbra</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center">0.67</td>
<td valign="top" align="center">0.674</td>
<td valign="top" align="center">0.651</td>
<td valign="top" align="center">0.663</td>
<td valign="top" align="center">0.688</td>
<td valign="top" align="center">0.735</td>
<td valign="top" align="center"><bold>1</bold></td>
<td valign="top" align="center"><bold>1</bold></td>
</tr>
<tr>
<td valign="top" align="left">Saheart</td>
<td valign="top" align="center">0.655</td>
<td valign="top" align="center">0.58</td>
<td valign="top" align="center">0.619</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center"><bold>0.658</bold></td>
<td valign="top" align="center">0.657</td>
<td valign="top" align="center">0.622</td>
<td valign="top" align="center">0.577</td>
<td valign="top" align="center">0.57</td>
</tr>
<tr>
<td valign="top" align="left">Cirrhosis</td>
<td valign="top" align="center">0.515</td>
<td valign="top" align="center">0.429</td>
<td valign="top" align="center">0.444</td>
<td valign="top" align="center">0.525</td>
<td valign="top" align="center">0.52</td>
<td valign="top" align="center">0.549</td>
<td valign="top" align="center">0.524</td>
<td valign="top" align="center"><bold>0.642</bold></td>
<td valign="top" align="center">0.617</td>
</tr>
<tr>
<td valign="top" align="left">Multiple Sclerosis</td>
<td valign="top" align="center">0.902</td>
<td valign="top" align="center">0.788</td>
<td valign="top" align="center">0.802</td>
<td valign="top" align="center">0.907</td>
<td valign="top" align="center">0.812</td>
<td valign="top" align="center">0.783</td>
<td valign="top" align="center">0.786</td>
<td valign="top" align="center"><bold>0.984</bold></td>
<td valign="top" align="center">0.983</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Times Best BA</bold></td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center"><bold>9</bold></td>
<td valign="top" align="center"><bold>7</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>All classifiers were evaluated on the datasets with the class-imbalance complexity specified in <xref ref-type="table" rid="T5">Table 5</xref>, and their results are compared in <xref ref-type="table" rid="T7">Table 7</xref>. For each dataset, the best result among the classifiers is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<p>No preprocessing other than handling missing values and converting categorical values to numeric was applied, as explained in Section 3. No samples were removed from the datasets, nor were synthetic samples added; the datasets retained their original sizes and formats.</p>
<p>The proposed algorithm achieved competitive performance on nine of the twenty datasets: Acute Inflammation d1 and d2, ACPs lung cancer, Contraceptive, Mammographic Masses, HCC Survival, Breast Cancer Coimbra, Cirrhosis, and Multiple Sclerosis.</p>
<p>Furthermore, <xref ref-type="table" rid="T7">Table 7</xref> shows some cases in which classifiers achieved 1 on the balanced accuracy metric, i.e., a perfect classification with zero errors. Counting the frequency of these cases, both versions of our proposed model (3-SBC and 5-SBC) were among the highest performers, achieving a perfect BA on 3 of the 20 datasets.</p>
<p>Similarly, the algorithm that performed best across the most datasets was our proposed 3-SBC model, which was the best in 9 of the 20 datasets, followed by our other model, 5-SBC, which was the best in 7 of the 20 datasets.</p>
<p>Nevertheless, the datasets with high data complexity that yielded inadequate scores overall were Cirrhosis, Saheart, HCC Survival, and Contraceptive; among these, our proposed models achieved the highest performance in 3 of the 4 cases. This is consistent with the No Free Lunch theorem, which states that no classifier can be the best on all types of problems (<xref ref-type="bibr" rid="B60">Wolpert and Macready, 1997</xref>; <xref ref-type="bibr" rid="B2">Adam et al., 2019</xref>); it is therefore expected that our proposed models will not be the best-performing classifiers across all datasets. Moreover, even the best classifier on some of these datasets performed poorly in absolute terms; on Saheart, for example, the top score was only 0.658 (SMO).</p>
<p>In favor of our proposal, however, it can be noted that in most cases the performance of the 3-SBC classifier does not deviate greatly from the highest balanced accuracy values obtained by other classifiers. This is the case for the Survey lung cancer, Dermatology, and Wisconsin datasets, in which 3-SBC obtained results very close to those of the best models for those cases, such as SMO or Na&#x000EF;ve Bayes.</p>
</sec>
<sec>
<label>4.6</label>
<title>Statistical analysis</title>
<p>Comparing various machine learning algorithms and selecting a final model or algorithm as the winner is a common practice in machine learning research and applications. Models are evaluated over a set of experiments using a validation method, e.g., <italic>k</italic>-fold cross-validation or leave-one-out cross-validation (a particular case of <italic>k</italic>-fold cross-validation where <italic>k</italic> equals the number of instances in the dataset), and the results are compared directly via a performance measure. While this is a simple and intuitive approach, it is difficult to determine whether an observed difference reflects an algorithm&#x00027;s real capability or is a statistical fluke.</p>
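<p>In practice, such a cross-validated estimate can be obtained directly from standard libraries. The following scikit-learn sketch (with a placeholder dataset and estimator; it does not reproduce the Weka/MATLAB pipeline used in this study) computes a 10-fold cross-validated balanced accuracy for a simple 3-NN baseline.</p>
<preformat>
# 10-fold cross-validated balanced accuracy (illustrative sketch with
# placeholder data; not the Weka/MATLAB pipeline used in this study).
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

X, y = load_breast_cancer(return_X_y=True)
clf = KNeighborsClassifier(n_neighbors=3)  # a simple 3-NN baseline
scores = cross_val_score(clf, X, y, cv=10, scoring="balanced_accuracy")
print(scores.mean(), scores.std())         # mean BA and its spread
</preformat>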
<p>It is crucial to distinguish genuine performance differences from statistical flukes. Statistical hypothesis testing addresses this by quantifying the probability of observing the score differences under the null hypothesis that all scores are drawn from the same distribution. Rejecting this null hypothesis indicates that the observed differences are statistically significant rather than due to chance.</p>
<p>In this context, to conduct a more reliable comparative analysis, Friedman&#x00027;s test (<xref ref-type="bibr" rid="B21">Friedman, 1937</xref>) was applied to determine whether there are significant differences among the performances observed in the experiment.</p>
<p><xref ref-type="table" rid="T8">Table 8</xref> shows the performance obtained by the different classification algorithms proposed. After performing Friedman&#x00027;s statistical test, the null hypothesis was rejected at the 95% confidence level (<italic>p</italic>-value = 0.000516), indicating statistically significant differences among the classifiers.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Friedman&#x00027;s means ranks table.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Algorithm</bold></th>
<th valign="top" align="center"><bold>Mean ranks<sup>a</sup></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">5-SBC</td>
<td valign="top" align="center">3.2143</td>
</tr>
<tr>
<td valign="top" align="left">3-SBC</td>
<td valign="top" align="center">3.7857</td>
</tr>
<tr>
<td valign="top" align="left">Na&#x000EF;ve Bayes</td>
<td valign="top" align="center">4.5952</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="center">4.8095</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">4.881</td>
</tr>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center">5.1429</td>
</tr>
<tr>
<td valign="top" align="left">C4.5</td>
<td valign="top" align="center">5.4762</td>
</tr>
<tr>
<td valign="top" align="left">1-NN</td>
<td valign="top" align="center">6.4762</td>
</tr>
<tr>
<td valign="top" align="left">3-NN</td>
<td valign="top" align="center">6.619</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>a</sup>sorted from best ranked to worst.</p>
</table-wrap-foot>
</table-wrap>
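<p>For reference, Friedman&#x00027;s test can be reproduced from a matrix of balanced accuracy scores with one row per dataset and one column per algorithm, as in the following SciPy sketch (the score matrix below is a random placeholder, not the values of <xref ref-type="table" rid="T7">Table 7</xref>).</p>
<preformat>
# Friedman test over a BA score matrix (rows: datasets, columns:
# algorithms); the matrix here is a random placeholder.
import numpy as np
from scipy.stats import friedmanchisquare

rng = np.random.default_rng(1)
ba = rng.uniform(0.5, 1.0, size=(20, 9))  # 20 datasets x 9 classifiers

# friedmanchisquare expects one sample (column) per algorithm.
stat, p = friedmanchisquare(*[ba[:, j] for j in range(ba.shape[1])])
print(stat, p)  # the null hypothesis is rejected when p falls below 0.05
</preformat>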
<p>The proposed models (5-SBC and 3-SBC) occupy the first two positions in the Friedman mean-rank table, ahead of the remaining seven algorithms, while the <italic>k</italic>-NN family algorithms rank last.</p>
<p>A <italic>post-hoc</italic> test, the Holm test (<xref ref-type="bibr" rid="B30">Holm, 1979</xref>), was then applied, taking the best-ranked algorithm according to the Friedman test, 5-SBC, as the control. The results in <xref ref-type="table" rid="T9">Table 9</xref> reject the null hypothesis for the comparisons whose <italic>p</italic>-value falls below the Holm-adjusted threshold: 5-SBC shows significant differences at the 95% confidence level with respect to the 1-NN and 3-NN algorithms. The C4.5 and Random Forest comparisons obtained unadjusted <italic>p</italic>-values below 0.05 that do not survive the Holm correction, which could at most be interpreted as marginal evidence in exploratory contexts, while the differences with respect to SVM, MLP, Na&#x000EF;ve Bayes, and 3-SBC were not significant.</p>
<table-wrap position="float" id="T9">
<label>Table 9</label>
<caption><p>Results obtained by Holm&#x00027;s <italic>post-hoc</italic> test.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold><italic>i</italic></bold>.</th>
<th valign="top" align="center"><bold>Algorithm</bold></th>
<th valign="top" align="center"><bold>z<bold> &#x0003D; (</bold>R<sub>0</sub>&#x02212;R<sub>i</sub><bold>)</bold>/SE</bold></th>
<th valign="top" align="center"><bold><italic>p</italic></bold></th>
<th valign="top" align="center"><bold>Holm</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="center">3-NN</td>
<td valign="top" align="center">3.695042</td>
<td valign="top" align="center">0.000220</td>
<td valign="top" align="center">0.00625</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="center">1-NN</td>
<td valign="top" align="center">3.579572</td>
<td valign="top" align="center">0.000344</td>
<td valign="top" align="center">0.00714</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="center">C4.5</td>
<td valign="top" align="center">2.540341</td>
<td valign="top" align="center">0.011074</td>
<td valign="top" align="center">0.00833</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="center">Random Forest</td>
<td valign="top" align="center">2.078461</td>
<td valign="top" align="center">0.037667</td>
<td valign="top" align="center">0.01000</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">SVM</td>
<td valign="top" align="center">1.876388</td>
<td valign="top" align="center">0.060602</td>
<td valign="top" align="center">0.01250</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">MLP</td>
<td valign="top" align="center">1.616581</td>
<td valign="top" align="center">0.105969</td>
<td valign="top" align="center">0.01666</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">Na&#x000EF;ve Bayes</td>
<td valign="top" align="center">1.587713</td>
<td valign="top" align="center">0.112351</td>
<td valign="top" align="center">0.02500</td>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">3-SBC</td>
<td valign="top" align="center">0.692820</td>
<td valign="top" align="center">0.488422</td>
<td valign="top" align="center">0.05000</td>
</tr></tbody>
</table>
</table-wrap>
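<p>The step-down logic of the Holm procedure can be verified directly from the unadjusted <italic>p</italic>-values of <xref ref-type="table" rid="T9">Table 9</xref>: the <italic>p</italic>-values are sorted in ascending order, each is compared against the threshold 0.05/(<italic>m</italic> &#x02212; <italic>i</italic>), and rejection stops at the first failure. The following Python sketch reproduces the thresholds and verdicts of the table.</p>
<preformat>
# Holm step-down check on the unadjusted p-values of Table 9
# (control 5-SBC vs. each remaining algorithm), smallest p first.
pvals = [("3-NN", 0.000220), ("1-NN", 0.000344), ("C4.5", 0.011074),
         ("Random Forest", 0.037667), ("SVM", 0.060602),
         ("MLP", 0.105969), ("Naive Bayes", 0.112351),
         ("3-SBC", 0.488422)]

m, rejecting = len(pvals), True
for i, (name, p) in enumerate(pvals):
    threshold = 0.05 / (m - i)       # 0.00625, 0.00714, 0.00833, ...
    # Reject only while every smaller p-value has also been rejected.
    rejecting = rejecting and (threshold > p)
    print(f"{name:14s} p={p:.6f} Holm={threshold:.5f} "
          f"{'reject H0' if rejecting else 'retain H0'}")
</preformat>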
<p>After presenting the experiments, it was observed that the proposed algorithm obtained competitive results. This conclusion is supported by statistically significant differences between the n-SBC algorithm&#x00027;s observed performance and that of two of the seven selected classifiers (1-NN and 3-NN) on the same set of classification datasets.</p>
<p>Consequently, the results corroborate the hypothesis that the proposed n-SBC algorithm is indeed competitive for classification and disease prediction, as the majority of the datasets used focus on detecting different diseases.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusion and future research</title>
<p>In this research work, a new model, n-SBC (n Similarity Binary Classifier), was presented, along with an experimental analysis to verify its effectiveness against other state-of-the-art algorithms on datasets related to medicine.</p>
<p>The advantages of the proposed model were also described: its simplicity, its explainability, and its ability to address class imbalance, a data complexity that is very common in the literature.</p>
<p>The results detailed in Section 4 highlight the capacity of the proposed algorithm, specifically the 5-SBC version, given its competitive performance compared with other popular classification algorithms in the literature. This research also explores and presents in detail a new approach that uses the similarity between binary strings as the basis for a machine learning model while maintaining simplicity and effectiveness. Above all, the proposed algorithm promotes the research and application of explainable AI, which is of great value in sensitive areas such as health or finance.</p>
<p>The proposed model has a limitation in handling pattern cardinality: because it converts patterns to binary strings, computational complexity can increase during classification. As future research, it is therefore proposed to develop a method, or pursue a completely new approach, that solves this problem while maintaining the model&#x00027;s simplicity, explainability, and performance. A novel similarity measure that could improve the model&#x00027;s performance while preserving the algorithm&#x00027;s simplicity and explainability is also under consideration for implementation. Another important objective is to apply the proposed n-SBC model to image classification tasks, specifically medical images (mainly X-rays), given how easily the preprocessing can be adapted, and to combine the n-SBC model with evolutionary algorithms or metaheuristic processes with the goal of optimizing the model&#x00027;s performance. A further aspect to consider in future studies is analyzing the model&#x00027;s behavior on datasets with outliers, to evaluate its robustness and adaptability to more complex, noisy scenarios. Finally, we plan to extend n-SBC by incorporating the three-way decision (3WD) rule based on the model margin to explicitly handle classification uncertainty, compare granular 3WD variants, and report risk-coverage improvements on medical data.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/datasets">https://archive.ics.uci.edu/datasets</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>OV-G: Visualization, Writing &#x02013; review &#x00026; editing, Investigation, Writing &#x02013; original draft. AA-P: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing, Formal analysis, Methodology. CY-M: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Conceptualization, Supervision.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author CY-M declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2025.1610856/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2025.1610856/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abdullah</surname> <given-names>D. M.</given-names></name> <name><surname>Abdulazeez</surname> <given-names>A. M.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning applications based on Svm classification a review</article-title>. <source>Qubahan Acad. J.</source> <volume>1</volume>, <fpage>81</fpage>&#x02013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.48161/qaj.v1n2a50</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Adam</surname> <given-names>S. P.</given-names></name> <name><surname>Alexandropoulos</surname> <given-names>S-. A. N.</given-names></name> <name><surname>Pardalos</surname> <given-names>P. M.</given-names></name> <name><surname>Vrahatis</surname> <given-names>M. N.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;No free lunch theorem: a review,&#x0201D;</article-title> in <source>Approximation Optimization: Algorithms, Complexity Applications</source> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>57</fpage>&#x02013;<lpage>82</lpage>.</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Agrell</surname> <given-names>E.</given-names></name> <name><surname>Lassing</surname> <given-names>J.</given-names></name> <name><surname>Strom</surname> <given-names>E. G.</given-names></name> <name><surname>Ottosson</surname> <given-names>T.</given-names></name></person-group> (<year>2004</year>). <article-title>On the optimality of the binary reflected gray code</article-title>. <source>Eee Trans. Inform. Theor.</source> <volume>50</volume>, <fpage>3170</fpage>&#x02013;<lpage>3182</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIT.2004.838367</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ahsan</surname> <given-names>M. M.</given-names></name> <name><surname>Luna</surname> <given-names>S. A.</given-names></name> <name><surname>Siddique</surname> <given-names>Z.</given-names></name></person-group> (<year>2022</year>). <article-title>Machine-learning-based disease diagnosis: a comprehensive review</article-title>. <source>Healthcare</source>. <volume>10</volume>:<fpage>541</fpage>. doi: <pub-id pub-id-type="doi">10.3390/healthcare10030541</pub-id><pub-id pub-id-type="pmid">35327018</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amygdalos</surname> <given-names>I.</given-names></name> <name><surname>M&#x000FC;ller-Franzes</surname> <given-names>G.</given-names></name> <name><surname>Bednarsch</surname> <given-names>J.</given-names></name> <name><surname>Czigany</surname> <given-names>Z.</given-names></name> <name><surname>Ulmer</surname> <given-names>T. F.</given-names></name> <name><surname>Bruners</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Novel machine learning algorithm can identify patients at risk of poor overall survival following curative resection for colorectal liver metastases</article-title>. <source>J. Hepato-Biliary-Pancreat. Sci.</source> <volume>30</volume>, <fpage>602</fpage>&#x02013;<lpage>614</lpage>. doi: <pub-id pub-id-type="doi">10.1002/jhbp.1249</pub-id><pub-id pub-id-type="pmid">36196525</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Anderson</surname> <given-names>J. A.</given-names></name></person-group> (<year>1972</year>). <article-title>A simple neural network generating an interactive memory</article-title>. <source>Math. Biosci.</source> <volume>14</volume>, <fpage>197</fpage>&#x02013;<lpage>220</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0025-5564(72)90075-2</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bhargavi</surname> <given-names>P.</given-names></name> <name><surname>Jyothi</surname> <given-names>S.</given-names></name></person-group> (<year>2009</year>). <article-title>Applying naive bayes data mining technique for classification of agricultural land soils</article-title>. <source>J Int. J. Comput. Sci. Netw. Secur.</source> <volume>9</volume>, <fpage>117</fpage>&#x02013;<lpage>122</lpage>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bhat</surname> <given-names>G. S.</given-names></name> <name><surname>Savage</surname> <given-names>C. D.</given-names></name></person-group> (<year>1996</year>). <article-title>Balanced gray codes</article-title>. <source>Electron. J. Combinat.</source> <volume>3</volume>:<fpage>R25</fpage>. doi: <pub-id pub-id-type="doi">10.37236/1249</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bian</surname> <given-names>K.</given-names></name> <name><surname>Priyadarshi</surname> <given-names>R.</given-names></name></person-group> (<year>2024</year>). <article-title>Machine learning optimization techniques: a survey, classification, challenges, and future research issues</article-title>. <source>Arch. Comput. Methods Eng.</source> <volume>31</volume>, <fpage>4209</fpage>&#x02013;<lpage>4233</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11831-024-10110-w</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bookstein</surname> <given-names>A.</given-names></name> <name><surname>Kulyukin</surname> <given-names>V. A.</given-names></name> <name><surname>Raita</surname> <given-names>T.</given-names></name></person-group> (<year>2002</year>). <article-title>Generalized Hamming Distance</article-title>. <source>Inf. Retr. Boston.</source> <volume>5</volume>, <fpage>353</fpage>&#x02013;<lpage>375</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1020499411651</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bui</surname> <given-names>D. T.</given-names></name> <name><surname>Khosravi</surname> <given-names>K.</given-names></name> <name><surname>Tiefenbacher</surname> <given-names>J.</given-names></name> <name><surname>Nguyen</surname> <given-names>H.</given-names></name> <name><surname>Kazakis</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>Improving prediction of water quality indices using novel hybrid machine-learning algorithms</article-title>. <source>Sci. Total Env.</source> <volume>721</volume>:<fpage>137612</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.scitotenv.2020.137612</pub-id><pub-id pub-id-type="pmid">32169637</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cortes</surname> <given-names>C.</given-names></name></person-group> (<year>1995</year>). <article-title>Support-vector networks</article-title>. <source>Machine Learn</source>. <volume>20</volume>, <fpage>273</fpage>&#x02013;<lpage>297</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1022627411411</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Costa</surname> <given-names>V. G.</given-names></name> <name><surname>Pedreira</surname> <given-names>C. E.</given-names></name></person-group> (<year>2023</year>). <article-title>Recent advances in decision trees: an updated survey</article-title>. <source>Artif. Intell. Rev.</source> <volume>56</volume>, <fpage>4765</fpage>&#x02013;<lpage>4800</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-022-10275-5</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cover</surname> <given-names>T.</given-names></name> <name><surname>Hart</surname> <given-names>P.</given-names></name></person-group> (<year>1967</year>). <article-title>Nearest Neighbor Pattern Classification</article-title>. <source>J IEEE Transac. Inform. Theor.</source> <volume>13</volume>, <fpage>21</fpage>&#x02013;<lpage>27</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIT.1967.1053964</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>De Sa</surname> <given-names>J. M.</given-names></name></person-group> (<year>2012</year>). <source>Pattern Recognition: Concepts, Methods and Applications</source>. Berlin: Springer Science and Business Media.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Derrac</surname> <given-names>J.</given-names></name> <name><surname>Garcia</surname> <given-names>S.</given-names></name> <name><surname>Sanchez</surname> <given-names>L.</given-names></name> <name><surname>Herrera</surname> <given-names>F.</given-names></name></person-group> (<year>2015</year>). <article-title>Keel data-mining software tool: data set repository, integration of algorithms and experimental analysis framework</article-title>. <source>Mult. Valued Logic. Soft. Comput.</source> <volume>17</volume>, <fpage>255</fpage>&#x02013;<lpage>287</lpage>.</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dietterich</surname> <given-names>T. G.</given-names></name> <name><surname>Bakiri</surname> <given-names>G.</given-names></name></person-group> (<year>1994</year>). <article-title>Solving multiclass learning problems via error-correcting output codes</article-title>. <source>J. Artif. Intell. Res.</source> <volume>2</volume>, <fpage>263</fpage>&#x02013;<lpage>286</lpage>. doi: <pub-id pub-id-type="doi">10.1613/jair.105</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Doran</surname> <given-names>R. W.</given-names></name></person-group> (<year>2007</year>). <article-title>The gray code</article-title>. <source>J. Univ. Comput. Sci</source>. <volume>13</volume>, <fpage>1573</fpage>&#x02013;<lpage>1597</lpage>. doi: <pub-id pub-id-type="doi">10.3217/jucs-013-11-1573</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Dua</surname> <given-names>D.</given-names></name> <name><surname>Graff</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Uci machine learning repository [<ext-link ext-link-type="uri" xlink:href="Http://Archive">Http://Archive</ext-link>. Ics. Uci. Edu/Ml]. Irvine, Ca: University Of California, School Of Information And Computer Science</article-title>. <source>IEEE Transac. Pattern Anal. Machine Intell.</source> <volume>1</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Duda</surname> <given-names>R. O.</given-names></name> <name><surname>Hart</surname> <given-names>P. E.</given-names></name> <name><surname>Stork</surname> <given-names>D. G.</given-names></name></person-group> (<year>2001</year>). <source>Pattern Classification, Second Edition Edition.</source> <publisher-loc>Hoboken, NJ</publisher-loc>: <publisher-name>John Willey and Sons Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Friedman</surname> <given-names>M.</given-names></name></person-group> (<year>1937</year>). <article-title>The use of ranks to avoid the assumption of normality implicit in the analysis of variance</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>32</volume>, <fpage>675</fpage>&#x02013;<lpage>701</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01621459.1937.10503522</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gaitanis</surname> <given-names>N.</given-names></name> <name><surname>Kapogianopoulos</surname> <given-names>G.</given-names></name> <name><surname>Karras</surname> <given-names>D.</given-names></name></person-group> (<year>1993</year>). <article-title>&#x0201C;Pattern classification using a generalised hamming distance metric,&#x0201D;</article-title> in <source>Proceedings Of 1993 International Conference On Neural Networks (Ijcnn-93-Nagoya, Japan)</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1293</fpage>&#x02013;<lpage>1296</lpage>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Galli</surname> <given-names>C.</given-names></name> <name><surname>Cusano</surname> <given-names>C.</given-names></name> <name><surname>Meleti</surname> <given-names>M.</given-names></name> <name><surname>Donos</surname> <given-names>N.</given-names></name> <name><surname>Calciolari</surname> <given-names>E.</given-names></name></person-group> (<year>2024</year>). <article-title>Topic modeling for faster literature screening using transformer-based embeddings</article-title>. <source>Metrics</source> <volume>2</volume>:<fpage>2</fpage>. doi: <pub-id pub-id-type="doi">10.3390/metrics1010002</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garc&#x000ED;a</surname> <given-names>S.</given-names></name> <name><surname>Fern&#x000E1;ndez</surname> <given-names>A.</given-names></name> <name><surname>Luengo</surname> <given-names>J.</given-names></name> <name><surname>Herrera</surname> <given-names>F.</given-names></name></person-group> (<year>2010a</year>). <article-title>Advanced nonparametric tests for multiple comparisons in the design of experiments in computational intelligence and data mining: experimental analysis of power</article-title>. <source>Inf. Sci.</source> <volume>180</volume>, <fpage>2044</fpage>&#x02013;<lpage>2064</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2009.12.010</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Garc&#x000ED;a</surname> <given-names>V.</given-names></name> <name><surname>Mollineda</surname> <given-names>R. A.</given-names></name> <name><surname>S&#x000E1;nchez</surname> <given-names>J. S.</given-names></name></person-group> (<year>2010b</year>). <article-title>&#x0201C;Theoretical analysis of a performance measure for imbalanced data,&#x0201D;</article-title> in <source>2010 20th International Conference on Pattern Recognition</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>617</fpage>&#x02013;<lpage>620</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICPR.2010.156</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Goodall</surname> <given-names>W.</given-names></name></person-group> (<year>1951</year>). <article-title>Television by pulse code modulation</article-title>. <source>Bell Syst. Tech. J.</source> <volume>30</volume>, <fpage>33</fpage>&#x02013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1002/j.1538-7305.1951.tb01365.x</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hall</surname> <given-names>M.</given-names></name> <name><surname>Frank</surname> <given-names>E.</given-names></name> <name><surname>Holmes</surname> <given-names>G.</given-names></name> <name><surname>Pfahringer</surname> <given-names>B.</given-names></name> <name><surname>Reutemann</surname> <given-names>P.</given-names></name> <name><surname>Witten</surname> <given-names>I. H.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The weka data mining software: an update</article-title>. <source>Acm Sigkdd Explor. Newslett.</source> <volume>11</volume>, <fpage>10</fpage>&#x02013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.1145/1656274.1656278</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hissou</surname> <given-names>H.</given-names></name> <name><surname>Benkirane</surname> <given-names>S.</given-names></name> <name><surname>Guezzaz</surname> <given-names>A.</given-names></name> <name><surname>Azrour</surname> <given-names>M.</given-names></name> <name><surname>Beni-Hssane</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>A novel machine learning approach for solar radiation estimation</article-title>. <source>Sustainability</source> <volume>15</volume>:<fpage>10609</fpage>. doi: <pub-id pub-id-type="doi">10.3390/su151310609</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hoffmann</surname> <given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>Sparse associative memory</article-title>. <source>Neural Comput.</source> <volume>31</volume>, <fpage>998</fpage>&#x02013;<lpage>1014</lpage>. doi: <pub-id pub-id-type="doi">10.1162/neco_a_01181</pub-id><pub-id pub-id-type="pmid">30883276</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Holm</surname> <given-names>S.</given-names></name></person-group> (<year>1979</year>). <article-title>A simple sequentially rejective multiple test procedure</article-title>. <source>Scand. J. Stat.</source> <volume>6</volume>, <fpage>65</fpage>&#x02013;<lpage>70</lpage>. doi: <pub-id pub-id-type="doi">10.2307/4615733</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hopfield</surname> <given-names>J. J.</given-names></name></person-group> (<year>1982</year>). <article-title>Neural networks and physical systems with emergent collective computational abilities</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>79</volume>, <fpage>2554</fpage>&#x02013;<lpage>2558</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.79.8.2554</pub-id><pub-id pub-id-type="pmid">6953413</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ibrahim</surname> <given-names>I.</given-names></name> <name><surname>Abdulazeez</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>The role of machine learning algorithms for diagnosing diseases</article-title>. <source>J. Appl. Sci. Technol. Trends</source> <volume>2</volume>, <fpage>10</fpage>&#x02013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.38094/jastt20179</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Janani</surname> <given-names>R.</given-names></name> <name><surname>Vijayarani</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>Text document clustering using spectral clustering algorithm with particle swarm optimization</article-title>. <source>Expert Syst. Appl.</source> <volume>134</volume>, <fpage>192</fpage>&#x02013;<lpage>200</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2019.05.030</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kohonen</surname> <given-names>T.</given-names></name></person-group> (<year>1972</year>). <article-title>Correlation matrix memories</article-title>. <source>IEEE Transac. Comput.</source> <volume>100</volume>, <fpage>353</fpage>&#x02013;<lpage>359</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TC.1972.5008975</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>Y.</given-names></name> <name><surname>Koul</surname> <given-names>A.</given-names></name> <name><surname>Singla</surname> <given-names>R.</given-names></name> <name><surname>Ijaz</surname> <given-names>M. F.</given-names></name></person-group> (<year>2023</year>). <article-title>Artificial intelligence in disease diagnosis: a systematic literature review, synthesizing framework and future research agenda</article-title>. <source>J. Ambient Intell. Humanized Comput.</source> <volume>14</volume>, <fpage>8459</fpage>&#x02013;<lpage>8486</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12652-021-03612-z</pub-id><pub-id pub-id-type="pmid">35039756</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname> <given-names>Y.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x02013;<lpage>444</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature14539</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>L&#x000F3;pez</surname> <given-names>V.</given-names></name> <name><surname>Fern&#x000E1;ndez</surname> <given-names>A.</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>S.</given-names></name> <name><surname>Palade</surname> <given-names>V.</given-names></name> <name><surname>Herrera</surname> <given-names>F.</given-names></name></person-group> (<year>2013</year>). <article-title>An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics</article-title>. <source>Inf. Sci.</source> <volume>250</volume>, <fpage>113</fpage>&#x02013;<lpage>141</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2013.07.007</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luna-Ortiz</surname> <given-names>I.</given-names></name> <name><surname>Aldape-P&#x000E9;rez</surname> <given-names>M.</given-names></name> <name><surname>Uriarte-Arcia</surname> <given-names>A. V.</given-names></name> <name><surname>Rodr&#x000ED;guez-Molina</surname> <given-names>A.</given-names></name> <name><surname>Alarc&#x000F3;n-Paredes</surname> <given-names>A.</given-names></name> <name><surname>Ventura-Molina</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Parkinson&#x00027;s disease detection from voice recordings using associative memories</article-title>. <source>Healthcare</source> <volume>11</volume>:<fpage>1601</fpage>. doi: <pub-id pub-id-type="doi">10.3390/healthcare11111601</pub-id><pub-id pub-id-type="pmid">37297740</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McCulloch</surname> <given-names>W. S.</given-names></name> <name><surname>Pitts</surname> <given-names>W.</given-names></name></person-group> (<year>1943</year>). <article-title>A logical calculus of the ideas immanent in nervous activity</article-title>. <source>Bull. Math. Biophys.</source> <volume>5</volume>, <fpage>115</fpage>&#x02013;<lpage>133</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF02478259</pub-id><pub-id pub-id-type="pmid">2185863</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Misra</surname> <given-names>P.</given-names></name> <name><surname>Yadav</surname> <given-names>A. S.</given-names></name></person-group> (<year>2020</year>). <article-title>Improving the classification accuracy using recursive feature elimination with cross-validation</article-title>. <source>Int. J. Emerg. Technol.</source> <volume>11</volume>, <fpage>659</fpage>&#x02013;<lpage>665</lpage>.</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moreno-Ibarra</surname> <given-names>M.-A.</given-names></name> <name><surname>Villuendas-Rey</surname> <given-names>Y.</given-names></name> <name><surname>Lytras</surname> <given-names>M. D.</given-names></name> <name><surname>Y&#x000E1;&#x000F1;ez-M&#x000E1;rquez</surname> <given-names>C.</given-names></name> <name><surname>Salgado-Ram&#x000ED;rez</surname> <given-names>J.-C.</given-names></name></person-group> (<year>2021</year>). <article-title>Classification of diseases using machine learning algorithms: a comparative study</article-title>. <source>Mathematics</source> <volume>9</volume>:<fpage>1817</fpage>. doi: <pub-id pub-id-type="doi">10.3390/math9151817</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nakatsu</surname> <given-names>R. T.</given-names></name></person-group> (<year>2020</year>). <article-title>An evaluation of four resampling methods used in machine learning classification</article-title>. <source>IEEE Intell. Syst.</source> <volume>36</volume>, <fpage>51</fpage>&#x02013;<lpage>57</lpage>. doi: <pub-id pub-id-type="doi">10.1109/MIS.2020.2978066</pub-id></mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Norouzi</surname> <given-names>M.</given-names></name> <name><surname>Fleet</surname> <given-names>D. J.</given-names></name> <name><surname>Salakhutdinov</surname> <given-names>R. R.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;Hamming distance metric learning,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems, vol. 25 (NIPS 2012)</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Curran Associates, Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Nozari</surname> <given-names>H.</given-names></name> <name><surname>Ghahremani-Nahr</surname> <given-names>J.</given-names></name> <name><surname>Szmelter-Jarosz</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;AI and machine learning for real-world problems,&#x0201D;</article-title> in <source>Advances in Computers</source> (<publisher-loc>Amsterdam</publisher-loc>: <publisher-name>Elsevier</publisher-name>).</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Pappalardo</surname> <given-names>F.</given-names></name> <name><surname>Calonaci</surname> <given-names>C.</given-names></name> <name><surname>Pennisi</surname> <given-names>M.</given-names></name> <name><surname>Mastriani</surname> <given-names>E.</given-names></name> <name><surname>Motta</surname> <given-names>S.</given-names></name></person-group> (<year>2009</year>). <article-title>&#x0201C;HamFast: fast Hamming distance computation,&#x0201D;</article-title> in <source>2009 WRI World Congress on Computer Science and Information Engineering</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>569</fpage>&#x02013;<lpage>572</lpage>.</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Quinlan</surname> <given-names>J. R.</given-names></name></person-group> (<year>1990</year>). <article-title>Decision trees and decision-making</article-title>. <source>IEEE Transac. Syst. Man Cybernet.</source> <volume>20</volume>, <fpage>339</fpage>&#x02013;<lpage>346</lpage>. doi: <pub-id pub-id-type="doi">10.1109/21.52545</pub-id></mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rana</surname> <given-names>M.</given-names></name> <name><surname>Bhushan</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Machine learning and deep learning approach for medical image analysis: diagnosis to detection</article-title>. <source>Multimedia Tools Appl.</source> <volume>82</volume>, <fpage>26731</fpage>&#x02013;<lpage>26769</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11042-022-14305-w</pub-id><pub-id pub-id-type="pmid">36588765</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rane</surname> <given-names>N.</given-names></name> <name><surname>Choudhary</surname> <given-names>S. P.</given-names></name> <name><surname>Rane</surname> <given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Ensemble deep learning and machine learning: applications, opportunities, challenges, and future directions</article-title>. <source>Stud. Med. Health Sci.</source> <volume>1</volume>, <fpage>18</fpage>&#x02013;<lpage>41</lpage>. doi: <pub-id pub-id-type="doi">10.48185/smhs.v1i2.1225</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rasool</surname> <given-names>S.</given-names></name> <name><surname>Husnain</surname> <given-names>A.</given-names></name> <name><surname>Saeed</surname> <given-names>A.</given-names></name> <name><surname>Gill</surname> <given-names>A. Y.</given-names></name> <name><surname>Hussain</surname> <given-names>H. K.</given-names></name></person-group> (<year>2023</year>). <article-title>Harnessing predictive power: exploring the crucial role of machine learning in early disease detection</article-title>. <source>Jurihum: Jurnal Inovasi Dan Humaniora</source> <volume>1</volume>, <fpage>302</fpage>&#x02013;<lpage>315</lpage>.</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rosenblatt</surname> <given-names>F.</given-names></name></person-group> (<year>1958</year>). <article-title>The perceptron: a probabilistic model for information storage and organization in the brain</article-title>. <source>Psychol. Rev.</source> <volume>65</volume>:<fpage>386</fpage>. doi: <pub-id pub-id-type="doi">10.1037/h0042519</pub-id><pub-id pub-id-type="pmid">13602029</pub-id></mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sarker</surname> <given-names>I. H.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning: algorithms, real-world applications and research directions</article-title>. <source>SN Comput. Sci</source>. <volume>2</volume>:<fpage>160</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s42979-021-00592-x</pub-id><pub-id pub-id-type="pmid">33778771</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>&#x00160;arkovskis</surname> <given-names>S.</given-names></name> <name><surname>Jer&#x00161;ovs</surname> <given-names>A.</given-names></name> <name><surname>Kolosovs</surname> <given-names>D.</given-names></name> <name><surname>Grabs</surname> <given-names>E.</given-names></name></person-group> (<year>2017</year>). <article-title>Encoder improvement for simple amplitude fully parallel classifiers based on Grey codes</article-title>. <source>Procedia Eng.</source> <volume>178</volume>, <fpage>604</fpage>&#x02013;<lpage>614</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.proeng.2017.01.119</pub-id></mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Sharifani</surname> <given-names>K.</given-names></name> <name><surname>Amini</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Machine learning and deep learning: a review of methods and applications</article-title>. <source>World Inform. Technol. Eng. J.</source> <volume>10</volume>, <fpage>3897</fpage>&#x02013;<lpage>3904</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://ssrn.com/abstract=4458723">https://ssrn.com/abstract=4458723</ext-link> (Accessed March 10, 2024).</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shehadeh</surname> <given-names>A.</given-names></name> <name><surname>Alshboul</surname> <given-names>O.</given-names></name> <name><surname>Al Mamlook</surname> <given-names>R. E.</given-names></name> <name><surname>Hamedat</surname> <given-names>O.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning models for predicting the residual value of heavy construction equipment: an evaluation of modified decision tree, LightGBM, and XGBoost regression</article-title>. <source>Autom. Constr.</source> <volume>129</volume>:<fpage>103827</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.autcon.2021.103827</pub-id></mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steinbuch</surname> <given-names>K.</given-names></name></person-group> (<year>1961</year>). <article-title>Die Lernmatrix</article-title>. <source>Kybernetik</source> <volume>1</volume>, <fpage>36</fpage>&#x02013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF00293853</pub-id></mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Talib</surname> <given-names>S. A.</given-names></name></person-group> (<year>2018</year>). <article-title>New strategies for associative memories</article-title>. <source>Eng. Technol. J.</source> <volume>36</volume>, <fpage>207</fpage>&#x02013;<lpage>212</lpage>. doi: <pub-id pub-id-type="doi">10.30684/etj.36.2A.13</pub-id></mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vayadande</surname> <given-names>K.</given-names></name></person-group> (<year>2024</year>). <article-title>Innovative approaches for skin disease identification in machine learning: a comprehensive study</article-title>. <source>Oral Oncol. Rep</source>. <volume>10</volume>:<fpage>100365</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.oor.2024.100365</pub-id></mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vel&#x000E1;zquez-Rodr&#x000ED;guez</surname> <given-names>J.-L.</given-names></name> <name><surname>Villuendas-Rey</surname> <given-names>Y.</given-names></name> <name><surname>Camacho-Nieto</surname> <given-names>O.</given-names></name> <name><surname>Y&#x000E1;&#x000F1;ez-M&#x000E1;rquez</surname> <given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>A novel and simple mathematical transform improves the performance of Lernmatrix in pattern classification</article-title>. <source>Mathematics</source> <volume>8</volume>:<fpage>732</fpage>. doi: <pub-id pub-id-type="doi">10.3390/math8050732</pub-id></mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Willshaw</surname> <given-names>D. J.</given-names></name> <name><surname>Buneman</surname> <given-names>O. P.</given-names></name> <name><surname>Longuet-Higgins</surname> <given-names>H. C.</given-names></name></person-group> (<year>1969</year>). <article-title>Non-holographic associative memory</article-title>. <source>Nature</source> <volume>222</volume>, <fpage>960</fpage>&#x02013;<lpage>962</lpage>. doi: <pub-id pub-id-type="doi">10.1038/222960a0</pub-id><pub-id pub-id-type="pmid">5789326</pub-id></mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolpert</surname> <given-names>D. H.</given-names></name> <name><surname>Macready</surname> <given-names>W. G.</given-names></name></person-group> (<year>1997</year>). <article-title>No free lunch theorems for optimization</article-title>. <source>IEEE Transac. Evol. Comput.</source> <volume>1</volume>, <fpage>67</fpage>&#x02013;<lpage>82</lpage>. doi: <pub-id pub-id-type="doi">10.1109/4235.585893</pub-id></mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wong</surname> <given-names>T.-T.</given-names></name></person-group> (<year>2015</year>). <article-title>Performance evaluation of classification algorithms by <italic>K-</italic>fold and leave-one-out cross validation</article-title>. <source>Pattern Recognit.</source> <volume>48</volume>, <fpage>2839</fpage>&#x02013;<lpage>2846</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patcog.2015.03.009</pub-id></mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>W.</given-names></name> <name><surname>Luo</surname> <given-names>Z.</given-names></name> <name><surname>Hu</surname> <given-names>Q.</given-names></name></person-group> (<year>2022</year>). <article-title>A review of research on signal modulation recognition based on deep learning</article-title>. <source>Electronics</source> <volume>11</volume>:<fpage>2764</fpage>. doi: <pub-id pub-id-type="doi">10.3390/electronics11172764</pub-id></mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Y&#x000E1;&#x000F1;ez-M&#x000E1;rquez</surname> <given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>Toward the bleaching of the black boxes: minimalist machine learning</article-title>. <source>IT Prof.</source> <volume>22</volume>, <fpage>51</fpage>&#x02013;<lpage>56</lpage>. doi: <pub-id pub-id-type="doi">10.1109/MITP.2020.2994188</pub-id></mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>G.</given-names></name> <name><surname>Ding</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>Associative memory optimized method on deep neural networks for image classification</article-title>. <source>Inf. Sci.</source> <volume>533</volume>, <fpage>108</fpage>&#x02013;<lpage>119</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2020.05.038</pub-id></mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>J.</given-names></name> <name><surname>Lu</surname> <given-names>K.</given-names></name> <name><surname>Tian</surname> <given-names>Q.</given-names></name></person-group> (<year>2013</year>). <article-title>&#x0201C;Binary code ranking with weighted Hamming distance,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>1586</fpage>&#x02013;<lpage>1593</lpage>.</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Challenges in KNN classification</article-title>. <source>IEEE Transac. Knowl. Data Eng.</source> <volume>34</volume>, <fpage>4663</fpage>&#x02013;<lpage>4675</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2021.3049250</pub-id></mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>F.</given-names></name> <name><surname>Ma</surname> <given-names>S.</given-names></name> <name><surname>Cheng</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>X.-Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>C.-L.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Open-world machine learning: a review and new outlooks</article-title>. <source>arXiv</source> [Preprint] <italic>arXiv:2403.01759</italic>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2403.01759">https://arxiv.org/abs/2403.01759</ext-link> (Accessed March 4, 2024).</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2737022/overview">Alaa Eleyan</ext-link>, American University of the Middle East, Kuwait</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1074752/overview">Reggie Gustilo</ext-link>, De La Salle University, Philippines</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1358375/overview">Vince Hooper</ext-link>, SPJ Global, United Arab Emirates</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3090415/overview">Suprativ Saha</ext-link>, JIS University, India</p>
</fn>
</fn-group>
</back>
</article>