<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Appl. Math. Stat.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Applied Mathematics and Statistics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Appl. Math. Stat.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2297-4687</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fams.2025.1634300</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Maxwell<sup>&#x000AE;</sup>: an AAAA classifier well-suited to biomedical data clustering</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Gardes</surname> <given-names>Jo&#x000EB;l</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3078310"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Tchatchueng-Mbougua</surname> <given-names>Jules Brice</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<uri xlink:href="https://loop.frontiersin.org/people/2116667"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Maldivi</surname> <given-names>Christophe</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Jelassi</surname> <given-names>Mariem</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>ben Khalfallah</surname> <given-names>Houssem</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Demongeot</surname> <given-names>Jacques</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<uri xlink:href="https://loop.frontiersin.org/people/1591581"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Orange Laboratories</institution>, <city>Meylan</city>, <country country="fr">France</country></aff>
<aff id="aff2"><label>2</label><institution>Centre Pasteur du Cameroun</institution>, <city>Yaound&#x000E9;</city>, <country country="cm">Cameroon</country></aff>
<aff id="aff3"><label>3</label><institution>ENSI - Ecole Nationale des Sciences de l&#x00027;Informatique, Campus Universitaire de la Manouba</institution>, <city>La Manouba</city>, <country country="tn">Tunisia</country></aff>
<aff id="aff4"><label>4</label><institution>University of Grenoble Alpes, AGEIS EA 7407, Faculty of Medicine</institution>, <city>La Tronche</city>, <country country="fr">France</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Jacques Demongeot, <email xlink:href="mailto:Jacques.Demongeot@univ-grenoble-alpes.fr">Jacques.Demongeot@univ-grenoble-alpes.fr</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-12-10">
<day>10</day>
<month>12</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="corrected" iso-8601-date="2025-12-15">
<day>15</day>
<month>12</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>11</volume>
<elocation-id>1634300</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>05</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>18</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>24</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Gardes, Tchatchueng-Mbougua, Maldivi, Jelassi, ben Khalfallah and Demongeot.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Gardes, Tchatchueng-Mbougua, Maldivi, Jelassi, ben Khalfallah and Demongeot</copyright-holder>
<license>
<ali:license_ref start_date="2025-12-10">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>A new classifier called Maxwell<sup>&#x000AE;</sup>, Adiabatic, Agnostic and Almost Autonomous, is presented and used to classify species according to their early occurrence in evolution.</p></sec>
<sec>
<title>Methods</title>
<p>After a precise description of all the steps of the clustering process, two examples of application are given: first, the classification of simulated genomic data, whose simulation mode is processed by an algorithm allowing the successive application of known operators having acted during the evolution of species. The clustering thus obtained makes it possible to identify correctly the genomes of species having evolved in the same ecosystem. Then, mitochondrial genomes of mammals and giant viruses associated with the bacterial or fungal targets they infect are classified according to the same criteria.</p></sec>
<sec>
<title>Results</title>
<p>The results show a good adequacy of the obtained classifications to the evolutionary reality and a high consistency with the known knowledge on the evolution of the oldest species.</p></sec>
<sec>
<title>Discussion</title>
<p>The Maxwell<sup>&#x000AE;</sup> classifier presents a unique set of properties, adiabatic, agnostic and almost autonomous, making it particularly suitable for biomedical applications.</p></sec></abstract>
<kwd-group>
<kwd>classification</kwd>
<kwd>Maxwell<sup>&#x000AE;</sup> classifier</kwd>
<kwd>evolution</kwd>
<kwd>co-evolution cluster</kwd>
<kwd>mitochondrial genome</kwd>
<kwd>giant virus genome</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that no financial support was received for the research and/or publication of this article.</funding-statement>
</funding-group>
<counts>
<fig-count count="15"/>
<table-count count="0"/>
<equation-count count="3"/>
<ref-count count="55"/>
<page-count count="15"/>
<word-count count="7245"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Mathematical Biology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>To be relevant, modeling in biology and medicine requires access to a large amount of data, often available in public databases such as NCBI [<xref ref-type="bibr" rid="B1">1</xref>]. Recently, the Covid-19 pandemic has shown that these data can feed into relevant and effective models, which can be used to explain a posteriori or predict <italic>a priori</italic> complex phenomena such as the transition between endemic and epidemic phases [<xref ref-type="bibr" rid="B2">2</xref>] or the role of quarantine and vaccination in preventing epidemic peaks [<xref ref-type="bibr" rid="B3">3</xref>&#x02013;<xref ref-type="bibr" rid="B5">5</xref>]. The crucial problem posed by access to these biomedical data, particularly genomic data, is that it is constantly increasing and requires processing using descriptive statistical techniques such as classification before being incorporated into models. The aim of this article is to propose a new classification tool called Maxwell<sup>&#x000AE;</sup> and to verify its relevance on genomic data. This new tool is a classifier perfectly suited to AI approaches in biology and medicine because of its reversibility. Its methodology is based on a lossless compression tool, the Burrows-Wheeler transform. It can classify any digital object (image, signal, document) by retaining the intermediate results of each data processing step and allowing for reverse processing, a so-called Adiabatic quality, useful in the event of a possible medico-legal trial following the computer-assisted medical decision. Since it does not require any <italic>a priori</italic> knowledge, it is said to be Agnostic. Belonging to the family of unsupervised classifiers, but requiring meta-knowledge to refine the last clustering step, it is said to be Almost Autonomous. Maxwell<sup>&#x000AE;</sup> can therefore be considered an AAAA classifier. 
In <bold>Section 2</bold>, we present Maxwell<sup>&#x000AE;</sup>&#x00027;s successive operating stages, namely the lossless compression of the digital objects to be classified, then the calculation of the distances between these compressed objects allowing the construction of their clusters, and finally their identification using semantic metadata, followed by a refinement of the classification by adjusting the classification thresholds. In <bold>Section 3</bold>, we propose an example of application aiming to classify species according to their antiquity in evolution. Then, in <bold>Section 4</bold>, we present a discussion on the place of Maxwell<sup>&#x000AE;</sup> among the known classification tools and finally, in <bold>Section 5</bold>, perspectives and a conclusion.</p></sec>
<sec id="s2">
<label>2</label>
<title>Materials and methods: Maxwell<sup>&#x000AE;</sup>&#x00027;s operating principles</title>
<sec>
<label>2.1</label>
<title>Lossless compression</title>
<p>The first step in Maxwell<sup>&#x000AE;</sup>&#x00027;s operation is the lossless compression following an algorithm due to Burrows and Wheeler, which allows the calculation of a distance between the compressed digital objects to classify [<xref ref-type="bibr" rid="B6">6</xref>&#x02013;<xref ref-type="bibr" rid="B11">11</xref>]. This algorithmic approach has already been partly published [<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>]. Consider two similar sequences of letters, X=BAIGNADE and Y=BADINAGE, with three mutations, I:D, G:I, and D:G, i.e., I changed to D, G to I, and D to G (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>The first step of the lossless Burrows-Wheeler compression transform of the words BAIGNADE and BADINAGE.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0001.tif">
<alt-text content-type="machine-generated">Diagram illustrating the Burrows-Wheeler Transform (BWT) and Run-Length Encoding (RLE) for &#x0201C;BAIGNADE&#x0201D; and &#x0201C;BADINAGE&#x0201D;. Both words are rotated to generate lists, sorted, and indexed. The third row of each list is highlighted in red. BWT results are shown as &#x0201C;3NBEADIAG&#x0201D; for &#x0201C;BAIGNADE&#x0201D; and &#x0201C;3BNEAGADI&#x0201D; for &#x0201C;BADINAGE&#x0201D;. RLE compresses both results as &#x0201C;131B1N1E1A1D1I1A1G&#x0201D;, length 18 octets.</alt-text>
</graphic>
</fig>
<p>After considering all the circular permutations of the words BAIGNADE and BADINAGE, we reorder these permutations in alphabetical order, then we note the rank at which the initial word appears (here three for both BAIGNADE and BADINAGE) and retain the ordered last letters of the permutations, that is NBEADIAG for BAIGNADE and BNEAGADI for BADINAGE. The concatenation of the rank with this last sequence gives the Burrows&#x02013;Wheeler transform BWT, e.g., BWT(BAIGNADE) = BWT(X) = 3NBEADIAG. The last step of compression is to calculate the length of the Run-length encoding (RLE) of BWT(X): here RLE(BWT(X)) = 131N1B1E1A1D1I1A1G, where consecutive occurrences of the same symbol are stored as a single occurrence of that symbol preceded by the count of its consecutive occurrences rather than as the original run: C<sub>X</sub> = Length(RLE(BWT(X))) = 18 Octets. In the same way, C<sub>Y</sub> = Length(RLE(BWT(Y))) = 18 Octets. Then, we do the same for the concatenated word BAIGNADEBADINAGE, for which the RLE length of its Burrows-Wheeler transform BWT(XY) is C<sub>XY</sub> = 24 Octets (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Second step of lossless Burrows-Wheeler compression transform of the concatenated word BAIGNADEBADINAGE.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0002.tif">
<alt-text content-type="machine-generated">Transformation visual illustrating Burrows-Wheeler Transform steps. Left: input string rotations with characters highlighted. Middle: sorted rotations. Right: transformation result, showing BWT output and run-length encoding with compression calculation, explained as BWT(XY) and RLE(BWT(XY)) = 24 octets.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>2.2</label>
<title>Normalized compression distance matrix</title>
<p>The concept of Normalized Compression Distance (NCD) comes from a universal approach for comparing arbitrary objects. Derived from Kolmogorov Complexity (KC), NCD offers a domain-independent alternative to specific methods (sequence alignment, image analysis, text comparison, etc.) and can be considered as a computable variant of KC. The generality of its potential applications makes it a conceptually powerful tool, but it also poses practical challenges. KC itself is not computable, but it forms the basis of the theory. Brillouin defined Information as a form of negentropy, measurable and costly to create and to erase [<xref ref-type="bibr" rid="B6">6</xref>]: erasing a bit of information has a minimal energy cost of 2k<sub>B</sub>Tln2, where k<sub>B</sub> is the Boltzmann constant and T the temperature [<xref ref-type="bibr" rid="B7">7</xref>]; then, Bennett introduced the notion of logical depth taking into account the &#x0201C;computation time&#x0201D; [<xref ref-type="bibr" rid="B8">8</xref>]. The Kolmogorov complexity K(X) of an object X is defined by the length of the shortest program generating X and allows one to define a Normalized Information Distance (NID) between two objects X and Y:</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>NID</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>,</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>max</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>K</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>&#x02223;</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>K</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>Y</mml:mtext><mml:mo>&#x02223;</mml:mo><mml:mtext>X</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mtext>max</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>K</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>X</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>K</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>Y</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where K(X|Y) is the conditional complexity [<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>]. NID is approached by Vit&#x000E1;nyi&#x00027;s NCD [<xref ref-type="bibr" rid="B11">11</xref>] calculated as follows:</p>
<disp-formula id="EQ2"><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>NCD</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>,</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>,</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mtext>XY</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:mtext>min</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mtext>X</mml:mtext></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mtext>Y</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mtext>max</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mtext>X</mml:mtext></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mtext>Y</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>where C<sub>X</sub> is the length of the Run-Length Encoding (RLE) of the Burrows-Wheeler Transform of X, BWT(X).</p>
<p>From the calculation of all the distances between several objects, we can extract the distance matrix D, whose general term D<sub>XY</sub> is equal to d(X,Y). In the example above, the distance d(X,Y)=[C<sub>XY</sub> &#x02013; min(C<sub>X</sub>,C<sub>Y</sub>)]/max(C<sub>X</sub>,C<sub>Y</sub>)=1/3 and the matrix D is given by:</p>
<disp-formula id="EQ3"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>D</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mtext>d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>,</mml:mo><mml:mtext>X</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mtext>d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>,</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>Y</mml:mtext><mml:mo>,</mml:mo><mml:mtext>X</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mtext>d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>Y</mml:mtext><mml:mo>,</mml:mo><mml:mtext>Y</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd><mml:mtd><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mn>3</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mn>3</mml:mn></mml:mtd><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr></mml:mtr></mml:mtable></mml:mrow><mml:mo 
stretchy="true">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
</sec>
<sec>
<label>2.3</label>
<title>From distance matrix to clusters</title>
<p>After getting the distance matrix D, the goal of the next step is to search for homogeneous and isotropic regions in the distance matrix, following the successive calculations (<xref ref-type="fig" rid="F3">Figure 3</xref>) through 10 consecutive steps:</p>
<list list-type="simple">
<list-item><p>1) Construction of a triangulation based on the current element, the digital object X, its first neighbor Y and the first neighbor Z of Y,</p></list-item>
<list-item><p>2) Calculation of the triangle T=(X,Y,Z) area and evaluation of isotropy index S(T) using equations of <xref ref-type="fig" rid="F3">Figure 3</xref>. The formula for calculating a triangle area was discovered by Heron of Alexandria in the 1st century AD [<xref ref-type="bibr" rid="B14">14</xref>],</p></list-item>
<list-item><p>3) Calculation of a mean &#x003BC; and standard deviation &#x003C3; from the histogram of the triangle areas A, then elimination of &#x0201C;large triangles&#x0201D; whose area A is over a threshold S(A) based on the number of standard deviations retained. For example, we can exclude elements corresponding to vertices of triangles whose area is more than a threshold S(A) = &#x003BC; &#x0002B; 2&#x003C3; (<xref ref-type="fig" rid="F3">Figure 3A</xref>). In the same way, we reject vertices whose triangle is too far from equilaterality. The isotropy index <italic>Q</italic> = <inline-formula><mml:math id="M4"><mml:mfrac><mml:mrow><mml:mn>3</mml:mn><mml:msqrt><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msqrt><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mtext>&#x000A0;</mml:mtext></mml:mrow></mml:mfrac></mml:math></inline-formula> equals 1 if the corresponding triangle is equilateral and we reject &#x0201C;distorted triangles&#x0201D; whose Q value is more than a fixed threshold S(Q) in order to ensure the distance homogeneity inside subgraphs, future cluster candidates (<xref ref-type="fig" rid="F3">Figure 3B</xref>),</p></list-item>
<list-item><p>4) Edge processing of the obtained subgraphs using area and equilaterality thresholds S(A) and S(Q) to remove edges that are &#x0201C;useless&#x0201D; to the subgraph&#x00027;s topology and identify the &#x0201C;best representative&#x0201D; vertex as the most connected or the closest to the cluster geometric barycenter depending on the preferred criterion, i.e., realizing a local optimum (max connected or min distanced),</p></list-item>
<list-item><p>5) Identification of the subgraphs with multiple best representatives, by using a Vorono&#x000EF; tessellation algorithm (from GraphViz) for detecting internal boundaries within these subgraphs,</p></list-item>
<list-item><p>6) Decision test on new triangles made from internal Vorono&#x000EF; boundaries, at the end of which thresholds of new mean and standard deviation of area and isotropy index no longer detect new internal boundaries,</p></list-item>
<list-item><p>7) Storing as &#x0201C;singleton clusters&#x0201D; all elements rejected by the previous statistical calculations,</p></list-item>
<list-item><p>8) Recall of initial process (1) performed on the singleton population to detect new clusters until stabilization of &#x0201C;singleton clusters&#x0201D; and their assignment to the closest non-singleton cluster,</p></list-item>
<list-item><p>9) Identification of final clusters by using metadata,</p></list-item>
<list-item><p>10) Final validation by experts of the field with a possible cluster concatenation under semantic arguments.</p></list-item>
</list>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p><bold>(A)</bold> Calculation of Heron area formula; <bold>(B)</bold> calculation of Isotropy index (distance to equilaterality); <bold>(C)</bold> thresholding.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0003.tif">
<alt-text content-type="machine-generated">Diagram illustrating geometric concepts. 
A: Heron Area formula shown with variables for distances between points X, Y, and Z, and a schematic triangle labeled with distances d(X, Y), d(Y, Z), and d(Z, X). 

B: Isotropy index Q formula equals three times the square root of three A over a squared, noted as one for equilateral triangles.

C: Two geometric diagrams. Left: Red and blue lines with note A is less than or equal to S(A). Right: Red and blue lines with note Q is less than or equal to S(Q).</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Results: example of application with the evolutionary random genetic operators with a constant rate in an evolutive ecosystem</title>
<sec>
<label>3.1</label>
<title>The genetic operators of the evolution</title>
<p>Maxwell<sup>&#x000AE;</sup> classifier is applied to genetic data from a sample of species in the three main domains appearing progressively during the evolution and evolving parallelly until present time: Archaea, Bacteria, and Eukarya (<xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Phylogeny of the three domains of life&#x02014;Archaea, Bacteria, and Eukarya&#x02014;with indication of some species.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0004.tif">
<alt-text content-type="machine-generated">Phylogenetic tree diagram displaying the three domains of life: Archaea in red, Bacteria in green, and Eukarya in blue. Archaea branches include Thermoproteus, Pyrodictium, Methanococcus maripaludis, Thermococcus, and Halophiles. Eukarya branches include Entamoeba histolytica, animals, Homo sapiens, slime molds, fungi, plants, ciliates, flagellates, Trichomonas vaginalis, microsporidia, and diplomonads.</alt-text>
</graphic>
</fig>
<p>The first modification of the genomes described on <italic>Oenothera Chilena Grandiflora</italic> as a mutation by de Vries [<xref ref-type="bibr" rid="B15">15</xref>] was in reality a translocation (<xref ref-type="fig" rid="F5">Figure 5</xref>). This genetic change belonged to the main genetic operators, i.e., transformations of nucleotide sequences involved in the evolution of genomes, which are the following: point mutation, insertion (e.g., after reverse transcription), deletion, inversion, translocation, transposition, duplication, symmetrization, palindrome generation, permutation, and recombination (crossing-over).</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>de Vries in his garden with <italic>Oenothera Chilena Grandiflora</italic>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0005.tif">
<alt-text content-type="machine-generated">Elderly man with a beard examines and tends to flowering plants in a garden, surrounded by tall, leafy blooms. He wears a suit and glasses, appearing engaged and focused.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>3.2</label>
<title>A proposal for the first RNA</title>
<p>Many authors have proposed explanations for the first stages of life on Earth, 3.5 billion years ago. Among these, after supporters of DNA as the primordial molecule [<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>] a school gradually emerged proposing RNA as the initial molecule [<xref ref-type="bibr" rid="B18">18</xref>&#x02013;<xref ref-type="bibr" rid="B23">23</xref>], then a group proposing a third way, between DNA first and RNA first, &#x0201C;the third way of evolution&#x0201D; [<xref ref-type="bibr" rid="B24">24</xref>] in favor of an early interaction between RNAs in equilibrium between an active linear form or ring and a memory hairpin form, the first catalyzing the polymerization of the first peptides, from nucleotides and amino acids synthesized following the hypothesis of Miller [<xref ref-type="bibr" rid="B25">25</xref>]. Inspired by these works, we have proposed in a series of previous articles an RNA candidate for serving as catalyzer of the first peptide syntheses [<xref ref-type="bibr" rid="B26">26</xref>&#x02013;<xref ref-type="bibr" rid="B32">32</xref>]. The first step in finding this RNA, which we will call AL in the following (for ALpha or Archetypal Loop), was to suppose that it had to satisfy four criteria for optimizing the primordial catalysis of peptide synthesis. Then, the concept of a ring for the structure of AL has been considered as a sort of circular consensus capable of embedding all possible genetic encodings, with the four choice criteria summarized as follows:</p>
<list list-type="simple">
<list-item><p>1) AL must satisfy the principle &#x0201C;be as short as possible and contain at least one codon per synonymy class of the genetic code,&#x0201D;</p></list-item>
<list-item><p>2) AL codon sequence obtained with overlap after three turns of its circular form must begin with the start codon and end with the stop codon,</p></list-item>
<list-item><p>3) the AL must have a hairpin configuration in balance with its circular shape, and this hairpin must have a minimum head length (3nt) and a maximum number (9) of codon pairs,</p></list-item>
<list-item><p>4) if multiple rings possess properties (1) to (3), they must have a single barycenter for classical inter-ring distances (circular Hamming, permutation, and editing distances), i.e., the AL ring.</p></list-item>
</list>
<p>By formalizing the problem in a search for a Hamiltonian path between the nodes of a circular graph representing the 20 amino acids (<xref ref-type="fig" rid="F6">Figure 6</xref>), we get among 4<sup>22</sup> possible solutions:</p>
<list list-type="simple">
<list-item><p><sup>&#x0002A;</sup> No solution if AL contains 20 or 21 nucleotides, i.e., 20 or 21 overlapping triplets,</p></list-item>
<list-item><p><sup>&#x0002A;</sup> 29 520 solutions if AL contains 22 nucleotides, i.e., 22 triplets ending with an END codon,</p></list-item>
<list-item><p><sup>&#x0002A;</sup> from where 25 with a maximal hairpin form (3 head free bases &#x0002B; 9 &#x000D7; 2 stem bases &#x0002B; 1 tail free base),</p></list-item>
<list-item><p><sup>&#x0002A;</sup> with only one starting with AUG, repeating AUG and being barycenter of the 24 others as the ring built from sequence AUGGUACUGCCAUUCAAGAUGA. Moreover, it is the closest to the set of all known tRNAs.</p></list-item>
</list>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Hamiltonian path used on the amino-acids set to find the archetypal RNAs.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0006.tif">
<alt-text content-type="machine-generated">Circular diagram showing amino acids represented as black squares connected by red lines. Amino acids include Gly, Phe, Leu, Ile, Met, Val, Ser, Pro, Thr, Ala, Tyr, His, Gln, Asn, Lys, Asp, Glu, Cys, Trp, Arg. Lines indicate interactions or relationships between them.</alt-text>
</graphic>
</fig>
<p>In the following, we will use a proximity of a given genome to the RNA AL, denoted P<sub>AL</sub> Doublet [<xref ref-type="bibr" rid="B32">32</xref>] by calculating the number of standard deviations between the observed and expected numbers of motifs common with AL (here pairs of closed trimers from AL), which allows to calculate the probability of getting by chance [<xref ref-type="bibr" rid="B33">33</xref>] these traces from AL in current genomes (e.g., the mitochondrial one obtained from NCBI [<xref ref-type="bibr" rid="B1">1</xref>]).</p>
</sec>
<sec>
<label>3.3</label>
<title>The generation of synthetic genomes having been changed by a sequence of successive genetic operators respecting a certain proportion of each</title>
<p>We developed a custom genome evolution simulator to generate artificial nucleotide sequences across multiple generations, mimicking biological diversification processes due to evolution operators. The simulation is implemented in R and is centered around the function, which takes as input a reference genome and simulates its descent through <italic>n</italic> generations.</p>
<sec>
<label>3.3.1</label>
<title>Initial input and generation process</title>
<p>The simulation begins with a single reference genome, represented as a character string (e.g., RNA or DNA sequence). This genome constitutes Generation 1. For each subsequent generation (i = 2, ..., <italic>n</italic>), a random subset of genomes from the previous generation (i &#x02013; 1) is selected to act as parental genomes. The number of selected parents is capped at p, a user-defined parameter controlling the branching factor of the simulation. Each parent can produce descendant genomes, each of which is derived by applying a sequence of randomly selected evolutionary transformations. The number of transformations applied to each descendant is also randomly chosen between 1 and 11.</p></sec>
<sec>
<label>3.3.2</label>
<title>Implemented evolutionary operations</title>
<p>Each descendant genome is subjected to a pipeline of transformations chosen from the following set:</p>
<list list-type="bullet">
<list-item><p>Point Mutation: random substitution of individual nucleotides,</p></list-item>
<list-item><p>Insertion: addition of a random subsequence at a random position,</p></list-item>
<list-item><p>Deletion: removal of a segment of the genome,</p></list-item>
<list-item><p>Inversion: reversal of a segment&#x00027;s nucleotide order,</p></list-item>
<list-item><p>Translocation: movement of a segment from one position to another,</p></list-item>
<list-item><p>Transposition: segment is excised and reinserted at a new location,</p></list-item>
<list-item><p>Duplication: a region is copied and inserted elsewhere,</p></list-item>
<list-item><p>Symmetrization: production of the reverse-complement (for RNA sequences),</p></list-item>
<list-item><p>Palindrome generation: creation of palindromic sequences to simulate structural patterns,</p></list-item>
<list-item><p>Permutation: random shuffling of a genomic segment,</p></list-item>
<list-item><p>Recombination (Crossing-over): exchange of regions between sequences.</p></list-item>
</list>
<p>These operations are designed to replicate biologically plausible events observed in genome evolution and introduce substantial sequence variability across generations.</p></sec>
<sec>
<label>3.3.3</label>
<title>Output format and file naming convention</title>
<p>Each descendant genome is saved as a text file in the format: descendant_i_k_j, where:</p>
<list list-type="bullet">
<list-item><p>i refers to the generation number (e.g., i = 2 for Generation 2),</p></list-item>
<list-item><p>k refers to the index of the parent genome within Generation i&#x02212;1,</p></list-item>
<list-item><p>j refers to the index of the descendant genome in Generation i derived from this parent k.</p></list-item>
</list>
<p>This naming scheme ensures traceability and allows reconstruction of the full ancestral path of any genome, which is critical when evaluating hierarchical clustering or tree-based reconstruction methods such as Maxwell<sup>&#x000AE;</sup>&#x00027;s classification algorithm (<xref ref-type="fig" rid="F7">Figures 7A</xref>, <xref ref-type="fig" rid="F7">B</xref>).</p>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p><bold>(A)</bold> Five successive generations of simulated genomes (with random action at each generation of some of the eleven evolution operators) classified by Maxwell<sup>&#x000AE;</sup>. <bold>(B)</bold> A part of the clustering showing a subtree respecting the generation order (in red) and another part mixing the generations (in blue).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0007.tif">
<alt-text content-type="machine-generated">Genealogical tree diagram with two sections labeled A and B. Section A shows multiple branching patterns. Section B, divided into two parts, highlights &#x0201C;Homogeneous genealogy&#x0201D; in a red box and &#x0201C;Heterogeneous genealogy&#x0201D; in a blue box, each with distinct branching structures.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>3.3.4</label>
<title>Interpretation of the Maxwell<sup>&#x000AE;</sup> classification results</title>
<p>The hierarchical classification obtained using the Maxwell<sup>&#x000AE;</sup> algorithm (<xref ref-type="fig" rid="F7">Figures 7A</xref>, <xref ref-type="fig" rid="F7">B</xref>) was evaluated against the known genealogy of simulated genomes. In cases of homogeneous classification, the dendrogram exhibited a clear structure in which genomes derived from the same ancestor (identified by shared parent and generation indices in their filenames, e.g., descendant_i_k_j) were grouped within the same branches. This concordance suggests that the classifier successfully captured the evolutionary relationships embedded in the data, even in the presence of complex mutational events such as insertions, deletions, and transpositions. Conversely, in heterogeneous classifications, descendant genomes originating from the same parent were dispersed across multiple branches, and sequences from distant generations were occasionally grouped together (<xref ref-type="fig" rid="F8">Figure 8</xref>). This pattern indicates potential challenges for the classifier in preserving genealogical coherence when evolutionary noise increases or when sequence divergence becomes too pronounced. Such discrepancies underscore the limits of structural similarity measures in reconstructing deep or highly perturbed evolutionary histories. Overall, these results demonstrate the ability of the Maxwell<sup>&#x000AE;</sup> classifier to retrieve hierarchical structure under moderate evolutionary variation, while also highlighting the importance of controlling for transformation intensity in simulated datasets when evaluating classification robustness.</p>
<fig position="float" id="F8">
<label>Figure 8</label>
<caption><p>Examples showing the management of clusters with some respecting neither the surface nor the equilaterality criterion <bold>(left)</bold>, a homogeneous cluster made of members of the same descent <bold>(middle)</bold> and singletons from broken clusters awaiting their rearrangement into new clusters respecting both criteria <bold>(right)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0008.tif">
<alt-text content-type="machine-generated">Split screen image displaying a log on the left with timestamps and operational details about a process called Feed Loop, involving metadata balancing and cluster data. On the right, a table shows a list of text files named &#x0201C;descendant&#x0201D; with associated numerical values. Behind the table are gray placeholders labeled &#x0201C;No Thumbnail,&#x0201D; indicating missing images or files.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec>
<label>3.4</label>
<title>Classification of mitochondrial genomes</title>
<p>The mitochondrial genomes of 10 mammalian species were obtained from the NCBI site and their classification yielded, from the interspecies distance matrix, four classes of size 2 (hominidae, whales, seals, and murines) and two singletons (horse and cat) of which we will only comment on two intra-class and two inter-class proximities (<xref ref-type="fig" rid="F9">Figure 9</xref>), comparing the results of Maxwell<sup>&#x000AE;</sup> to those of the NCD classical classifier [<xref ref-type="bibr" rid="B10">10</xref>].</p>
<fig position="float" id="F9">
<label>Figure 9</label>
<caption><p>Matrices of distances between different species calculated by two different classifiers, NCD on the top [<xref ref-type="bibr" rid="B10">10</xref>] and Maxwell<sup>&#x000AE;</sup> on the bottom, with indication (in red) of the human to chimpanzee distance.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0009.tif">
<alt-text content-type="machine-generated">Two tables titled &#x0201C;NCD&#x0201D; and &#x0201C;Maxwell&#x0201D; show numerical similarity scores between various animal names: gray seal, rat, mouse, fin whale, harbor seal, human, horse, cat, blue whale, and chimpanzee. Some numbers, specifically in red, indicate lower or notable similarity values. The arrangement displays symmetrical results in a matrix format.</alt-text>
</graphic>
</fig>
<p>The primate cluster {human, chimpanzee} is obtained in both classifiers with an intraclass distance smaller for Maxwell<sup>&#x000AE;</sup> (0.553) than for NCD (0.655). It is the same for the whale cluster {blueWhale, finWhale} (0.49 vs. 0.61). The mean interclass distance between the primate and whale clusters is smaller for Maxwell<sup>&#x000AE;</sup> (0.90166) than for NCD (0.9226). Hence with Maxwell<sup>&#x000AE;</sup>, the internal homogeneity was favored over external heterogeneity, contrary to what is observed for NCD.</p>
</sec>
<sec>
<label>3.5</label>
<title>Mitochondrial genome classification and proximity to AL</title>
<p>Comparing the primate classification to equid one (<xref ref-type="fig" rid="F10">Figures 10A</xref>, <xref ref-type="fig" rid="F10">B</xref>), several points of convergence are observed:</p>
<fig position="float" id="F10">
<label>Figure 10</label>
<caption><p><bold>(A)</bold> Taxonomy of hominoides; <bold>(B)</bold> Three clusters obtained by Maxwell<sup>&#x000AE;</sup> confirming the antiquity of the species as known in classical taxonomic studies. The proximity to AL (P<sub>AL</sub> Doublet) of mitochondrial genome is indicated in red; <bold>(C)</bold> Taxonomy of Perissodactyla.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0010.tif">
<alt-text content-type="machine-generated">Diagram labeled &#x0201C;A&#x0201D; shows a phylogenetic tree illustrating the classification of Hominoidea, Hylobatidae, and Hominidae, including Gibbons, Orangutans, Gorillas, Chimpanzees, and Humans. Diagram labeled &#x0201C;B&#x0201D; presents a series of images with numerical scores: Gorilla (4.64), Orangutan (4.65), Gibbon (4.44), Baboon (5.84), White Rhinoceros (8.05), Indian Rhinoceros (7.20), Horse (5.89), Donkey (3.00), Human (2.44), Pigmy Chimpanzee (4.28), and Chimpanzee (4.50). &#x0201C;C&#x0201D; A separate phylogenetic tree lists various species, including rhinoceroses and equids.</alt-text>
</graphic>
</fig>
<p>- The consistency with previous studies on the evolution of mammalian families: chimpanzees are close to humans and more distant from other simians (gorilla, orangutan, gibbon and baboon) [<xref ref-type="bibr" rid="B35">35</xref>],</p>
<p>- Perissodactyla species (<xref ref-type="fig" rid="F10">Figure 10C</xref>) are in the same cluster, with rhinoceroses appearing older than equids, i.e., having a greater proximity (in red in <xref ref-type="fig" rid="F10">Figure 10B</xref>, see <xref ref-type="supplementary-material" rid="SM1">Supplementary material S1</xref>) to archetypal RNA AL [<xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B37">37</xref>].</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>The choice of the compressor strongly influences the metric structure of the NCD. The BWT followed by RLE with the introduction of a locking pattern (or sentinel) outside the alphabet preserves the reversibility during concatenations, and this reversible transformation tends to satisfy both symmetry and triangle inequality axioms of the distance C: C<sub>XY</sub>&#x02248;C<sub>YX</sub> and C<sub>XZ</sub> &#x02264; C<sub>XY</sub> &#x0002B; C<sub>YZ</sub>. This brings the NCD closer to a Hilbert-type distance and reinforces its Euclidean character. For DNA sequences where the alphabet is {A,C,G,T}, the letter Z can be used. For example, with X=ACGTTAAAA and Y=AATGCT, a naive concatenation can produce ambiguities during decompression. ZACGTTAAAAZAATGCT explicitly marks the boundary, ensuring consistent cross-compression and a more stable distance, improving the Euclidean quality of the distance matrices. The NCD, based on compression, offers a conceptual and practical framework for comparing heterogeneous objects. This universality in the application to any type of data and any type of domain brings advantages such as: (i) no need for alignment, annotation, or expert features and (ii) consistent results on a small corpus. On a small corpus of biological sequences, the NCD approach finds expected structures without requiring specific knowledge. Its limitations&#x02014;cost and dependence on the compressor&#x02014;call for further work, but its interdisciplinary potential is evident. The above results in genomics show that the information carried by nucleotide sequences alone allows genomes to be organized in a way that respects current knowledge of the evolution of species. This information concerns the way in which species have evolved while being subject to the operators of evolution. 
If they have co-evolved while belonging to the same ecosystem where they maintained relationships of the parasitism, saprophytism or simply commensalism type, it is not surprising to find them in the same cluster. In <xref ref-type="fig" rid="F11">Figure 11</xref> for example, the well-characterized giant viruses have well-identified clusters, linked for example to Pandoravirus and Mimivirus. The trace of the contamination of giant viruses by the virophages Zamilon and Sputnik and that of the contamination of the amoeba Acanthamoeba by a virus of the Mimivirus class (trace in its mitochondrial DNA) and by a virus of the Pandoravirus class (trace in its tyrosine-tRNA ligase) is justified by their evolution in the same marine ecosystem. Legionella bacteria are isolated by Maxwell<sup>&#x000AE;</sup> clustering classification, but having traces of ancient contamination by Acanthamoeba, they join the Pandoravirus cluster, if we relax the clustering constraints. This clustering based only on nucleotide sequences shows that they could have a common origin, possibly due to the fact that first peptides are supposed to be formed with catalysis by primordial RNAs [<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B38">38</xref>&#x02013;<xref ref-type="bibr" rid="B45">45</xref>], analogous to experimental synthesis of dipeptides on RNA template [<xref ref-type="bibr" rid="B46">46</xref>&#x02013;<xref ref-type="bibr" rid="B50">50</xref>]. These peptides left relics in current proteins of ancient organisms like Entamoebae (P<sub>AL</sub> Doublet proximity to AL in red, see <xref ref-type="supplementary-material" rid="SM1">Supplementary material S1</xref>), reinforcing the hypothesis of existence of weak bonds between RNA and amino acids, in connection with a progressive appearance of the current genetic code [<xref ref-type="bibr" rid="B46">46</xref>&#x02013;<xref ref-type="bibr" rid="B50">50</xref>].</p>
<fig position="float" id="F11">
<label>Figure 11</label>
<caption><p><bold>(A)</bold> Cluster of giant viruses from Pandoravirus family with their target Acanthamoeba (ribosomal genome); <bold>(B)</bold> Cluster Legionella (ribosomal genome); <bold>(C)</bold> Cluster Mimivirus with its virophages Zamilon and Sputnik, and their targets Acanthamoeba (mitochondrial genome) and Woesearchaeota (whole genome); <bold>(D)</bold> Giant viruses classification based on the RNA-dependent RNA polymerase using FastTree algorithm [<xref ref-type="bibr" rid="B34">34</xref>]. AL proximity P<sub>AL</sub> Doublet is indicated (in red).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0011.tif">
<alt-text content-type="machine-generated">Diagram showing a hierarchical classification of pandoraviruses and related groups. Panel A includes &#x0201C;Class 1&#x0201D; with several viruses like Pandoravirus salinus. Panel B shows &#x0201C;Class 3 singleton&#x0201D; with Legionella pneumophila. Panel C highlights &#x0201C;Class 2,&#x0201D; featuring Mimivirus and Megavirus chilensis. Panel D represents a phylogenetic tree with groups: Pandoraviruses, Mimiviruses, Phycodnaviruses, and Mollivirus, color-coded for clarity.</alt-text>
</graphic>
</fig>
<p>Concerning the metrology, Maxwell<sup>&#x000AE;</sup> relies on high-performance computing standards, guaranteeing a good scalability:</p>
<p>- Performance automatically adapts to the software environment (memory, CPU cores number),</p>
<p>- NCD formula (2) demonstrates its algorithmic simplicity. The relative slowness in calculation is common to all lossless compressors and is the reason for the limited gains with the GPU,</p>
<p>- Clustering is handled by graph theory and performance is that of the graph management library, here Graphviz<sup>&#x000AE;</sup>, which is a benchmark,</p>
<p>- The use of a NoSQL database (MongoDB) for the data storing frees from memory management and input/output issues, delegating these to PostgreSQL.</p>
<p>Concerning the usage, Maxwell<sup>&#x000AE;</sup> can operate in incremental mode thanks to an epoch mechanism, similar to neural network learning systems, where each epoch alternates between a classification phase and a clustering phase for unclassified elements. This property means that it is not necessary to repeat the entire learning process when a new dataset is added. This ensures compliance with a reference base while allowing it to evolve. The result is a system capable of merging datasets.</p>
<p>One of the main advantages of Maxwell is the absence of semantic data, because Maxwell uses only information in octets allowing decision tests based on deliberately simple statistical values (means and standard deviations) due to their algorithmic simplicity. However, once the cluster calculations have been performed, we can evaluate the robustness of the relationships between groups of measured data and create a posteriori semantic classes using metadata to detect semantically ambiguous clusters that will be distributed across several semantic classes.</p>
<p>Another important advantage is the reversibility of the classification process, that is, the ability to return to all stages of this process, which may be necessary to find a detectable error in a medico-legal process of proving the origin of bad diagnostic or therapeutic advice.</p></sec>
<sec id="s5">
<label>5</label>
<title>Perspectives and conclusion</title>
<p>We have presented a new classification tool called Maxwell<sup>&#x000AE;</sup>, whose main characteristics are to be adiabatic (reversible), agnostic (without the need to introduce <italic>a priori</italic> knowledge) and almost autonomous (given the possibility of refining a posteriori clusters that are too large, therefore often heterogeneous, or too small, such as singletons). The application of the Maxwell<sup>&#x000AE;</sup> classifier to the genome shows that it is particularly well-suited to detect transformations of primary nucleotide information due to evolutionary operators, therefore to classify in the same cluster species that have evolved in the same eco-system. Only a thousand synthetic genomes and forty species were studied in this article, but the results demonstrate the relevance of the classifier to bring together neighboring evolved genomes.</p>
<p>In this direction, three research topics can be considered, in order of increasing complexity: (i) generalizing to all species the calculation of the age of their genome and their attachment to existing clusters in the three kingdoms of life: Archaea, Bacteria, and Eukaryotes; (ii) processing data related to the interaction between genes in genetic control networks. Classifying networks requires the introduction of new distances between graphs, but the problem can be solved using Maxwell&#x00027;s algorithm; (iii) processing epigenetic control networks, which encompass genetic networks and their exogenous control parameters related to infectious agents, environmental factors, and biological clocks. These three domains of research are challenging but represent essential opportunities in biomedical research. In view of the current work, three areas of ongoing work can be identified:</p>
<p>1) restitution of the results of the data processing</p>
<p>Linear phylogenetic trees are limited by their height. We have therefore started to replace them with more compact circular cladograms (<xref ref-type="fig" rid="F12">Figure 12</xref>).</p>
<fig position="float" id="F12">
<label>Figure 12</label>
<caption><p>Mammals mitochondrial genome Maxwell<sup>&#x000AE;</sup> classification and AL-codon-counter annotation; <bold>(A)</bold> Circular cladogram with peripheral bars proportional to mitochondrial genome sizes; <bold>(B)</bold> Linear phylogenetic tree with AL-trimeric distance annotation; <bold>(C)</bold> Heatmap; <bold>(D)</bold> 3D diagram of NCD distances between mammal species.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0012.tif">
<alt-text content-type="machine-generated">Four-panel image showing different visualizations: A) Circular phylogenetic tree with various species names and colored lines. B) Dendrogram with species and genome sizes labeled, featuring branching lines. C) Heatmap displaying a matrix of species names with a color gradient from blue to red. D) 3D scatter plot with labeled data points representing species across three axes.</alt-text>
</graphic>
</fig>
<p>Work remains to be done to refine the graph at its root level. The same applies to the output in the form of a linear dendrogram, whose bifurcation points have to be emphasized (as in <xref ref-type="fig" rid="F13">Figure 13</xref>) and annotations more linked to the classification by showing their possible inconsistency with the clustering due to Maxwell<sup>&#x000AE;</sup> (<xref ref-type="fig" rid="F12">Figure 12B</xref>). The restitution in the form of a heatmap (<xref ref-type="fig" rid="F12">Figure 12C</xref>) or a 3D representation of the distances between species (<xref ref-type="fig" rid="F12">Figure 12D</xref>) must provide additional information on the proximities between species, to be incorporated into the display of the results, to be able to use them in real time, if there is a modification of the input data.</p>
<fig position="float" id="F13">
<label>Figure 13</label>
<caption><p>Maxwell&#x00027;s phylogenetic tree of mammal species based on their mitochondrial genome. The bifurcation points between species are indicated in green. AL proximity P<sub>AL</sub> Doublet is indicated (in red).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0013.tif">
<alt-text content-type="machine-generated">Radial phylogenetic tree diagram showing species files labeled in black, arranged around a circle. Distances are indicated in red. Lines connect each species to a central point, with names like &#x0201C;human.txt&#x0201D; and &#x0201C;gorilla.txt&#x0201D; among others.</alt-text>
</graphic>
</fig>
<p>2) Generalization of the method to large data sets.</p>
<p>The NCBI Nucleotide repository contains the genomes of more than 160,000 species, each of which contains approximately 600 RNAs of evolutionary interest (tRNA, rRNA, miRNA, circRNA, mRNA and the corresponding proteins [<xref ref-type="bibr" rid="B51">51</xref>, <xref ref-type="bibr" rid="B52">52</xref>]), primarily those involved in transcription, translation, and cellular energy, which are essential for cellular life. The next step will be the progressive generalization of the classifications presented here to the 160,000 species currently present in the NCBI server [<xref ref-type="bibr" rid="B1">1</xref>]. Expanding the classification work proposed here and in [<xref ref-type="bibr" rid="B32">32</xref>] to very large data sets will be a natural program for the future of our current approach.</p>
<p>3) Comparison with other clustering methods</p>
<p>In [<xref ref-type="bibr" rid="B53">53</xref>], we compared 25 classification techniques for sepsis diagnosis, from classical k-means and multiple regression tools to deep learning methods, after which we decided to use a new classifier having the same performance as the best one in sepsis diagnosis (surprisingly, multiple regression), but with the advantage of being reversible (each step is explainable), which is a necessary condition of acceptance in a medico-legal context. In <xref ref-type="fig" rid="F14">Figure 14</xref>, a comparison between the Maxwell&#x00027;s approach and a classical classifier [<xref ref-type="bibr" rid="B54">54</xref>] has been done on genomic data coming from the whole genome of a sample of Archaea in the case of Maxwell<sup>&#x000AE;</sup> and from a chosen set of genes for the alternative method. In this last example, the obtained phylogenetic tree (<xref ref-type="fig" rid="F14">Figure 14A</xref>) was inferred by using a neighbor-joining analysis of 637 homologous sequences chosen in the genome of Archaea.</p>
<fig position="float" id="F14">
<label>Figure 14</label>
<caption><p>Phylogenetic tree of crenarchaeota (in green) and archaeoglobi (in red). <bold>(A)</bold> Classification method based on a neighbor-joining analysis of 637 homologous sequences chosen in the genome of each archaeon [<xref ref-type="bibr" rid="B54">54</xref>]; <bold>(B)</bold> Maxwell&#x00027;s classification of chosen crenarchaeota and archaeoglobi.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0014.tif">
<alt-text content-type="machine-generated">Phylogenetic tree diagrams labeled A and B. Diagram A shows branching relationships among species like Ignicoccus hospitalis, Archaeoglobus veneficus, and Ferroglobus placidus. Diagram B illustrates connections between species such as Acidilobus saccharovorans, Sulfolobus solfataricus, and Metallosphaera cuprina. Red and green circles highlight different groupings.</alt-text>
</graphic>
</fig>
<p>The scale bar on <xref ref-type="fig" rid="F14">Figure 14A</xref> represents 10 mutations per 100 nt of the positions of these homologous sequences. The percentage of 100 bootstrap resamplings supporting the topology of the neighbor-joining skeleton is indicated as a fraction. On <xref ref-type="fig" rid="F14">Figure 14B</xref>, the Maxwell&#x00027;s tree has been obtained by using the whole genome of some Archaea chosen in two families, the crenarchaeota (in yellow in <xref ref-type="fig" rid="F14">Figure 14A</xref>) and the archaeoglobi (in red in <xref ref-type="fig" rid="F14">Figure 14A</xref>).</p>
<p>In conclusion, the Maxwell<sup>&#x000AE;</sup> classifier presents a unique set of characteristics, making it particularly suitable for biomedical applications:</p>
<list list-type="simple">
<list-item><p>1) Its adiabatic or reversible nature makes it possible to isolate the steps involved in a potential diagnostic or therapeutic advice error. This ability to demonstrate a detachable error, excluding, for example, the physician&#x00027;s liability, makes it a means of meeting the medico-legal requirements of medical practice.</p></list-item>
<list-item><p>2) Its agnostic nature, i.e., its independence of <italic>a priori</italic> semantics or semiology, makes it suitable for processing data such as nucleotide or amino acid strings coming from a sequencer for example, without any information other than the obtained sequences. In <xref ref-type="fig" rid="F15">Figure 15</xref>, using only the whole sequence of the archaeal or viral genomes, it is possible to obtain clusters coherent with the classical taxonomy.</p></list-item>
<list-item><p>3) Its autonomy, stemming from its unsupervised nature, makes it capable of automatically providing a classification, which will be interpreted using metadata and may subsequently lead to adjustments to the cluster aggregation thresholds in a second, partially non-autonomous phase. The sensitivity to the threshold distance parameter for cluster assignment is handled interactively by Maxwell<sup>&#x000AE;</sup>, allowing singletons to be assigned to the nearest cluster or, conversely, to split up clusters that are too large and heterogeneous.</p></list-item>
</list>
<fig position="float" id="F15">
<label>Figure 15</label>
<caption><p>Maxwell&#x00027;s classification of giant viruses and their putative targets chosen in amoebae, crenarchaeota and archaeoglobi families. <bold>(A)</bold> thumbnail image showing the start of the phylogenetic tree with the bifurcations of the branches (in blue); <bold>(B)</bold> classification of giant viruses (blue circle), their targets (violet circle), crenarchaeota (in green) and euryarchaeota (in red); <bold>(C)</bold> whole cladogram of Maxwell&#x00027;s classification.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fams-11-1634300-g0015.tif">
<alt-text content-type="machine-generated">Diagram showing a branching network labeled A and B, originating from a point labeled AL. Lines extend to various labels encircled in green, blue, purple, and red, such as Methanopyrus and Pyrobaculum. Below, a circular distribution of lines radiates from a central point.</alt-text>
</graphic>
</fig>
<p>In summary, Maxwell<sup>&#x000AE;</sup> is a new adiabatic, agnostic and almost autonomous classifier, whose efficiency will be systematically compared in a future work on very large genetic databases to the existing classifiers listed in [<xref ref-type="bibr" rid="B53">53</xref>, <xref ref-type="bibr" rid="B55">55</xref>]. The final choice of a classifier, in the case of a biomedical application, is the subject of a compromise between the precision and speed of its clustering and its capacity to provide clear answers in the identification of the faulty stages of its reasoning, in the context of forensic investigations, similar to those carried out on the non-digital medical human chain of diagnosis and care, to identify and qualify the responsibilities of a possible error. Then, its major application concerns the medical diagnosis in which a prior classification of a training set of patients allows, in the generalization phase to a larger patient population, to test the accuracy of the assignment of a new patient to the class that corresponds to him. In the case of the genome, the major interest of Maxwell<sup>&#x000AE;</sup> lies in its capacity to find clusters using only the sequence of the entire genome or that of cellular structures such as mitochondria (for mammals) and chloroplasts (for plants). A version of Maxwell<sup>&#x000AE;</sup> can be found online in [<xref ref-type="bibr" rid="B55">55</xref>].</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JG: Conceptualization, Investigation, Methodology, Software, Supervision, Validation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. JT: Investigation, Software, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing, Data curation, Formal analysis. CM: Data curation, Investigation, Software, Writing &#x02013; review &#x00026; editing, Conceptualization, Methodology, Validation. MJ: Conceptualization, Investigation, Methodology, Software, Writing &#x02013; review &#x00026; editing. HK: Conceptualization, Investigation, Software, Data curation, Writing &#x02013; review &#x00026; editing. JD: Conceptualization, Data curation, Investigation, Software, Writing &#x02013; review &#x00026; editing, Formal analysis, Funding acquisition, Methodology, Project administration, Resources, Supervision, Validation, Visualization, Writing &#x02013; original draft.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="correction note" id="s12">
<title>Correction note</title>
<p>This article has been corrected with minor changes. These changes do not impact the scientific content of the article.</p></sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec><sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fams.2025.1634300/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fams.2025.1634300/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<mixed-citation publication-type="web"><person-group person-group-type="author"><collab>NCBI</collab></person-group> (<year>2025</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/refseq/">https://www.ncbi.nlm.nih.gov/refseq/</ext-link> (Accessed May 23, 2025).</mixed-citation>
</ref>
<ref id="B2">
<label>2.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Magal</surname> <given-names>P</given-names></name></person-group>. <article-title>Data-driven mathematical modeling approaches for COVID-19: a survey</article-title>. <source>Phys Life Rev</source>. (<year>2024</year>) <volume>50</volume>:<fpage>166</fpage>&#x02013;<lpage>208</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.plrev.2024.08.004</pub-id><pub-id pub-id-type="pmid">39142261</pub-id></mixed-citation>
</ref>
<ref id="B3">
<label>3.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yaagoub</surname> <given-names>Z</given-names></name> <name><surname>Sadki</surname> <given-names>M</given-names></name> <name><surname>Allali</surname> <given-names>K</given-names></name></person-group>. <article-title>Global stability of spatio-temporal with quarantine and vaccination</article-title>. <source>J Indonesian Math Soc.</source> (<year>2024</year>) <volume>30</volume>:<fpage>321</fpage>&#x02013;<lpage>37</lpage>. doi: <pub-id pub-id-type="doi">10.22342/jims.30.2.1452.321-337</pub-id></mixed-citation>
</ref>
<ref id="B4">
<label>4.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yaagoub</surname> <given-names>Z</given-names></name> <name><surname>Farah</surname> <given-names>EM</given-names></name> <name><surname>Ahmad</surname> <given-names>S</given-names></name></person-group>. <article-title>Three-strain epidemic model for influenza virus involving fractional derivative and treatment</article-title>. <source>J Appl Math Comput.</source> (<year>2025</year>) <volume>71</volume>:<fpage>1247</fpage>&#x02013;<lpage>66</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12190-024-02284-0</pub-id></mixed-citation>
</ref>
<ref id="B5">
<label>5.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sadki</surname> <given-names>M</given-names></name> <name><surname>Yaagoub</surname> <given-names>Z</given-names></name> <name><surname>Allali</surname> <given-names>K</given-names></name></person-group>. <article-title>Qualitative analysis of a fractional-order for a within-host infection dynamics with adaptive immunity using caputo derivative</article-title>. <source>Iranian J Sci.</source> (<year>2025</year>) <volume>49</volume>:<fpage>847</fpage>&#x02013;<lpage>69</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s40995-024-01768-9</pub-id></mixed-citation>
</ref>
<ref id="B6">
<label>6.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Brillouin</surname> <given-names>L</given-names></name></person-group>. <source>Science and Information Theory</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Academic Press</publisher-name>. (<year>1956</year>).</mixed-citation>
</ref>
<ref id="B7">
<label>7.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Landauer</surname> <given-names>R</given-names></name></person-group>. <article-title>Irreversibility and heat generation in the computing process</article-title>. <source>IBM J Res Dev</source>. (<year>1961</year>) <volume>5</volume>:<fpage>183</fpage>&#x02013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1147/rd.53.0183</pub-id></mixed-citation>
</ref>
<ref id="B8">
<label>8.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bennett</surname> <given-names>CH</given-names></name></person-group>. <article-title>Logical depth and physical complexity</article-title>. In: <person-group person-group-type="editor"><name><surname>Herken</surname> <given-names>R</given-names></name></person-group>, editor. <source>A Half-Century Survey on the Universal Turing Machine</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Oxford University Press</publisher-name> (<year>1988</year>). pp. <fpage>227</fpage>&#x02013;<lpage>57</lpage>. doi: <pub-id pub-id-type="doi">10.1093/oso/9780198537748.003.0008</pub-id></mixed-citation>
</ref>
<ref id="B9">
<label>9.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M</given-names></name> <name><surname>Vit&#x000E1;nyi</surname> <given-names>PMB</given-names></name></person-group>. <source>An Introduction to Kolmogorov Complexity and its Applications</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>1993</year>). doi: <pub-id pub-id-type="doi">10.1007/978-1-4757-3860-5</pub-id></mixed-citation>
</ref>
<ref id="B10">
<label>10.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cilibrasi</surname> <given-names>R</given-names></name> <name><surname>Vit&#x000E1;nyi</surname> <given-names>P</given-names></name></person-group>. <article-title>Clustering by compression</article-title>. <source>IEEE Trans Inf Theory.</source> (<year>2005</year>) <volume>51</volume>:<fpage>1523</fpage>&#x02013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIT.2005.844059</pub-id></mixed-citation>
</ref>
<ref id="B11">
<label>11.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Burrows</surname> <given-names>M</given-names></name> <name><surname>Wheeler</surname> <given-names>DJ</given-names></name></person-group>. <article-title>A block-sorting lossless data compression algorithm</article-title>. <source>Digit SRC Res Rep.</source> (<year>1994</year>) <volume>124</volume>:<fpage>10009821328</fpage>.</mixed-citation>
</ref>
<ref id="B12">
<label>12.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gardes</surname> <given-names>J</given-names></name> <name><surname>Maldivi</surname> <given-names>C</given-names></name> <name><surname>Boisset</surname> <given-names>D</given-names></name> <name><surname>Aubourg</surname> <given-names>T</given-names></name> <name><surname>Vuillerme</surname> <given-names>N</given-names></name> <name><surname>Demongeot</surname> <given-names>J</given-names></name></person-group>. <article-title>Maxwell<sup>&#x000AE;</sup>, an unsupervised learning approach for 5P medicine</article-title>. <source>Stud Health Technol Inform.</source> (<year>2019</year>) <volume>264</volume>:<fpage>1464</fpage>&#x02013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.3233/SHTI190486</pub-id><pub-id pub-id-type="pmid">31438183</pub-id></mixed-citation>
</ref>
<ref id="B13">
<label>13.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Gardes</surname> <given-names>J</given-names></name> <name><surname>Maldivi</surname> <given-names>C</given-names></name> <name><surname>Boisset</surname> <given-names>D</given-names></name> <name><surname>Boufama</surname> <given-names>K</given-names></name> <name><surname>Touzouti</surname> <given-names>I</given-names></name></person-group>. <article-title>Genomic phylogeny using the Maxwell<sup>&#x000AE;</sup> classifier based on Burrows&#x02013;Wheeler Transform</article-title>. <source>Computation.</source> (<year>2023</year>) <volume>11</volume>:<fpage>158</fpage>. doi: <pub-id pub-id-type="doi">10.3390/computation11080158</pub-id></mixed-citation>
</ref>
<ref id="B14">
<label>14.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Drachmann</surname> <given-names>AG</given-names></name></person-group>. <article-title>Heron and ptolemaios</article-title>. <source>Centaurus.</source> (<year>1950</year>) <volume>1</volume>:<fpage>117</fpage>&#x02013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1600-0498.1950.tb00576.x</pub-id></mixed-citation>
</ref>
<ref id="B15">
<label>15.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>De Vries</surname> <given-names>H</given-names></name></person-group>. <source>Die Mutationstheorie</source>. <publisher-loc>Leipzig, Germany</publisher-loc>: <publisher-name>Veit &#x00026; Co</publisher-name> (<year>1901</year>).</mixed-citation>
</ref>
<ref id="B16">
<label>16.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Watson</surname> <given-names>JD</given-names></name> <name><surname>Crick</surname> <given-names>FHC</given-names></name></person-group>. <article-title>The structure of DNA</article-title>. <source>Cold Spring Harbor Symp Quant Biol.</source> (<year>1953</year>) <volume>18</volume>:<fpage>123</fpage>&#x02013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1101/SQB.1953.018.01.020</pub-id></mixed-citation>
</ref>
<ref id="B17">
<label>17.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Monod</surname> <given-names>J</given-names></name></person-group>. <source>Chance and Necessity: An Essay on the Natural Philosophy of Modern Biology</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Knopf</publisher-name> (<year>1971</year>).</mixed-citation>
</ref>
<ref id="B18">
<label>18.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Paecht-Horowitz</surname> <given-names>M</given-names></name> <name><surname>Berger</surname> <given-names>J</given-names></name> <name><surname>Katchalsky</surname> <given-names>A</given-names></name></person-group>. <article-title>Prebiotic synthesis of polypeptides by heterogeneous polycondensation of amino-acid adenylates</article-title>. <source>Nature.</source> (<year>1970</year>) <volume>228</volume>:<fpage>636</fpage>&#x02013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1038/228636a0</pub-id><pub-id pub-id-type="pmid">5474935</pub-id></mixed-citation>
</ref>
<ref id="B19">
<label>19.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Eigen</surname> <given-names>M</given-names></name></person-group>. <article-title>Selforganization of matter and the evolution of biological macromolecules</article-title>. <source>Naturwiss.</source> (<year>1971</year>) <volume>58</volume>:<fpage>465</fpage>&#x02013;<lpage>523</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF00623322</pub-id><pub-id pub-id-type="pmid">4942363</pub-id></mixed-citation>
</ref>
<ref id="B20">
<label>20.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Katchalsky</surname> <given-names>A</given-names></name></person-group>. <article-title>Prebiotic synthesis of biopolymers on inorganic templates</article-title>. <source>Naturwiss.</source> (<year>1973</year>) <volume>60</volume>:<fpage>215</fpage>&#x02013;<lpage>20</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF00625709</pub-id></mixed-citation>
</ref>
<ref id="B21">
<label>21.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K</given-names></name> <name><surname>Schimmel</surname> <given-names>P</given-names></name></person-group>. <article-title>Oligonucleotide-directed peptide synthesis in a ribosome- and ribozyme-free system</article-title>. <source>Proc Natl Acad Sci USA.</source> (<year>2001</year>) <volume>98</volume>:<fpage>1393</fpage>&#x02013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.98.4.1393</pub-id><pub-id pub-id-type="pmid">11171961</pub-id></mixed-citation>
</ref>
<ref id="B22">
<label>22.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>H</given-names></name> <name><surname>Murakami</surname> <given-names>H</given-names></name> <name><surname>Suga</surname> <given-names>H</given-names></name> <name><surname>Ferr&#x000E9;-D&#x00027;Amar&#x000E9;</surname> <given-names>AR</given-names></name></person-group>. <article-title>Structural basis of specific tRNA aminoacylation by a small <italic>in vitro</italic> selected ribozyme</article-title>. <source>Nature</source>. (<year>2008</year>) <volume>454</volume>:<fpage>358</fpage>&#x02013;<lpage>61</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature07033</pub-id></mixed-citation>
</ref>
<ref id="B23">
<label>23.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Deng</surname> <given-names>J</given-names></name> <name><surname>Wilson</surname> <given-names>TJ</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Peng</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>M</given-names></name> <name><surname>Lin</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>Structure and mechanism of a methyltransferase ribozyme</article-title>. <source>Nat Chem Biol.</source> (<year>2022</year>) <volume>18</volume>:<fpage>556</fpage>&#x02013;<lpage>64</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41589-022-00982-z</pub-id><pub-id pub-id-type="pmid">35301479</pub-id></mixed-citation>
</ref>
<ref id="B24">
<label>24.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shapiro</surname> <given-names>JA</given-names></name></person-group>. <article-title>Why the third way of evolution is necessary</article-title>. <source>Theor Biol Forum.</source> (<year>2021</year>) <volume>114</volume>:<fpage>13</fpage>&#x02013;<lpage>26</lpage>. doi: <pub-id pub-id-type="doi">10.19272/202111402002</pub-id><pub-id pub-id-type="pmid">36382546</pub-id></mixed-citation>
</ref>
<ref id="B25">
<label>25.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Miller</surname> <given-names>SL</given-names></name></person-group>. <article-title>A production of amino acids under possible primitive Earth conditions</article-title>. <source>Science.</source> (<year>1953</year>) <volume>117</volume>:<fpage>528</fpage>&#x02013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.117.3046.528</pub-id><pub-id pub-id-type="pmid">13056598</pub-id></mixed-citation>
</ref>
<ref id="B26">
<label>26.</label>
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name></person-group>. <source>Au Sujet de Quelques Mod&#x000E8;les Stochastiques Appliqu&#x000E9;s &#x000E0; la Biologie</source>. PhD Thesis, Universit&#x000E9; Joseph Fourier, Grenoble, France (<year>1975</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://tel.archives-ouvertes.fr/tel-00286222">https://tel.archives-ouvertes.fr/tel-00286222</ext-link> (Accessed on January 5, 2025).</mixed-citation>
</ref>
<ref id="B27">
<label>27.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name></person-group>. <article-title>Sur la possibilit&#x000E9; de consid&#x000E9;rer le code g&#x000E9;n&#x000E9;tique comme un code &#x000E0; encha&#x000EE;nement</article-title>. <source>Rev Biomaths.</source> (<year>1978</year>) <volume>62</volume>:<fpage>61</fpage>&#x02013;<lpage>6</lpage>.</mixed-citation>
</ref>
<ref id="B28">
<label>28.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Besson</surname> <given-names>J</given-names></name></person-group>. <article-title>Code g&#x000E9;n&#x000E9;tique et codes &#x000E0; encha&#x000EE;nement I</article-title>. <source>CR Acad Sc III.</source> (<year>1983</year>) <volume>296</volume>:<fpage>807</fpage>&#x02013;<lpage>10</lpage>.</mixed-citation>
</ref>
<ref id="B29">
<label>29.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Besson</surname> <given-names>J</given-names></name></person-group>. <article-title>Genetic code and cyclic codes II</article-title>. <source>CR Acad Sc III.</source> (<year>1996</year>) <volume>319</volume>:<fpage>520</fpage>&#x02013;<lpage>8</lpage>.</mixed-citation>
</ref>
<ref id="B30">
<label>30.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Weil</surname> <given-names>G</given-names></name> <name><surname>Heus</surname> <given-names>K</given-names></name> <name><surname>Faraut</surname> <given-names>T</given-names></name> <name><surname>Demongeot</surname> <given-names>J</given-names></name></person-group>. <article-title>An archetypal basic code for the primitive genome</article-title>. <source>Theoret Comp Sc.</source> (<year>2004</year>) <volume>322</volume>:<fpage>313</fpage>&#x02013;<lpage>34</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.tcs.2004.03.015</pub-id></mixed-citation>
</ref>
<ref id="B31">
<label>31.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Moreira</surname> <given-names>A</given-names></name></person-group>. <article-title>A circular RNA at the origin of life</article-title>. <source>J Theor Biol.</source> (<year>2007</year>) <volume>249</volume>:<fpage>314</fpage>&#x02013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jtbi.2007.07.010</pub-id></mixed-citation>
</ref>
<ref id="B32">
<label>32.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demongeot</surname> <given-names>J</given-names></name></person-group>. <article-title>Traces of a primitive RNA ring in current genomes</article-title>. <source>Biology.</source> (<year>2025</year>) <volume>14</volume>:<fpage>538</fpage>. doi: <pub-id pub-id-type="doi">10.3390/biology14050538</pub-id><pub-id pub-id-type="pmid">40427726</pub-id></mixed-citation>
</ref>
<ref id="B33">
<label>33.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Edous</surname> <given-names>M</given-names></name> <name><surname>Eidous</surname> <given-names>O</given-names></name></person-group>. <article-title>A simple approximation for normal distribution function</article-title>. <source>Math Stat.</source> (<year>2018</year>) <volume>6</volume>:<fpage>47</fpage>&#x02013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.13189/ms.2018.060401</pub-id></mixed-citation>
</ref>
<ref id="B34">
<label>34.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Aherfi</surname> <given-names>S</given-names></name> <name><surname>Colson</surname> <given-names>P</given-names></name> <name><surname>LaScola</surname> <given-names>B</given-names></name> <name><surname>Raoult</surname> <given-names>D</given-names></name></person-group>. <article-title>Giant viruses of amoebas: an update</article-title>. <source>Front Microbiol.</source> (<year>2016</year>) <volume>7</volume>:<fpage>349</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fmicb.2016.00349</pub-id><pub-id pub-id-type="pmid">27047465</pub-id></mixed-citation>
</ref>
<ref id="B35">
<label>35.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Saneda</surname> <given-names>TM</given-names></name> <name><surname>Field</surname> <given-names>M</given-names></name></person-group>. <source>Biological Anthropology: A Brief Introduction</source>. <publisher-loc>Open WA, Bothell WA</publisher-loc>: <publisher-name>Cascadia College Pressbooks</publisher-name> (<year>2022</year>).</mixed-citation>
</ref>
<ref id="B36">
<label>36.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chimento</surname> <given-names>NR</given-names></name> <name><surname>Agnolin</surname> <given-names>FL</given-names></name></person-group>. <article-title>Phylogenetic tree of Litopterna and Perissodactyla indicates a complex early history of hoofed mammals</article-title>. <source>Sci Rep.</source> (<year>2020</year>) <volume>10</volume>:<fpage>13280</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-020-70287-5</pub-id><pub-id pub-id-type="pmid">32764723</pub-id></mixed-citation>
</ref>
<ref id="B37">
<label>37.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carranza</surname> <given-names>J</given-names></name> <name><surname>P&#x000E9;rez-Barber&#x000ED;a</surname> <given-names>FJ</given-names></name></person-group>. <article-title>Sexual selection and senescence: male size-dimorphic ungulates evolved relatively smaller molars than females</article-title>. <source>Am Nat.</source> (<year>2007</year>) <volume>170</volume>:<fpage>370</fpage>&#x02013;<lpage>80</lpage>. doi: <pub-id pub-id-type="doi">10.1086/519852</pub-id><pub-id pub-id-type="pmid">17879188</pub-id></mixed-citation>
</ref>
<ref id="B38">
<label>38.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zaia</surname> <given-names>DA</given-names></name> <name><surname>Zaia</surname> <given-names>CT</given-names></name> <name><surname>De Santana</surname> <given-names>H</given-names></name></person-group>. <article-title>Which amino acids should be used in prebiotic chemistry studies?</article-title> <source>Orig Life Evol Biosph.</source> (<year>2008</year>) <volume>38</volume>:<fpage>469</fpage>&#x02013;<lpage>88</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11084-008-9150-5</pub-id><pub-id pub-id-type="pmid">18925425</pub-id></mixed-citation>
</ref>
<ref id="B39">
<label>39.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>R</given-names></name></person-group>. <article-title>Jump-starting a cellular world: investigating the origin of life, from soup to networks</article-title>. <source>PLoS Biol.</source> (<year>2005</year>) <volume>3</volume>:<fpage>e396</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pbio.0030396</pub-id><pub-id pub-id-type="pmid">16277560</pub-id></mixed-citation>
</ref>
<ref id="B40">
<label>40.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seligmann</surname> <given-names>H</given-names></name> <name><surname>Raoult</surname> <given-names>D</given-names></name></person-group>. <article-title>Unifying view of stem-loop hairpin RNA as origin of current and ancient parasitic and non-parasitic RNAs, including in giant viruses</article-title>. <source>Curr Opin Microbiol.</source> (<year>2016</year>) <volume>31</volume>:<fpage>1</fpage>&#x02013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.mib.2015.11.004</pub-id><pub-id pub-id-type="pmid">26716728</pub-id></mixed-citation>
</ref>
<ref id="B41">
<label>41.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Muller</surname> <given-names>HJ</given-names></name></person-group>. <article-title>The gene as the basis of life</article-title>. In: <person-group person-group-type="editor"><name><surname>Duggar</surname> <given-names>BM</given-names></name></person-group>, editor. <source>Proceedings of the International Congress of Plant Sciences</source>, <conf-loc>Ithaca, NY</conf-loc>, <conf-date>1926</conf-date>. <publisher-loc>Menasha, WI</publisher-loc>: <publisher-name>Banta</publisher-name> (<year>1929</year>). pp. <fpage>897</fpage>&#x02013;<lpage>921</lpage>.</mixed-citation>
</ref>
<ref id="B42">
<label>42.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Maturana</surname> <given-names>HR</given-names></name> <name><surname>Varela</surname> <given-names>FJ</given-names></name></person-group>. <source>Autopoiesis and Cognition: The Realization of the Living</source>. <publisher-loc>Boston, MA</publisher-loc>: <publisher-name>Reidel</publisher-name> (<year>1980</year>). doi: <pub-id pub-id-type="doi">10.1007/978-94-009-8947-4</pub-id></mixed-citation>
</ref>
<ref id="B43">
<label>43.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bourgine</surname> <given-names>P</given-names></name> <name><surname>Stewart</surname> <given-names>J</given-names></name></person-group>. <article-title>Autopoiesis and cognition</article-title>. <source>Artif Life.</source> (<year>2004</year>) <volume>10</volume>:<fpage>327</fpage>&#x02013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1162/1064546041255557</pub-id></mixed-citation>
</ref>
<ref id="B44">
<label>44.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ono</surname> <given-names>N</given-names></name> <name><surname>Ikegami</surname> <given-names>T</given-names></name></person-group>. <article-title>Self-maintenance and self-reproduction in an abstract cell model</article-title>. <source>J Theor Biol.</source> (<year>2000</year>) <volume>206</volume>:<fpage>243</fpage>&#x02013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1006/jtbi.2000.2121</pub-id><pub-id pub-id-type="pmid">10966762</pub-id></mixed-citation>
</ref>
<ref id="B45">
<label>45.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ono</surname> <given-names>N</given-names></name> <name><surname>Ikegami</surname> <given-names>T</given-names></name></person-group>. <article-title>Artificial chemistry: computational studies on the emergence of self-reproducing units</article-title>. In: <person-group person-group-type="editor"><name><surname>Kelemen</surname> <given-names>J</given-names></name> <name><surname>Sosik</surname> <given-names>S</given-names></name></person-group>, editors. <source>Proceedings of the 6th European Conference on Artificial Life (ECAL&#x00027;01)</source>, <conf-loc>Prague, Czech Republic</conf-loc>, <conf-date>September 2001</conf-date>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2001</year>). pp. <fpage>186</fpage>&#x02013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1007/3-540-44811-X_20</pub-id></mixed-citation>
</ref>
<ref id="B46">
<label>46.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K</given-names></name> <name><surname>Schimmel</surname> <given-names>P</given-names></name></person-group>. <article-title>Chiral-selective aminoacylation of an RNA minihelix</article-title>. <source>Science.</source> (<year>2004</year>) <volume>305</volume>:<fpage>1253</fpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1099141</pub-id><pub-id pub-id-type="pmid">15333830</pub-id></mixed-citation>
</ref>
<ref id="B47">
<label>47.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K</given-names></name> <name><surname>Schimmel</surname> <given-names>P</given-names></name></person-group>. <article-title>Chiral-selective aminoacylation of an RNA minihelix: Mechanistic features and chiral suppression</article-title>. <source>Proc Natl Acad Sci USA.</source> (<year>2006</year>) <volume>103</volume>:<fpage>13750</fpage>&#x02013;<lpage>2</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.0606070103</pub-id><pub-id pub-id-type="pmid">16950872</pub-id></mixed-citation>
</ref>
<ref id="B48">
<label>48.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Beringer</surname> <given-names>M</given-names></name> <name><surname>Rodnina</surname> <given-names>MV</given-names></name></person-group>. <article-title>Importance of tRNA interactions with 23S rRNA for peptide bond formation on the ribosome: studies with substrate analogs</article-title>. <source>Biol Chem.</source> (<year>2007</year>) <volume>388</volume>:<fpage>687</fpage>&#x02013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1515/BC.2007.077</pub-id><pub-id pub-id-type="pmid">17570820</pub-id></mixed-citation>
</ref>
<ref id="B49">
<label>49.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>EV</given-names></name> <name><surname>Novozhilov</surname> <given-names>AS</given-names></name></person-group>. <article-title>Origin and evolution of the genetic code: the universal enigma</article-title>. <source>IUBMB Life.</source> (<year>2009</year>) <volume>61</volume>:<fpage>99</fpage>&#x02013;<lpage>111</lpage>. doi: <pub-id pub-id-type="doi">10.1002/iub.146</pub-id><pub-id pub-id-type="pmid">19117371</pub-id></mixed-citation>
</ref>
<ref id="B50">
<label>50.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rodin</surname> <given-names>AS</given-names></name> <name><surname>Szathm&#x000E1;ry</surname> <given-names>E</given-names></name> <name><surname>Rodin</surname> <given-names>SN</given-names></name></person-group>. <article-title>On origin of genetic code and tRNA before translation</article-title>. <source>Biol Direct.</source> (<year>2011</year>) <volume>6</volume>:<fpage>14</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1745-6150-6-14</pub-id><pub-id pub-id-type="pmid">21342520</pub-id></mixed-citation>
</ref>
<ref id="B51">
<label>51.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seligmann</surname> <given-names>H</given-names></name></person-group>. <article-title>Protein sequences recapitulate genetic code evolution</article-title>. <source>Comput Struct Biotechnol J.</source> (<year>2018</year>) <volume>16</volume>:<fpage>177</fpage>&#x02013;<lpage>89</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.csbj.2018.05.001</pub-id><pub-id pub-id-type="pmid">30002789</pub-id></mixed-citation>
</ref>
<ref id="B52">
<label>52.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fei</surname> <given-names>H</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Liu</surname> <given-names>Y</given-names></name> <name><surname>Wei</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>A</given-names></name> <name><surname>Gao</surname> <given-names>C</given-names></name></person-group>. <article-title>Advancing protein evolution with inverse folding models integrating structural and evolutionary constraints</article-title>. <source>Cell.</source> (<year>2025</year>) <volume>188</volume>:<fpage>4674</fpage>&#x02013;<lpage>92</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2025.06.014</pub-id><pub-id pub-id-type="pmid">40628259</pub-id></mixed-citation>
</ref>
<ref id="B53">
<label>53.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ben Khalfallah</surname> <given-names>H</given-names></name> <name><surname>Jelassi</surname> <given-names>M</given-names></name> <name><surname>Demongeot</surname> <given-names>J</given-names></name> <name><surname>Bellamine Ben Saoud</surname> <given-names>N</given-names></name></person-group>. <article-title>Advancements in predictive analytics: machine learning approaches to estimate length of stay and mortality in sepsis</article-title>. <source>Computation.</source> (<year>2025</year>) <volume>13</volume>:<fpage>8</fpage>. doi: <pub-id pub-id-type="doi">10.3390/computation13010008</pub-id></mixed-citation>
</ref>
<ref id="B54">
<label>54.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bintrim</surname> <given-names>S</given-names></name> <name><surname>Donohue</surname> <given-names>T</given-names></name> <name><surname>Handelsman</surname> <given-names>J</given-names></name> <name><surname>Roberts</surname> <given-names>G</given-names></name> <name><surname>Goodman</surname> <given-names>R</given-names></name></person-group>. <article-title>Molecular phylogeny of Archaea from soil</article-title>. <source>Proc Natl Acad Sci USA.</source> (<year>1997</year>) <volume>94</volume>:<fpage>277</fpage>&#x02013;<lpage>82</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.94.1.277</pub-id><pub-id pub-id-type="pmid">8990199</pub-id></mixed-citation>
</ref>
<ref id="B55">
<label>55.</label>
<mixed-citation publication-type="web"><person-group person-group-type="author"><collab>Maxwell</collab></person-group> (<year>2025</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://gitlab.com/Orange-OpenSource/documentare?filter=maxwell">https://gitlab.com/Orange-OpenSource/documentare?filter=maxwell</ext-link> (Accessed May 23, 2025).</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2334807/overview">Alberto Bersani</ext-link>, Sapienza University of Rome, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2530923/overview">Nossaiba Baba</ext-link>, University of Hassan II Casablanca, Morocco</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2665823/overview">Zakaria Yaagoub</ext-link>, University of Hassan II Casablanca, Morocco</p>
</fn>
</fn-group>
</back>
</article>