<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1218518</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2023.1218518</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Molecular Biosciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>gtAI: an improved species-specific tRNA adaptation index using the genetic algorithm</article-title>
<alt-title alt-title-type="left-running-head">Anwar et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmolb.2023.1218518">10.3389/fmolb.2023.1218518</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Anwar</surname>
<given-names>Ali Mostafa</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/473069/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Khodary</surname>
<given-names>Saif M.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2298072/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ahmed</surname>
<given-names>Eman Ali</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Osama</surname>
<given-names>Aya</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ezzeldin</surname>
<given-names>Shahd</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tanios</surname>
<given-names>Anthony</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mahgoub</surname>
<given-names>Sebaey</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Magdeldin</surname>
<given-names>Sameh</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2307852/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Proteomics and Metabolomics Research Program</institution>, <institution>Basic Research Department</institution>, <institution>Children&#x2019;s Cancer Hospital 57357 (CCHE-57357)</institution>, <addr-line>Cairo</addr-line>, <country>Egypt</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Genetics</institution>, <institution>Faculty of Agriculture</institution>, <institution>Cairo University</institution>, <addr-line>Giza</addr-line>, <country>Egypt</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Pharmacology</institution>, <institution>Faculty of Veterinary Medicine</institution>, <institution>Suez Canal University</institution>, <addr-line>Ismailia</addr-line>, <country>Egypt</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Physiology</institution>, <institution>Faculty of Veterinary Medicine</institution>, <institution>Suez Canal University</institution>, <addr-line>Ismailia</addr-line>, <country>Egypt</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1628751/overview">Carolin Kosiol</ext-link>, University of St Andrews, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2321116/overview">Ioanna Kotari</ext-link>, University of Veterinary Medicine Vienna, Austria</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/437934/overview">Paul Martin Harrison</ext-link>, McGill University, Canada</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Sameh Magdeldin, <email>sameh.magdeldin@57357.org</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>04</day>
<month>07</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>10</volume>
<elocation-id>1218518</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>05</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>06</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Anwar, Khodary, Ahmed, Osama, Ezzeldin, Tanios, Mahgoub and Magdeldin.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Anwar, Khodary, Ahmed, Osama, Ezzeldin, Tanios, Mahgoub and Magdeldin</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The tRNA adaptation index (tAI) is a translation efficiency metric that considers weighted values (<italic>S</italic>
<sub>
<italic>ij</italic>
</sub> values) for codon&#x2013;tRNA wobble interaction efficiencies. The initial implementation of the tAI had significant flaws. For instance, generated <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights were optimized based on gene expression in <italic>Saccharomyces cerevisiae</italic>, which is expected to vary among different species. Consequently, a species-specific approach (stAI) was developed to overcome those limitations. However, the stAI method employed a hill climbing algorithm to optimize the <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights, which is not ideal for obtaining the best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights because it could struggle to find the global maximum given a complex search space, even after using different starting positions. In addition, it did not perform well in computing the tAI of fungal genomes in comparison with the original implementation. We developed a novel approach named genetic tAI (gtAI) implemented as a Python package (<ext-link ext-link-type="uri" xlink:href="https://github.com/AliYoussef96/gtAI">https://github.com/AliYoussef96/gtAI</ext-link>), which employs a genetic algorithm to obtain the best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights and follows a new codon usage-based workflow that better computes the tAI of genomes from the three domains of life. The gtAI has significantly improved the correlation with the codon adaptation index (CAI) and the prediction of protein abundance (empirical data) compared to the stAI.</p>
</abstract>
<kwd-group>
<kwd>codon usage</kwd>
<kwd>tRNA adaptation index</kwd>
<kwd>molecular evolution</kwd>
<kwd>translational selection</kwd>
<kwd>codon usage analysis</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Molecular Evolution</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>
<xref ref-type="bibr" rid="B16">Grantham et al. (1980</xref>) highlighted the unequal usage of synonymous codons among different genes and genomes in a phenomenon currently known as codon usage bias (CUB). Since then, scientists have been investigating the effect of synonymous mutations on the efficiency/accuracy of protein translation and several biological processes ranging from RNA processing to protein folding and their potential consequences on the overall performance and evolution of living organisms (<xref ref-type="bibr" rid="B3">Chamary et al., 2006</xref>; <xref ref-type="bibr" rid="B27">Plotkin and Kudla, 2011</xref>). Codon usage is positively associated with the analogous tRNA in a species&#x2014;the tRNA pool determines the available amino acid used during the protein extension process. Therefore, protein expression and translation efficiency are highly associated with CUB (<xref ref-type="bibr" rid="B21">Karlin et al., 2001</xref>; <xref ref-type="bibr" rid="B17">Gustafsson et al., 2004</xref>). Accordingly, codons with high occurrence in a gene (putative optimal codons) improve the protein translation rate, and rare codons will cause a reduction in the translation rate and might cause translation errors (<xref ref-type="bibr" rid="B20">Ikemura, 1981</xref>).</p>
<p>In biotechnology studies, heterologous expression was applied to assemble vaccines and pharmaceuticals (<xref ref-type="bibr" rid="B18">Han et al., 2010</xref>; <xref ref-type="bibr" rid="B23">Liu et al., 2018</xref>). Codon optimization was proposed to increase heterologous gene expression (<xref ref-type="bibr" rid="B28">Quax et al., 2015</xref>; <xref ref-type="bibr" rid="B1">Brandis and Hughes, 2016</xref>; <xref ref-type="bibr" rid="B9">Fu et al., 2020</xref>). Many studies reported the success of the codon optimization approach to upregulate gene expression up to 1,000-fold (<xref ref-type="bibr" rid="B28">Quax et al., 2015</xref>). Several software tools are used for codon optimization and are patented to serve commercial purposes such as GenSmart Design (<ext-link ext-link-type="uri" xlink:href="https://www.genscript.com/gene-and-plasmid-construct-design.html">https://www.genscript.com/gene-and-plasmid-construct-design.html</ext-link>) and GENEWIZ (<ext-link ext-link-type="uri" xlink:href="https://www.genewiz.com/en-GB/Public/Services/Gene-Synthesis/codon-optimization">https://www.genewiz.com/en-GB/Public/Services/Gene-Synthesis/codon-optimization</ext-link>). A number of codon optimization algorithms are not open-source (<xref ref-type="bibr" rid="B32">Satya et al., 2003</xref>; <xref ref-type="bibr" rid="B19">Huang et al., 2021</xref>) or should be requested from the authors (<xref ref-type="bibr" rid="B10">Fuglsang, 2003</xref>). Regardless of their availability, many of those protein expression optimization software tools are based on the tRNA adaptation index (tAI) (<xref ref-type="bibr" rid="B15">Gould et al., 2014</xref>; <xref ref-type="bibr" rid="B40">Watts et al., 2021</xref>; <xref ref-type="bibr" rid="B29">Raguin et al., 2023</xref>) and codon adaptation index (CAI) (<xref ref-type="bibr" rid="B9">Fu et al., 2020</xref>). Many indices were developed to measure the degree of preference for the unbalanced use of codons. 
Some are codon-specific such as relative synonymous codon usage (RSCU), and others are gene-specific such as the effective number of codons (ENc) (<xref ref-type="bibr" rid="B41">Wright, 1990</xref>; <xref ref-type="bibr" rid="B36">Sun et al., 2013</xref>) and CAI (<xref ref-type="bibr" rid="B34">Sharp and Li, 1987</xref>). A relatively new index named tAI was introduced by <xref ref-type="bibr" rid="B7">dos Reis et al. (2004)</xref> to become a formal measure for CUB associated with translational selection. The tRNA presents a complementary anticodon for an amino acid to be incorporated into the growing polypeptide chain during the translation process. The codon&#x2013;anticodon interactions at the first two codon positions are governed solely by canonical (Watson&#x2013;Crick) base pairing rules, unlike the third codon position at which non-canonical (wobble) base pairing also occurs (<xref ref-type="bibr" rid="B5">Crick, 1966</xref>). The tAI considers weights for canonical and wobble interaction efficiencies between codons and tRNA molecules. To compute the tAI, first, the absolute adaptiveness value (<italic>W</italic>
<sub>
<italic>i</italic>
</sub>) for codon <italic>i</italic> is calculated by the following equation:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>G</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>where <italic>n</italic>
<sub>
<italic>j</italic>
</sub> is the number of tRNA isoacceptors that can recognize the <italic>i</italic>th codon, <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> is the codon&#x2013;anticodon coupling efficiency having values ranging from 0 (perfect interaction) to 1 (weak interaction) (<xref ref-type="bibr" rid="B7">dos Reis et al., 2004</xref>), and <italic>tGNC</italic>
<sub>
<italic>ij</italic>
</sub> is the gene copy number of the <italic>j</italic>th anticodon that can recognize the <italic>i</italic>th codon.</p>
<p>Then, each <italic>W</italic>
<sub>
<italic>i</italic>
</sub> is normalized to the maximum <italic>W</italic>
<sub>
<italic>i</italic>
</sub> value to obtain the relative adaptiveness value (<italic>w</italic>
<sub>
<italic>i</italic>
</sub>). Finally, the tAI of a gene can be defined as the geometric mean of the <italic>w</italic>
<sub>
<italic>i</italic>
</sub> values of its codons (<xref ref-type="bibr" rid="B7">dos Reis et al., 2004</xref>):<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <italic>O</italic>
<sub>
<italic>tot</italic>
</sub> is the frequency of the total codons.</p>
<p>The <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights inferred by the original tAI (otAI) implementation were based on optimizing the correlation between tAI (<xref ref-type="disp-formula" rid="e1">Eq. 1</xref>) and gene expression levels in <italic>Saccharomyces cerevisiae</italic> using the Nelder&#x2013;Mead method under the assumption that highly expressed genes contain codons with higher adaptation to the tRNA pool (driven by the force of translational selection). In a study by <xref ref-type="bibr" rid="B6">Dana and Tuller (2014</xref>), two problems were identified with the original tAI implementation. First, it depends on gene expression information, often unavailable for many organisms (especially novel ones). Second, generated weights were specific for <italic>Saccharomyces cerevisiae</italic>. They suggested the possibility that wobble interaction efficiencies may differ significantly among genomes from different domains. So, it would not be plausible to use the weights specific to <italic>Saccharomyces cerevisiae</italic> to compute the tAI of other organisms. Consequently, they developed the species-specific tAI (stAI) (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>) to solve these problems.</p>
<p>The inferred stAI weights are based on optimizing the correlation between the tAI (Eq. <xref ref-type="disp-formula" rid="e1">1</xref>) and a CUB index, namely, the directional codon bias score (a modified version of relative CUB (<xref ref-type="bibr" rid="B25">Roymondal et al., 2009</xref>)) using the hill climbing algorithm under the assumption that highly expressed genes have higher adaptation to the tRNA pool and higher CUB (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>). This eliminates the need for additional gene expression data and generates weights specific to the tested organism, indicating the value of stAI in tAI computation, especially for non-fungal species. However, the stAI has two limitations: 1) using the hill climbing optimization method, by which only local maxima can be reached and which often gets stuck in ridge and plateau scenarios (<xref ref-type="bibr" rid="B37">Thengade and Dondal, 2012</xref>); hence, the best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights may not be obtained even after using different starting positions (random restart) (<xref ref-type="bibr" rid="B30">Russell and Norvig, 2010</xref>; <xref ref-type="bibr" rid="B42">Yang, 2014</xref>); 2) the outperformance of the original tAI over the stAI in predicting the protein abundance (PA) of fungal organisms (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>).</p>
<p>Here, we introduce a novel approach for tAI computation, namely, genetic tAI (gtAI), to solve the problems associated with the stAI, which affect its performance. The gtAI uses a genetic algorithm to reach the global maximum (best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights), solving the issue of obtaining a meaningful set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights for each organism. It also utilizes robust CUB indices (ENc and RSCU) different from the directional codon bias score (DCBS) employed by the stAI.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Establishing a reference set of genes using the effective number of codons</title>
<p>A reference set of genes is defined as a set of genes with the highest expression levels in a genome, such as ribosomal genes and translation elongation factors (<xref ref-type="bibr" rid="B8">Duret, 2000</xref>; <xref ref-type="bibr" rid="B12">Ghaemmaghami et al., 2003</xref>; <xref ref-type="bibr" rid="B13">Goetz and Fuglsang, 2005</xref>). The ENc is a widely used measure of CUB at the gene level, and in theory, it negatively correlates with gene expression (<xref ref-type="bibr" rid="B36">Sun et al., 2013</xref>). Given the assumption that highly expressed genes are highly biased (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>), a reference set is obtained by selecting genes with the lowest ENc values (highest expression) in the tested genome. The ENc is calculated using the equations of the improved ENc implementation by <xref ref-type="bibr" rid="B36">Sun et al. (2013</xref>):<disp-formula id="e3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>where <italic>N</italic>
<sub>
<italic>s</italic>
</sub> is the number of codon families with a single codon. <italic>K</italic>
<sub>
<italic>i</italic>
</sub> is the number of <italic>i</italic>-fold codon families. In addition, <italic>F</italic>
<sub>
<italic>CF. j</italic>
</sub> is <italic>F</italic>
<sub>
<italic>CF</italic>
</sub> for family <italic>j</italic> obtained from the following equation:<disp-formula id="e4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>where <italic>n</italic>
<sub>
<italic>i</italic>
</sub> is the count of codon <italic>i</italic> in the codon family of <italic>m</italic> synonymous codons, and <italic>n</italic> is the total count of codons in that family.</p>
</sec>
<sec id="s2-2">
<title>2.2 Calculating the relative synonymous codon usage for the reference set</title>
<p>The RSCU is a codon-specific CUB measurement defined as the ratio of the observed to the expected frequency of codons, under the null hypothesis that all synonymous codons for a particular amino acid are used equally (<xref ref-type="bibr" rid="B35">Sharp and Li, 1986</xref>). It gives an accurate value for each amino acid codon ranging from 0 to the number of synonymous codons for that amino acid. The RSCU values for the reference set are calculated using the following equation:<disp-formula id="e5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>where <italic>O</italic>
<sub>
<italic>ac</italic>
</sub> is the count of codon <italic>c</italic> for the amino acid <italic>a</italic> and <italic>k</italic>
<sub>
<italic>a</italic>
</sub> is the number of synonymous codons in the amino acid <italic>a</italic> family.</p>
</sec>
<sec id="s2-3">
<title>2.3 <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weight inference by the genetic algorithm</title>
<p>Since highly expressed genes are influenced by translational selection to include more codons with higher adaptation to the intracellular tRNA pool (i.e., optimal codons) (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>), we expect to find a correlation between RSCU (Eq. <xref ref-type="disp-formula" rid="e5">5</xref>) and absolute adaptiveness (<italic>W</italic>
<sub>
<italic>i</italic>
</sub>) values (Eq. <xref ref-type="disp-formula" rid="e1">1</xref>). Therefore, we inferred unique <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights for each organism by optimizing the non-parametric (Spearman&#x2019;s rank) correlation between RSCU (of the reference set) and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values using a genetic algorithm (<ext-link ext-link-type="uri" xlink:href="https://pypi.org/project/gaft/">https://pypi.org/project/gaft/</ext-link>). It should be noted that the correlation between RSCU and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> is at the level of codons.</p>
<p>The genetic algorithm is a metaheuristic search approach inspired by the Darwinian principle of survival of the fittest. It will search for the best <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights that maximize the correlation between RSCU and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> while operating as outlined in <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref>.
</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>The genetic algorithm operates to optimize the Sij weights used to calculate the tAI values.<list list-type="simple">
<list-item>
<p>
<bold>Input:</bold> Genome coding sequences</p>
</list-item>
<list-item>
<p>Initialize S, vector of the initial population as chromosomes (<italic>S</italic>
<sub>
<italic>ij</italic>
</sub> sets) with random <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> values (genes)</p>
</list-item>
<list-item>
<p>Generation time &#x3d; n;</p>
</list-item>
<list-item>
<p><bold>For</bold> s in S <bold>do</bold>
</p>
</list-item>
<list-item>
<p>Evaluate fitness function(s);</p>
</list-item>
<list-item>
<p>n &#x2b; &#x3d; 1</p>
</list-item>
<list-item>
<p>InitialLabel;</p>
</list-item>
<list-item>
<p>
<bold>Test:</bold>
</p>
</list-item>
<list-item>
<p>Selection(s), where <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> sets that exhibit a higher correlation between RSCU and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> are selected;</p>
</list-item>
<list-item>
<p>
<bold>Do:</bold>
</p>
</list-item>
<list-item>
<p>Crossover(s);</p>
</list-item>
<list-item>
<p>Mutation(s);</p>
</list-item>
<list-item>
<p>Evaluate fitness function(s);</p>
</list-item>
<list-item>
<p>
<bold>If</bold> n &#x3d; Generation time, <bold>then</bold>
</p>
</list-item>
<list-item>
<p>Output &#x3d; Best fitness(s);</p>
</list-item>
<list-item>
<p>
<bold>Else</bold>
</p>
</list-item>
<list-item>
<p>Go to InitialLabel</p>
</list-item>
<list-item>
<p>
<bold>Output:</bold> the best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights &#x2b; tAI values</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>Then, the best set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights will be used to compute the tAI values using Eqs. <xref ref-type="disp-formula" rid="e1">1</xref>, <xref ref-type="disp-formula" rid="e2">2</xref> (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>).</p>
</sec>
<sec id="s2-4">
<title>2.4 Genomic data collection</title>
<p>The coding sequences of 12 organisms (<italic>Ferroglobus placidus</italic>, <italic>Halomicrobium mukohataei</italic>, <italic>Methanocaldococcus jannaschii</italic>, <italic>Escherichia coli</italic>, <italic>Neisseria meningitidis</italic>, <italic>Vibrio cholerae</italic>, <italic>Caenorhabditis elegans</italic>, <italic>Drosophila melanogaster</italic>, <italic>Aspergillus fumigatus</italic>, <italic>Aspergillus nidulans</italic>, <italic>Saccharomyces cerevisiae</italic>, and <italic>Schizosaccharomyces pombe</italic>) used in this study as representatives of different domains were retrieved from NCBI (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genome/">https://www.ncbi.nlm.nih.gov/genome/</ext-link>) in the FASTA format. Their tRNA gene copy numbers were obtained from GtRNAdb (<xref ref-type="bibr" rid="B4">Chan and Lowe, 2009</xref>). All information about the used organisms can be found in <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>.</p>
</sec>
<sec id="s2-5">
<title>2.5 CAI and the original tAI indices&#x2019; calculation</title>
<p>The CAI was calculated using a Python package (<xref ref-type="bibr" rid="B22">Lee, 2018</xref>). In addition, the original tAI was calculated using a Python code developed by the authors (the same used to calculate the tAI in the gtAI package) using the <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> weights found in the original study (<xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>).</p>
</sec>
<sec id="s2-6">
<title>2.6 Protein abundance data collection</title>
<p>To test to what extent gtAI correlates with empirical data such as PA compared to the otAI and stAI, the PA data of <italic>E. coli</italic>, <italic>C. elegans</italic>, <italic>D. melanogaster</italic>, <italic>S. cerevisiae</italic>, and <italic>S. pombe</italic> were retrieved from PaxDB. The integrated PaxDB version (highest coverage) was used for all the organisms (version 4.1) (<xref ref-type="bibr" rid="B39">Wang et al., 2015</xref>). These organisms were chosen due to the availability of their PA data.</p>
</sec>
<sec id="s2-7">
<title>2.7 The impact of generation time and population size parameter choice on gtAI result reproducibility</title>
<p>First, we investigated the effect of the population size parameter on the gtAI result. Three random organisms were selected from the 12 used in this study (<italic>S. cerevisiae</italic>, <italic>E. coli</italic>, and <italic>H. mukohataei</italic>). For each organism, the non-parametric (Spearman) correlation between RSCU (of the reference set) and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values was optimized by the genetic algorithm used in gtAI calculation. We chose a constant generation time equal to 100 and different population sizes (10, 20, 30, n &#x2b; 10, &#x2026; , 100). Hence, each organism was optimized 10 times, each with a different population size, to compare the best solution between population sizes (inter-variability). This experiment was performed for each organism five times to reach the best solution within the same population size and for the same organism between different experiments (intra-variability). Then, we tested the best solution for selecting the suitable generation time by applying 1,000 generations on the same three organisms, with a constant population size equal to 60. Finally, we plotted the solutions from generation 1 to 1,000.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 CAI correlations with gtAI, stAI, and otAI</title>
<p>The Williams&#x2019; test was used to compare the rho values at an alpha score of 0.01 (two-sided test). The gtAI values for <italic>H. mukohataei</italic>, <italic>M. jannaschii</italic>, <italic>E. coli</italic>, <italic>N. meningitidis</italic>, <italic>D. melanogaster</italic>, <italic>A. nidulans</italic>, <italic>S. pombe</italic>, <italic>A. fumigatus</italic>, and <italic>S. cerevisiae</italic> (9 out of 12) revealed a statistically significant (Williams&#x2019; test <italic>p</italic> value &#x3c;0.01) higher correlation with CAI than stAI (<xref ref-type="table" rid="T1">Table 1</xref>). Moreover, the gtAI in all the fungal organisms (<italic>A. nidulans</italic>, <italic>S. pombe</italic>, <italic>A. fumigatus</italic>, and <italic>S. cerevisiae</italic>) showed a considerably higher correlation with the CAI than the original tAI (<xref ref-type="sec" rid="s10">Supplementary Tables S1&#x2013;S4</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Spearman&#x2019;s rank correlation analysis between CAI and the three tAI measurements (original tAI, stAI, and gtAI) for the 12 model organisms and their average GC content and ENc values.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="left"/>
<th align="center">gtAI-CAI (rho)</th>
<th align="center">stAI-CAI (rho)</th>
<th align="center">tAI-CAI (rho)</th>
<th align="center">GC content (%)</th>
<th align="center">ENc</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="7" align="left">Archaea</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Ferroglobus placidus</italic>
</td>
<td align="center">0.46&#x2a;</td>
<td align="center">0.48&#x2a;</td>
<td align="center">-</td>
<td align="center">44.1</td>
<td align="center">44.81</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Halomicrobium mukohataei</italic>
</td>
<td align="center">0.61&#x2a;</td>
<td align="center">0.41&#x2a;</td>
<td align="center">-</td>
<td align="center">65.5</td>
<td align="center">34.26</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Methanocaldococcus jannaschii</italic>
</td>
<td align="center">0.23&#x2a;</td>
<td align="center">0.14&#x2a;</td>
<td align="center">-</td>
<td align="center">31.4</td>
<td align="center">37.92</td>
</tr>
<tr>
<td colspan="7" align="left">Bacteria</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Escherichia coli</italic>
</td>
<td align="center">0.83&#x2a;</td>
<td align="center">0.82&#x2a;</td>
<td align="center">-</td>
<td align="center">50.8</td>
<td align="center">39.09</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Neisseria meningitidis</italic>
</td>
<td align="center">0.9&#x2a;</td>
<td align="center">0.67&#x2a;</td>
<td align="center">-</td>
<td align="center">51.8</td>
<td align="center">36.89</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Vibrio cholerae</italic>
</td>
<td align="center">0.78&#x2a;</td>
<td align="center">0.8&#x2a;</td>
<td align="center">-</td>
<td align="center">48.1</td>
<td align="center">41.76</td>
</tr>
<tr>
<td colspan="7" align="left">Eukarya (non-fungal)</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Caenorhabditis elegans</italic>
</td>
<td align="center">0.8&#x2a;</td>
<td align="center">0.82&#x2a;</td>
<td align="center">-</td>
<td align="center">35.4</td>
<td align="center">42.02</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Drosophila melanogaster</italic>
</td>
<td align="center">0.89&#x2a;</td>
<td align="center">0.74&#x2a;</td>
<td align="center">-</td>
<td align="center">42.0</td>
<td align="center">38.69</td>
</tr>
<tr>
<td colspan="7" align="left">Eukarya (fungal)</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Aspergillus fumigatus</italic>
</td>
<td align="center">0.91&#x2a;</td>
<td align="center">0.78&#x2a;</td>
<td align="center">0.82&#x2a;</td>
<td align="center">49.5</td>
<td align="center">40.64</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Aspergillus nidulans</italic>
</td>
<td align="center">0.94&#x2a;</td>
<td align="center">0.29&#x2a;</td>
<td align="center">0.91&#x2a;</td>
<td align="center">50.1</td>
<td align="center">41.57</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Saccharomyces cerevisiae</italic>
</td>
<td align="center">0.94&#x2a;</td>
<td align="center">0.56&#x2a;</td>
<td align="center">0.87&#x2a;</td>
<td align="center">38.2</td>
<td align="center">36.42</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Schizosaccharomyces pombe</italic>
</td>
<td align="center">0.88&#x2a;</td>
<td align="center">0.56&#x2a;</td>
<td align="center">0.84&#x2a;</td>
<td align="center">36</td>
<td align="center">40.32</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a; represents <italic>p</italic> value &#x3c;0.001.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Repeated random sampling and correlations with the CAI</title>
<p>First, we calculated the gtAI, stAI, and CAI values for the genes of all tested organisms. Then, for each replicate (1,000 replicates with replacement), we drew a random sample comprising 25% of the values of these measures for the Spearman&#x2019;s rank correlation analyses. This was done to ensure that the reference set of genes present among other genes is not causing inflated gtAI-CAI correlations. The rep_sample_n R function from the infer package was used in the random sampling (<ext-link ext-link-type="uri" xlink:href="https://www.rdocumentation.org/packages/infer/versions/1.0.4/topics/rep_sample_n">https://www.rdocumentation.org/packages/infer/versions/1.0.4/topics/rep_sample_n</ext-link>). It does not specify a particular distribution type to be used but rather allows for repeated sampling of data from a specified data frame. The script of this random sampling method can be found here (<ext-link ext-link-type="uri" xlink:href="https://github.com/AliYoussef96/gtAI/blob/master/random%20sampling.r">https://github.com/AliYoussef96/gtAI/blob/master/random%20sampling.r</ext-link>).</p>
<p>The result showed that the gtAI exhibited stronger correlations for the same nine organisms compared to the stAI. Furthermore, a stronger correlation with the CAI in the four fungal organisms compared to the otAI is shown (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Repeated Spearman&#x2019;s rank correlation analysis between gtAI-CAI, stAI-CAI, and otAI-CAI for randomly sampled values. The gtAI, stAI, and CAI values for each organism (the 12 used in the study) were randomly sampled with size 25%, and 1,000 replicates with replacement have been taken. Spearman&#x2019;s rank correlation analysis between gtAI-CAI, stAI-CAI, and otAI-CAI for each replicate was applied. The line plot shows the rho values for each replicate, and the box plots show the distributions. <bold>(A)</bold> Three archaea organisms being tested in this study, <bold>(B)</bold> the three bacterial organisms, <bold>(C)</bold> two eukaryotic (non-fungal) organisms, and <bold>(D)</bold> four eukarya (fungal) organisms. The color code is red for gtAI-CAI correlation results, gray for stAI-CAI correlation results, and green for the original tAI (otAI)-CAI correlation results.</p>
</caption>
<graphic xlink:href="fmolb-10-1218518-g001.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 SCUO correlations with gtAI, stAI, and otAI</title>
<p>The SCUO is a codon usage index that does not involve the use of a reference set in its calculation (<xref ref-type="bibr" rid="B38">Wan et al., 2004</xref>). The gtAI, stAI, otAI, and SCUO values were calculated for all 12 organisms. The gtAI outperformed both stAI and otAI by exhibiting a stronger statistically significant correlation with SCUO in eight organisms. This is consistent with the CAI association results except for <italic>E. coli</italic>, where the gtAI-SCUO and stAI-SCUO correlations are 0.26 and 0.27, respectively (<xref ref-type="table" rid="T2">Table 2</xref>). A two-sided Williams&#x2019; test was used to compare the rho values at an alpha score of 0.01.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Spearman&#x2019;s rank correlation analysis between SCUO and the three tAI measurements (original tAI, stAI, and gtAI) for the 12 model organisms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="left"/>
<th align="center">gtAI-SCUO (rho)</th>
<th align="center">stAI-SCUO (rho)</th>
<th align="center">tAI-SCUO (rho)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="5" align="left">Archaea</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Ferroglobus placidus</italic>
</td>
<td align="center">0.22&#x2a;</td>
<td align="center">0.24&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Halomicrobium mukohataei</italic>
</td>
<td align="center">0.45&#x2a;</td>
<td align="center">0.27&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Methanocaldococcus jannaschii</italic>
</td>
<td align="center">0.2&#x2a;</td>
<td align="center">&#x2212;0.03</td>
<td align="center">-</td>
</tr>
<tr>
<td colspan="5" align="left">Bacteria</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Escherichia coli</italic>
</td>
<td align="center">0.26&#x2a;</td>
<td align="center">0.27&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Neisseria meningitidis</italic>
</td>
<td align="center">0.4&#x2a;</td>
<td align="center">0.22&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Vibrio cholerae</italic>
</td>
<td align="center">0.27&#x2a;</td>
<td align="center">0.28&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td colspan="5" align="left">Eukarya (non-fungal)</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Caenorhabditis elegans</italic>
</td>
<td align="center">0.26&#x2a;</td>
<td align="center">0.23&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Drosophila melanogaster</italic>
</td>
<td align="center">0.6&#x2a;</td>
<td align="center">0.5&#x2a;</td>
<td align="center">-</td>
</tr>
<tr>
<td colspan="5" align="left">Eukarya (fungal)</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Aspergillus fumigatus</italic>
</td>
<td align="center">0.59&#x2a;</td>
<td align="center">0.49&#x2a;</td>
<td align="center">0.52&#x2a;</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Aspergillus nidulans</italic>
</td>
<td align="center">0.48&#x2a;</td>
<td align="center">0.25&#x2a;</td>
<td align="center">0.45&#x2a;</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Saccharomyces cerevisiae</italic>
</td>
<td align="center">0.41&#x2a;</td>
<td align="center">0.27&#x2a;</td>
<td align="center">0.36&#x2a;</td>
</tr>
<tr>
<td align="left"/>
<td align="left">
<italic>Schizosaccharomyces pombe</italic>
</td>
<td align="center">0.22&#x2a;</td>
<td align="center">0.22&#x2a;</td>
<td align="center">0.20&#x2a;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a; represents <italic>p</italic> value &#x3c;0.001.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-4">
<title>3.4 The gtAI correlates better with PA data than stAI and CAI in both fungal and non-fungal organisms</title>
<p>The Williams&#x2019; test was used to compare the rho values at an alpha score of 0.01 (two-sided test). For <italic>C. elegans</italic>, <italic>D. melanogaster</italic>, <italic>S. pombe</italic>, <italic>S. cerevisiae</italic>, and <italic>E. coli</italic>, the gtAI showed a higher statistically significant correlation with PA than the stAI and CAI (Williams&#x2019; test <italic>p</italic> value &#x3c;0.01). Furthermore, the gtAI exhibits a higher statistically significant correlation with PA in <italic>E. coli</italic> than the original tAI (Williams&#x2019; test <italic>p</italic> value &#x3c;0.01). On the other hand, the original tAI predicted the PA of fungal organisms better than the gtAI and stAI, which is expected as it used experimental microarray data from yeast to obtain an optimal set of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> values that maximizes the correlation between expression levels and tAI values (<xref ref-type="bibr" rid="B7">dos Reis et al., 2004</xref>). Consequently, the gtAI is a valuable tool as it improves the prediction of PA in many organisms (<xref ref-type="table" rid="T3">Table 3</xref>).</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Spearman&#x2019;s rank correlation analysis between PA and the three tAI measurements.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">gtAI-PA (rho)</th>
<th align="center">stAI-PA (rho)</th>
<th align="center">tAI-PA (rho)</th>
<th align="center">CAI-PA (rho)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<italic>Caenorhabditis elegans</italic>
</td>
<td align="center">0.38&#x2a;</td>
<td align="center">0.36&#x2a;</td>
<td align="center">-</td>
<td align="center">0.28&#x2a;</td>
</tr>
<tr>
<td align="left">
<italic>Drosophila melanogaster</italic>
</td>
<td align="center">0.48&#x2a;</td>
<td align="center">0.44&#x2a;</td>
<td align="center">-</td>
<td align="center">0.33&#x2a;</td>
</tr>
<tr>
<td align="left">
<italic>Escherichia coli</italic>
</td>
<td align="center">0.54&#x2a;</td>
<td align="center">0.53&#x2a;</td>
<td align="center">0.5&#x2a;</td>
<td align="center">0.52&#x2a;</td>
</tr>
<tr>
<td align="left">
<italic>Saccharomyces cerevisiae</italic>
</td>
<td align="center">0.50&#x2a;</td>
<td align="center">0.49&#x2a;</td>
<td align="center">0.56&#x2a;</td>
<td align="center">0.49&#x2a;</td>
</tr>
<tr>
<td align="left">
<italic>Schizosaccharomyces pombe</italic>
</td>
<td align="center">0.61&#x2a;</td>
<td align="center">0.54&#x2a;</td>
<td align="center">0.62&#x2a;</td>
<td align="center">0.53&#x2a;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a; represents <italic>p</italic> value &#x3c;0.001.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-5">
<title>3.5 The absolute adaptiveness values generated by the gtAI reflect the evolutionary proximity</title>
<p>The absolute adaptiveness (<italic>W</italic>
<sub>
<italic>i</italic>
</sub>) values of a codon depend on both the efficacy of codon&#x2013;anticodon interaction (<italic>S</italic>
<sub>
<italic>ij</italic>
</sub> values) and the abundance of tRNA available for that codon. The number of tRNA genes and their abundance are diverse among the three domains of life (<xref ref-type="bibr" rid="B11">Fujishima and Kanai, 2014</xref>). Therefore, in theory, <italic>W</italic>
<sub>
<italic>i</italic>
</sub> should explain the divergence of organisms from different domains. To examine whether the <italic>W</italic>
<sub>
<italic>i</italic>
</sub> calculated using <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> values generated by the gtAI are biologically meaningful, a hierarchical clustering on principal component (HCPC) analysis of <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values was performed. The clustering classified all 12 model organisms used in the study into three distinct groups (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Hierarchical clustering on principal component (HCPC) analysis based on the absolute adaptiveness values (<italic>W</italic>
<sub>
<italic>i</italic>
</sub>) of the 12 model organisms. The <italic>x</italic>-axis and <italic>y</italic>-axis represent the first and second principal components (Dim1 and Dim2), respectively (the clustering was performed using Ward&#x2019;s method).</p>
</caption>
<graphic xlink:href="fmolb-10-1218518-g002.tif"/>
</fig>
</sec>
<sec id="s3-6">
<title>3.6 The effect of population size on gtAI result reproducibility</title>
<p>The inter-variability resulting from changing the population size in the three organisms was extremely low. For <italic>S. cerevisiae</italic>, <italic>E. coli</italic>, and <italic>H. mukohataei</italic>, the average best solution in the five experiments (the optimization of the non-parametric Spearman correlation between RSCU and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values) ranged from 0.8109 to 0.8134 (SD &#x3d; from 0.0016 to 0.0031), from 0.6017 to 0.6022 (SD &#x3d; from 0.001 to 0.0023), and from 0.4287 to 0.4308 (SD &#x3d; from 0 to 0.0067), respectively. Then, the coefficient of variation (CV) for the same population size (10, 20, 30, n &#x2b; 10, &#x2026; , 100) was computed from the results of the five performed experiments for each organism. <italic>S. cerevisiae</italic> showed CV values ranging from 0.00057 to 0.0042, <italic>E. coli</italic> from 0.0012 to 0.0036, and <italic>H. mukohataei</italic> from 0 to 0.0223. The CV expresses the extent of variability relative to the population&#x2019;s mean. Therefore, as the variability decreases, the CV approaches zero. The CV values for the tested genomes showed extremely low intra-variability, with CV values approximately equal to zero. Therefore, the inter-variability and intra-variability resulting from choosing different population sizes in the gtAI algorithm will not influence the reproducibility of the results. However, we recommend choosing a constant population size for the whole analysis.</p>
</sec>
<sec id="s3-7">
<title>3.7 The effect of generation time on gtAI result reproducibility</title>
<p>At generation time 100 and higher, the solution was constant or changed only slightly (<xref ref-type="fig" rid="F3">Figure 3</xref>). Therefore, we recommend using a generation time of 100 or higher, but a constant generation time must be selected for the whole analysis.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Impact of selecting different generation times for the genetic algorithm on gtAI results. The figure shows the best solutions after applying 1,000 generations in <italic>Saccharomyces cerevisiae</italic> (green line), <italic>Escherichia coli</italic> (blue line), and <italic>Halomicrobium mukohataei</italic> (red line). The <italic>y</italic>-axis represents the best solutions for optimizing the non-parametric (Spearman) correlation between RSCU (of the reference set) and <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values at each generation. The <italic>x</italic>-axis represents the generation number&#x2014;the vertical black line represents the generation time number 100 (the default generation time in the gtAI package).</p>
</caption>
<graphic xlink:href="fmolb-10-1218518-g003.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>The tAI is a formal measure of the force of translational selection. It has been widely employed to investigate fundamental questions related to gene expression, molecular evolution, and virus&#x2013;host adaptation (<xref ref-type="bibr" rid="B33">Sharp et al., 2005</xref>; <xref ref-type="bibr" rid="B24">Man and Pilpel, 2007</xref>; <xref ref-type="bibr" rid="B14">Goodman et al., 2013</xref>; <xref ref-type="bibr" rid="B26">Pechmann and Frydman, 2013</xref>; <xref ref-type="bibr" rid="B31">Sabi and Tuller, 2014</xref>). Due to its importance, we were motivated to improve its performance by solving the issues of previous methods used for its calculation. We evaluated our proposed method mainly by examining whether it correlates better with other well-established codon usage indices. In addition, it shows a better association with empirical PA data.</p>
<p>The CAI is a gene-specific CUB index (<xref ref-type="bibr" rid="B34">Sharp and Li, 1987</xref>). Many studies suggested that the CAI is a good predictor of gene expression at mRNA and protein levels and has been used in many studies as a reference index to compare new indices and methods (<xref ref-type="bibr" rid="B2">Carbone et al., 2003</xref>; <xref ref-type="bibr" rid="B36">Sun et al., 2013</xref>; <xref ref-type="bibr" rid="B9">Fu et al., 2020</xref>). Accordingly, we conducted a comparative analysis on 12 model organisms (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>) to evaluate the performance of the gtAI method compared to the original tAI and stAI (<xref ref-type="sec" rid="s10">Supplementary Tables S2&#x2013;S5</xref>) by examining whether it correlates better with the CAI using Spearman&#x2019;s rank correlation analysis. The gtAI managed to outperform both methods by exhibiting a stronger significant correlation in 9 out of 12 model organisms with the CAI.</p>
<p>Attempting to explain the reason behind the better association of the stAI with the CAI than the gtAI in <italic>F. placidus</italic>, <italic>V. cholerae</italic>, and <italic>C. elegans</italic> revealed a notable conclusion. These three organisms showed the highest ENc (low CUB) average value within their domains. For example, the average ENc value of the reference set for <italic>F. placidus</italic> was 44.8, while in <italic>H. mukohataei</italic> and <italic>M. jannaschii</italic>, the ENc values were 34.25 and 37.91, respectively. The same trend was observed in the bacterial group, as the average ENc value for <italic>V. cholerae</italic> was 41.83, compared with 39.08 for <italic>E. coli</italic> and 37.08 for <italic>N. meningitidis</italic>. For non-fungal eukaryotes, <italic>C. elegans</italic> exhibited an average ENc value of 42.02, while <italic>D. melanogaster</italic> exhibited 38.69. This shows one limitation in our approach which can be attributed to organisms with overall weak CUB. It slightly influenced the result of the gtAI leading to the better correlation of stAI in these organisms. Furthermore, insights into the relation between GC content and gtAI performance have revealed that the change in GC content could not explain the slight outperformance of the stAI over the gtAI in terms of correlation with the CAI except in archaeal genomes. To begin with, in non-fungal eukaryotes, though <italic>C. elegans</italic> has an average GC content of 35.4%, indicating a possible strong bias against GC-rich codons, it showed an overall relatively weak bias (ENc &#x3d; 42.02) which resulted in the slight underperformance of gtAI compared to stAI. Additionally, though <italic>D. melanogaster</italic> has a relatively higher GC content of 42.0% (closer to 50%), indicating relatively weaker bias, it showed an overall relatively stronger bias (ENc &#x3d; 38.69) than <italic>C. elegans</italic>.
Meanwhile, in Archaea, it is notable that the change in their overall bias is consistent with their deviation from the 50% GC content. In other words, the archaeal genome that showed an underperformance of gtAI (<italic>F. placidus</italic>) has an average GC content of 44.1%, which is closer to 50% than those of the other two archaeal genomes (31.4% and 65.5%), as well as the highest ENc value (44.8, compared to 34.25 and 37.91 for the other two). Notwithstanding this limitation, the results (<xref ref-type="table" rid="T1">Table 1</xref>) suggest that the gtAI method performance was better for <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> value optimization, giving a more reliable tAI value that better demonstrates the effect of translational selection.</p>
<p>One can argue that the correlation between the gtAI and CAI might be inflated due to using the same reference set of genes. Hence, we conducted two more analyses to test it. In the first one, we obtained multiple random samples of CAI and the three tAI indices&#x2019; values and performed Spearman&#x2019;s rank correlation analysis between each tAI index and the CAI. The results remained the same and agreed with our conclusion as the gtAI showed a stronger correlation with the CAI than the stAI in the same nine organisms. In addition, a higher correlation with the CAI than the original tAI in the four fungal organisms was shown. In the second analysis, we investigated whether gtAI correlates better with another CUB index that is independent of using a reference set of genes in its calculation, namely, SCUO (to exclude the reference set parameter). The results also revealed a stronger gtAI association with SCUO, further confirming that the correlation between the gtAI and CAI is neither inflated nor affected by the reference set of genes.</p>
<p>In conclusion, our gtAI method can solve the problem of <italic>S</italic>
<sub>
<italic>ij</italic>
</sub> value optimization and effectively estimate the tAI values while overcoming the limitations observed in other implementations. Performance evaluation showed that the gtAI method performed better than the original tAI and stAI by exhibiting a stronger correlation with the CAI and SCUO. It has also improved the prediction of PA compared to the stAI and CAI. The reproducibility of the genetic algorithm employed by the gtAI was tested and revealed its reliability in reaching the best solution in complex optimization problems. The <italic>W</italic>
<sub>
<italic>i</italic>
</sub> values generated by the gtAI correctly reflect the evolutionary proximity between organisms from different domains of life. Indeed, one significant advantage of CUB-dependent tAI computation methods (i.e., gtAI and stAI) over the original tAI is that they do not require external information such as gene expression data or mRNA levels (which are often unavailable for most genomes). We believe that the gtAI will allow for obtaining higher quality tAI results used to draw conclusions about the force of translational selection acting on genes in related studies.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>AA: conceptualization, methodology, software, formal analysis, and visualization. SK: validation, investigation, writing&#x2014;original draft. EA: writing&#x2014;review and editing. AO: data curation. SE: data curation. AT: investigation. MS: investigation. MS: project administration, writing&#x2014;review and editing. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by the Egyptian Cancer Network (ECN), United States of America, and the Children&#x2019;s Cancer Hospital, Egypt 57357.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmolb.2023.1218518/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmolb.2023.1218518/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table2.XLSX" id="SM1" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table3.XLSX" id="SM2" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image4.PNG" id="SM3" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table4.XLSX" id="SM4" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.PNG" id="SM5" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM6" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image1.PNG" id="SM7" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table5.XLSX" id="SM8" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image3.PNG" id="SM9" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brandis</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hughes</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The selective advantage of synonymous codon usage bias in Salmonella</article-title>. <source>PLoS Genet.</source> <volume>12</volume>, <fpage>1005926</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1005926</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carbone</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zinovyev</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>K&#xe9;p&#xe8;s</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Codon adaptation index as a measure of dominating codon bias</article-title>. <source>Bioinformatics</source> <volume>19</volume>, <fpage>2005</fpage>&#x2013;<lpage>2015</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btg272</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chamary</surname>
<given-names>J. V.</given-names>
</name>
<name>
<surname>Parmley</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Hurst</surname>
<given-names>L. D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Hearing silence: Non-neutral evolution at synonymous sites in mammals</article-title>. <source>Nat. Rev. Genet.</source> <volume>7</volume>, <fpage>98</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1038/nrg1770</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Lowe</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>GtRNAdb: A database of transfer RNA genes detected in genomic sequence</article-title>. <source>Nucleic Acids Res.</source> <volume>37</volume>, <fpage>D93</fpage>&#x2013;<lpage>D97</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn787</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crick</surname>
<given-names>F. H. C.</given-names>
</name>
</person-group> (<year>1966</year>). <article-title>Codon&#x2014;Anticodon pairing: The wobble hypothesis</article-title>. <source>J. Mol. Biol.</source> <volume>19</volume>, <fpage>548</fpage>&#x2013;<lpage>555</lpage>. <pub-id pub-id-type="doi">10.1016/S0022-2836(66)80022-0</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dana</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tuller</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The effect of tRNA levels on decoding times of mRNA codons</article-title>. <source>Nucleic Acids Res.</source> <volume>42</volume>, <fpage>9171</fpage>&#x2013;<lpage>9181</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku646</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>dos Reis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Savva</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wernisch</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Solving the riddle of codon usage preferences: A test for translational selection</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>5036</fpage>&#x2013;<lpage>5044</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh834</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duret</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>tRNA gene number and codon usage in the <italic>C. elegans</italic> genome are co-adapted for optimal translation of highly expressed genes</article-title>. <source>Trends Genet.</source> <volume>16</volume>, <fpage>287</fpage>&#x2013;<lpage>289</lpage>. <pub-id pub-id-type="doi">10.1016/S0168-9525(00)02041-2</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Codon optimization with deep learning to enhance protein expression</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>17617</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-74091-z</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuglsang</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Codon optimizer: A freeware tool for codon optimization</article-title>. <source>Protein Expr. Purif.</source> <volume>31</volume>, <fpage>247</fpage>&#x2013;<lpage>249</lpage>. <pub-id pub-id-type="doi">10.1016/s1046-5928(03)00213-4</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujishima</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kanai</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>tRNA gene diversity in the three domains of life</article-title>. <source>Front. Genet.</source> <volume>5</volume>, <fpage>142</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2014.00142</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghaemmaghami</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huh</surname>
<given-names>W. K.</given-names>
</name>
<name>
<surname>Bower</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Howson</surname>
<given-names>R. W.</given-names>
</name>
<name>
<surname>Belle</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dephoure</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>Global analysis of protein expression in yeast</article-title>. <source>Nature</source> <volume>425</volume>, <fpage>737</fpage>&#x2013;<lpage>741</lpage>. <pub-id pub-id-type="doi">10.1038/nature02046</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goetz</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Fuglsang</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Correlation of codon bias measures with mRNA levels: Analysis of transcriptome data from <italic>Escherichia coli</italic></article-title>. <source>Biochem. Biophys. Res. Commun.</source> <volume>327</volume>, <fpage>4</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2004.11.134</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodman</surname>
<given-names>D. B.</given-names>
</name>
<name>
<surname>Church</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Kosuri</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Causes and effects of N-terminal codon bias in bacterial genes</article-title>. <source>Science</source> <volume>342</volume>, <fpage>475</fpage>&#x2013;<lpage>479</lpage>. <pub-id pub-id-type="doi">10.1126/science.1241934</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gould</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hendy</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Papamichail</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Computational tools and algorithms for designing customized synthetic genes</article-title>. <source>Front. Bioeng. Biotechnol.</source> <volume>2</volume>, <fpage>41</fpage>. <pub-id pub-id-type="doi">10.3389/fbioe.2014.00041</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grantham</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gautier</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gouy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mercier</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pav&#xe9;</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1980</year>). <article-title>Codon catalog usage and the genome hypothesis</article-title>. <source>Nucleic Acids Res.</source> <volume>8</volume>, <fpage>r49</fpage>&#x2013;<lpage>r62</lpage>. <pub-id pub-id-type="doi">10.1093/nar/8.1.197-c</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gustafsson</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Govindarajan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Minshull</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Codon bias and heterologous protein expression</article-title>. <source>Trends Biotechnol.</source> <volume>22</volume>, <fpage>346</fpage>&#x2013;<lpage>353</lpage>. <pub-id pub-id-type="doi">10.1016/j.tibtech.2004.04.006</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>Y. S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Jeon</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>B. J.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Codon optimization enhances protein expression of human peptide deformylase in <italic>E. coli</italic></article-title>. <source>Protein Expr. Purif.</source> <volume>70</volume>, <fpage>224</fpage>&#x2013;<lpage>230</lpage>. <pub-id pub-id-type="doi">10.1016/j.pep.2009.10.005</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y. E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Codon pair optimization (CPO): A software tool for synthetic gene design based on codon pair bias to improve the expression of recombinant proteins in <italic>Pichia pastoris</italic></article-title>. <source>Microb. Cell Fact.</source> <volume>20</volume>, <fpage>209</fpage>. <pub-id pub-id-type="doi">10.1186/s12934-021-01696-y</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ikemura</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>1981</year>). <article-title>Correlation between the abundance of <italic>Escherichia coli</italic> transfer RNAs and the occurrence of the respective codons in its protein genes: A proposal for a synonymous codon choice that is optimal for the <italic>E. coli</italic> translational system</article-title>. <source>J. Mol. Biol.</source> <volume>151</volume>, <fpage>389</fpage>&#x2013;<lpage>409</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(81)90003-6</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karlin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mr&#xe1;zek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Campbell</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kaiser</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Characterizations of highly expressed genes of four fast-growing bacteria</article-title>. <source>J. Bacteriol.</source> <volume>183</volume>, <fpage>5025</fpage>&#x2013;<lpage>5040</lpage>. <pub-id pub-id-type="doi">10.1128/JB.183.17.5025-5040.2001</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Python implementation of codon adaptation index</article-title>. <source>J. Open Source Softw.</source> <volume>3</volume>, <fpage>905</fpage>. <pub-id pub-id-type="doi">10.21105/joss.00905</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Codon optimization significantly enhanced the expression of human 37-kDa iLRP in <italic>Escherichia coli</italic></article-title>. <source>3 Biotech.</source> <volume>8</volume>, <fpage>210</fpage>. <pub-id pub-id-type="doi">10.1007/s13205-018-1234-y</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Man</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Pilpel</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Differential translation efficiency of orthologous genes is involved in phenotypic divergence of yeast species</article-title>. <source>Nat. Genet.</source> <volume>39</volume>, <fpage>415</fpage>&#x2013;<lpage>421</lpage>. <pub-id pub-id-type="doi">10.1038/ng1967</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roymondal</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sahoo</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Predicting gene expression level from relative codon usage bias: An application to <italic>Escherichia coli</italic> genome</article-title>. <source>DNA Res.</source> <volume>16</volume>, <fpage>13</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1093/dnares/dsn029</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pechmann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Frydman</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Evolutionary conservation of codon optimality reveals hidden signatures of cotranslational folding</article-title>. <source>Nat. Struct. Mol. Biol.</source> <volume>20</volume>, <fpage>237</fpage>&#x2013;<lpage>243</lpage>. <pub-id pub-id-type="doi">10.1038/nsmb.2466</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plotkin</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Kudla</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Synonymous but not the same: The causes and consequences of codon bias</article-title>. <source>Nat. Rev. Genet.</source> <volume>12</volume>, <fpage>32</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2899</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quax</surname>
<given-names>T. E. F.</given-names>
</name>
<name>
<surname>Claassens</surname>
<given-names>N. J.</given-names>
</name>
<name>
<surname>S&#xf6;ll</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>van der Oost</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Codon bias as a means to fine-tune gene expression</article-title>. <source>Mol. Cell.</source> <volume>59</volume>, <fpage>149</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2015.05.035</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raguin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stansfield</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Romano</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>ExpressInHost: A codon tuning tool for the expression of recombinant proteins in host microorganisms</article-title>. <source>J. Open Res. Softw.</source> <volume>11</volume>, <fpage>385</fpage>. <pub-id pub-id-type="doi">10.5334/jors.385</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Russell</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Norvig</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Artificial intelligence: A modern approach</source>. <edition>Third</edition>. <publisher-loc>Upper Saddle River, NJ</publisher-loc>: <publisher-name>Prentice Hall</publisher-name>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sabi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tuller</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Modelling the efficiency of codon-tRNA interactions based on codon usage bias</article-title>. <source>DNA Res.</source> <volume>21</volume>, <fpage>511</fpage>&#x2013;<lpage>525</lpage>. <pub-id pub-id-type="doi">10.1093/dnares/dsu017</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Satya</surname>
<given-names>R. V.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ranga</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>A pattern matching algorithm for codon optimization and CpG motif-engineering in DNA expression vectors</article-title>. <source>Comput. Syst. Bioinforma.</source> <volume>2</volume>, <fpage>294</fpage>&#x2013;<lpage>305</lpage>. <pub-id pub-id-type="doi">10.1109/CSB.2003.1227330</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharp</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Bailes</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Grocock</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Peden</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Sockett</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Variation in the strength of selected codon usage bias among bacteria</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>1141</fpage>&#x2013;<lpage>1153</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki242</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharp</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W. H.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>The codon adaptation index&#x2014;a measure of directional synonymous codon usage bias, and its potential applications</article-title>. <source>Nucleic Acids Res.</source> <volume>15</volume>, <fpage>1281</fpage>&#x2013;<lpage>1295</lpage>. <pub-id pub-id-type="doi">10.1093/nar/15.3.1281</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharp</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W. H.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>An evolutionary perspective on synonymous codon usage in unicellular organisms</article-title>. <source>J. Mol. Evol.</source> <volume>24</volume>, <fpage>28</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1007/BF02099948</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>An improved implementation of effective number of codons (Nc)</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume>, <fpage>191</fpage>&#x2013;<lpage>196</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mss201</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Thengade</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dondal</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Genetic algorithm &#x2013; survey paper</article-title>,&#x201d; in <conf-name>MPGI National Multi Conference International Journal of Computer Applications</conf-name>, <conf-date>2012</conf-date> (ISSN <issn>0975-8887</issn>). <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.ijcaonline.org/proceedings/ncrtc/number5/6549-1039">https://www.ijcaonline.org/proceedings/ncrtc/number5/6549-1039</ext-link></comment>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>X. F.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kleinhofs</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Quantitative relationship between synonymous codon usage bias and GC composition across unicellular genomes</article-title>. <source>BMC Evol. Biol.</source> <volume>4</volume>, <fpage>19</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2148-4-19</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Herrmann</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Simonovic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Szklarczyk</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>von Mering</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Version 4.0 of PaxDb: Protein abundance data, integrated across model organisms, tissues, and cell-lines</article-title>. <source>Proteomics</source> <volume>15</volume>, <fpage>3163</fpage>&#x2013;<lpage>3168</lpage>. <pub-id pub-id-type="doi">10.1002/pmic.201400441</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Watts</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sankaranarayanan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Watts</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Raipuria</surname>
<given-names>R. K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Optimizing protein expression in heterologous system: Strategies and tools</article-title>. <source>Meta Gene</source> <volume>29</volume>, <fpage>100899</fpage>. <pub-id pub-id-type="doi">10.1016/j.mgene.2021.100899</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wright</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>The &#x201c;effective number of codons&#x201d; used in a gene</article-title>. <source>Gene</source> <volume>87</volume>, <fpage>23</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/0378-1119(90)90491-9</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>X. S.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Nature-inspired optimization algorithms</article-title>,&#x201d; in <source>Nature-inspired optimization algorithms</source> (<publisher-loc>Oxford</publisher-loc>: <publisher-name>Elsevier</publisher-name>). <pub-id pub-id-type="doi">10.1016/B978-0-12-416743-8.00017-8</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>