<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2025.1512923</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Gene age and genome organization in <italic>Escherichia coli</italic> and <italic>Bacillus subtilis</italic></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Jers</surname> <given-names>Carsten</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/177035/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mi&#x00161;eti&#x00107;</surname> <given-names>Hrvoje</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ravikumar</surname> <given-names>Vaishnavi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/177185/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Garg</surname> <given-names>Abhroop</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1333521/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Franjevi&#x00107;</surname> <given-names>Damjan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/174817/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Domazet-Lo&#x00161;o</surname> <given-names>Tomislav</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2072749/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Mijakovic</surname> <given-names>Ivan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/124332/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark</institution>, <addr-line>Kongens Lyngby</addr-line>, <country>Denmark</country></aff>
<aff id="aff2"><sup>2</sup><institution>Laboratory of Evolutionary Genetics, Division of Molecular Biology, Ru&#x00111;er Bo&#x00161;kovi&#x00107; Institute</institution>, <addr-line>Zagreb</addr-line>, <country>Croatia</country></aff>
<aff id="aff3"><sup>3</sup><institution>Division of Biology, University of Zagreb</institution>, <addr-line>Zagreb</addr-line>, <country>Croatia</country></aff>
<aff id="aff4"><sup>4</sup><institution>School of Medicine, Catholic University of Croatia</institution>, <addr-line>Zagreb</addr-line>, <country>Croatia</country></aff>
<aff id="aff5"><sup>5</sup><institution>Systems and Synthetic Biology Division, Department of Life Sciences, Chalmers University of Technology</institution>, <addr-line>Gothenburg</addr-line>, <country>Sweden</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Edgardo Jofre, National University of R&#x000ED;o Cuarto, Argentina</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Cesar Diaz Perez, University of Guanajuato, Mexico</p>
<p>Gregory Marczynski, McGill University, Canada</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Ivan Mijakovic <email>ivan.mijakovic&#x00040;chalmers.se</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>06</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1512923</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>27</day>
<month>05</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Jers, Mi&#x00161;eti&#x00107;, Ravikumar, Garg, Franjevi&#x00107;, Domazet-Lo&#x00161;o and Mijakovic.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Jers, Mi&#x00161;eti&#x00107;, Ravikumar, Garg, Franjevi&#x00107;, Domazet-Lo&#x00161;o and Mijakovic</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Using genomic phylostratigraphy, we examined the organization of <italic>Escherichia coli</italic> and <italic>Bacillus subtilis</italic> genomes from the perspective of evolutionary age of their genes. Phylostratigraphy analysis classifies individual genes into age-related bins, called phylostrata. Based on this analysis, several common features emerged in the genomes of the two model bacteria. More recent genes tend to be shorter and are expressed less frequently, or only in specific conditions. In terms of genomic location, new genes are enriched in areas containing prophages, suggesting a link with horizontal gene transfer. Interestingly, while most bacterial transcription regulators belong to the oldest phylostrata, they regulate expression of both older and more recent genes alike. A large fraction of bacterial operons contains genes from different phylostrata. This suggests that newer genes are integrated in the existing framework for regulating gene expression, and that the establishment of new regulatory circuits typically does not accompany acquisition of new genes. One striking difference between <italic>E. coli</italic> and <italic>B. subtilis</italic> genomes was observed. About 87.0% of all <italic>E. coli</italic> genes belong to the evolutionarily oldest phylostratum. In <italic>B. subtilis</italic>, this number is only 71.8%, indicating a more eventful evolutionary past in terms of acquisition of new genes, either by gene emergence or by horizontal transfer.</p></abstract>
<kwd-group>
<kwd>genomic phylostratigraphy</kwd>
<kwd>horizontal gene transfer</kwd>
<kwd>operon structure</kwd>
<kwd>prophages</kwd>
<kwd>transcription regulators</kwd>
</kwd-group>
<contract-sponsor id="cn001">Novo Nordisk Fonden<named-content content-type="fundref-id">10.13039/501100009708</named-content></contract-sponsor>
<contract-sponsor id="cn002">Danmarks Frie Forskningsfond<named-content content-type="fundref-id">10.13039/501100011958</named-content></contract-sponsor>
<counts>
<fig-count count="5"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="59"/>
<page-count count="13"/>
<word-count count="8352"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Systems Microbiology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Understanding the origins and evolution of genes remains one of the central challenges in evolutionary biology. A fundamental question is how new genetic material arises, becomes integrated into existing cellular networks, and contributes to cell fitness and diversity. While ancient genes underpin essential cellular functions, new genes can introduce novel traits and adaptive advantages. Exploring the mechanisms and dynamics of gene emergence and loss is therefore key to understanding the evolution of genomes over time. The oldest genes present in the extant genomes are the ones that were also present in the last universal common ancestor (LUCA; Mushegian, <xref ref-type="bibr" rid="B41">2008</xref>). Newer genes are the ones that emerged later. Besides a variety of duplication-based mechanisms (Tautz and Domazet-Lo&#x00161;o, <xref ref-type="bibr" rid="B51">2011</xref>), new genes could emerge from non-coding DNA sequences through random mutations (Neme and Tautz, <xref ref-type="bibr" rid="B42">2014</xref>). While it has been proposed that this type of gene emergence is common in all species, the frequency of this phenomenon probably varies a lot, and is difficult to assess quantitatively (Light et al., <xref ref-type="bibr" rid="B35">2014</xref>). Recently, Iyengar and Bornberg-Bauer (<xref ref-type="bibr" rid="B25">2023</xref>) developed a mathematical model that suggests that genes are lost much more rapidly than they emerge, and that new genes preferentially arise in transcribed regions. This concept is well-illustrated by a recent study of human proto genes (Grandchamp et al., <xref ref-type="bibr" rid="B19">2022</xref>), suggesting that new genes tend to &#x0201C;capture&#x0201D; regulatory sequences in their vicinity, such as introns, promoter motifs, and enhancers. Proto genes that survive the initial selection tend to grow by gaining new functional domains. 
While comparative genomics has provided strong evidence for the phenomenon of <italic>de novo</italic> gene emergence in various organisms, experimental validation of gene emergence is obviously challenging. To experimentally illustrate the selection process following &#x0201C;gene birth,&#x0201D; Knopp et al. (<xref ref-type="bibr" rid="B27">2019</xref>) constructed plasmid libraries with short randomly generated open reading frames (ORFs) and expressed them in <italic>Escherichia coli</italic>. They demonstrated that short peptides offering a fitness benefit during exposure to aminoglycoside antibiotics could be effectively selected by bacterial cells.</p>
<p>Genomic phylostratigraphy is a computational method for studying genome evolution based on estimating evolutionary age of individual genes (Domazet-Lo&#x00161;o et al., <xref ref-type="bibr" rid="B11">2007</xref>). For any species to be analyzed, the first step is to create a consensus phylogeny tree. In this species-centered tree, each node is named a phylostratum (PS). Each PS is then populated with individual genes whose founders emerged at that specific node. From the whole genome perspective, this means that all genes get distributed in phylostrata corresponding to their evolutionary age. In the early evolutionary history, the first few PS correspond to deep, shared ancestry (e.g., LUCA, Bacteria) and are therefore common across different bacterial species such as <italic>E. coli</italic> and <italic>B. subtilis</italic>. Subsequent PS become increasingly species-specific as bacterial lineages diverge. Phylostratigraphy has proven to be a powerful method for explaining macro-evolutionary phenomena. Certain complex functions that emerged at some point in time tend to involve large sets of genes, which then cluster to a specific PS. Hence, phylostratigraphy was used to demonstrate that cancer-related genes could be coupled to the emergence of multicellularity (Domazet-Lo&#x00161;o and Tautz, <xref ref-type="bibr" rid="B14">2010a</xref>), and that the expression of genes during ontogenic development of metazoa follows an age-specific pattern (Domazet-Lo&#x00161;o and Tautz, <xref ref-type="bibr" rid="B14">2010a</xref>). The latter study has been particularly noted for providing direct evidence for the hourglass theory of development (Casci, <xref ref-type="bibr" rid="B7">2011</xref>). 
There has been some debate about the reliability of sequence similarity search algorithms underlying the phylostratigraphic approach (Moyers and Zhang, <xref ref-type="bibr" rid="B40">2015</xref>, <xref ref-type="bibr" rid="B39">2016</xref>; Domazet-Lo&#x00161;o et al., <xref ref-type="bibr" rid="B13">2017</xref>, <xref ref-type="bibr" rid="B10">2024</xref>). However, the statistical mapping of well-studied functional data on phylogenies repeatedly demonstrates that classical sequence similarity searches accurately recover macroevolutionary information (Domazet-Lo&#x00161;o et al., <xref ref-type="bibr" rid="B13">2017</xref>, <xref ref-type="bibr" rid="B10">2024</xref>; Xia et al., <xref ref-type="bibr" rid="B57">2025</xref>).</p>
<p>It is known that bacterial genomes undergo extensive horizontal gene transfer (Arnold et al., <xref ref-type="bibr" rid="B1">2022</xref>). This evolutionary process is particularly prominent in mixed bacterial communities (Brito, <xref ref-type="bibr" rid="B4">2021</xref>). Mobile genetic elements, such as plasmids and phages, have been recognized as the main driver of horizontal gene transfer (Lang et al., <xref ref-type="bibr" rid="B32">2017</xref>). Since phylostratigraphy was initially developed for metazoa, i.e., species with predominantly vertical mode of evolution, it was not initially clear how well the method would perform in analyzing bacterial genome evolution. However, it turned out to work remarkably well. Using genomic phylostratigraphy, Futo et al. (<xref ref-type="bibr" rid="B18">2021</xref>) demonstrated that the development of <italic>Bacillus subtilis</italic> biofilms recapitulates phylogeny at the expression level. This finding suggested that the ontogeny of bacterial biofilms is a developmental process similar to that of e.g., metazoans or plants (Koska et al., <xref ref-type="bibr" rid="B28">2024</xref>). Similarly, phylostratigraphy has been used to characterize the development of different morphotypes of <italic>Borreliella</italic> (syn. Borrelia) <italic>burgdorferi</italic> (Corak et al., <xref ref-type="bibr" rid="B8">2023</xref>). Based on the assumption that bacterial sporulation is also a true developmental phenomenon, phylostratigraphy has been successfully used to predict new sporulation genes, among uncharacterized genes in sporulation-related phylostrata (Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>). This validated the earlier proposition that phylostratigraphy could be a useful tool in genome mining pipelines (Mijakovic, <xref ref-type="bibr" rid="B38">2020</xref>).</p>
<p>Bacterial genomes are large circular DNA molecules, in which the origin of replication (and terminus of replication, at 180&#x000B0; with respect to origin) plays a major organizational role (Duigou and Boccard, <xref ref-type="bibr" rid="B16">2017</xref>). Each chromosome arm between the origin of replication and replication terminus consists of two regions, a non-structured region and a so-called macrodomain (Valens et al., <xref ref-type="bibr" rid="B53">2004</xref>). The &#x0201C;Right&#x0201D; and &#x0201C;Left&#x0201D; macrodomains are defined by their incapacity to interact with each other, a phenomenon linked to high level structural organization of the bacterial genome. The position of genes with respect to the bacterial origin of replication has a deep significance. Genes near the replication origin tend to be more highly expressed (Ying et al., <xref ref-type="bibr" rid="B58">2014</xref>; Kosmidis et al., <xref ref-type="bibr" rid="B29">2020</xref>; Lato and Golding, <xref ref-type="bibr" rid="B33">2020</xref>). Gene essentiality is also high near the origin of replication (Kosmidis et al., <xref ref-type="bibr" rid="B29">2020</xref>; Lato and Golding, <xref ref-type="bibr" rid="B33">2020</xref>). Genes farther away from the origin are more prone to molecular changes, such as substitutions, recombination events and genomic rearrangements (Lato and Golding, <xref ref-type="bibr" rid="B34">2021</xref>). In <italic>B. subtilis</italic>, during asymmetric division leading up to spore formation, chromosomal location governs the timing of expression of sporulation genes (Zupancic et al., <xref ref-type="bibr" rid="B60">2001</xref>).</p>
<p>In this report, we used genomic phylostratigraphy to examine the relationship between gene age and chromosomal organization in two model bacterial species, <italic>B. subtilis</italic> and <italic>E. coli</italic>. We hypothesized that differences in lifestyle and ecology between <italic>B. subtilis</italic> (predominantly soil-dwelling, sporulating bacterium) and <italic>E. coli</italic> (a facultative anaerobe associated with animal hosts) may have influenced their evolutionary paths and their propensity to acquire new genes. We further hypothesized that bacterial genome dynamics, including rate of gene acquisition, genomic placement of new genes, their transcriptional regulation, and their lateral mobility, might exhibit distinct patterns compared to those observed in eukaryotes.</p>
</sec>
<sec id="s2">
<title>2 Materials and methods</title>
<sec>
<title>2.1 Phylostratigraphic analysis</title>
<p>Phylostratigraphic maps were generated based on protein sequence data downloaded from the UniProt homepage (UniProt Consortium, <xref ref-type="bibr" rid="B52">2023</xref>), as described before (Ravikumar et al., <xref ref-type="bibr" rid="B45">2018</xref>; Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>; Futo et al., <xref ref-type="bibr" rid="B18">2021</xref>). The genomes used to produce the phylostratigraphic tree and the protein sequences analyzed for <italic>B. subtilis</italic> 168 and <italic>E. coli</italic> K12 are detailed in <xref ref-type="supplementary-material" rid="SM2">Supplementary Data Sheets 1</xref>, <xref ref-type="supplementary-material" rid="SM3">2</xref>, respectively. The consensus phylogenetic tree covering the divergence from the last common ancestor of cellular organisms to <italic>B. subtilis</italic> was constructed as described previously (Domazet-Lo&#x00161;o et al., <xref ref-type="bibr" rid="B11">2007</xref>; Domazet-Lo&#x00161;o and Tautz, <xref ref-type="bibr" rid="B15">2010b</xref>). For <italic>B. subtilis</italic>, 4,177 of 4,197 proteins were assigned a phylostratigraphic age and in the case of <italic>E. coli</italic> 4,279 of 4,306 proteins were assigned an age. The taxon ID from NCBI was used except for groups A, B, C (1708685, 1708686, 1708687, 1708688, 1708689).</p>
</sec>
<sec>
<title>2.2 Data acquisition</title>
<p>To correlate various protein properties with phylostratigraphic age, global datasets were extracted. For both <italic>B. subtilis</italic> and <italic>E. coli</italic>, gene coordinates and the corresponding protein lengths were obtained from the BioCyc database (Karp et al., <xref ref-type="bibr" rid="B26">2019</xref>). For <italic>B. subtilis</italic>, lists of genes located within annotated prophage regions (hereafter referred to as prophage genes), as well as data on operon structure, transcriptional regulation, and proteins involved in sporulation, competence, and biofilm formation were downloaded from the SubtiWiki v.3 database (Zhu and St&#x000FC;lke, <xref ref-type="bibr" rid="B59">2018</xref>). Lists of genes expressed at low and high levels were obtained from a transcriptome study by Nicolas et al. (<xref ref-type="bibr" rid="B43">2012</xref>). For <italic>E. coli</italic>, a list of prophage genes was derived from a study by Wang et al. (<xref ref-type="bibr" rid="B55">2010</xref>). Data on operon structure and transcriptional regulation were obtained from RegulonDB (Santos-Zavaleta et al., <xref ref-type="bibr" rid="B48">2019</xref>). Finally, a list of <italic>E. coli</italic> proteins implicated in biofilm formation was derived from BioCyc based on associated Gene Ontology (GO) terms (Karp et al., <xref ref-type="bibr" rid="B26">2019</xref>). The compiled data for <italic>B. subtilis</italic> and <italic>E. coli</italic> is available in <xref ref-type="supplementary-material" rid="SM4">Supplementary Data Sheets 3</xref>, <xref ref-type="supplementary-material" rid="SM5">4</xref>, respectively. Calculation of the Spearman&#x00027;s correlation coefficient was done using the function CORREL in Excel (Microsoft) as was the calculation of <italic>p</italic>-value for Chi-square test using the CHISQ.TEST function.</p>
</sec>
<sec>
<title>2.3 Circular visualization of genomes</title>
<p>CiVi (Circular visualization for microbial genomes; Overmars et al., <xref ref-type="bibr" rid="B44">2015</xref>) was used for the purpose of generating circular maps to represent positions of gene clusters on the genomes of <italic>B. subtilis</italic> 168 and <italic>E. coli</italic> K12. Groups of genes that belong to the same PS were imported and the data was displayed as position of the &#x0201C;genes on the plus strand&#x0201D; in the form of a single concentric circle. This was repeated subsequently for each PS individually. For each species, for the innermost ring, &#x0201C;coordinates&#x0201D; was chosen under the data display option, with the origin of replication denoted as zero.</p>
</sec>
<sec>
<title>2.4 Functional annotation</title>
<p>Functional annotation analysis of the proteins falling under different phylostrata in <italic>B. subtilis</italic> 168 and <italic>E. coli</italic> K12 was performed using DAVID (Huang da et al., <xref ref-type="bibr" rid="B23">2009a</xref>,<xref ref-type="bibr" rid="B24">b</xref>). Phylostrata 1&#x02013;5 of <italic>B. subtilis</italic> 168 and <italic>E. coli</italic> K12 fall under the same groups (phylogeny). Hence, two groups were made (phylostrata 1&#x02013;5 and 6&#x02013;15) in the case of <italic>B. subtilis</italic> and the analysis was performed overall on these two groups, using the DAVID Bioinformatics Resources. UniProt accession IDs were submitted as a gene list for this purpose. Functional annotation charts/tables using the categories GOTERM_BP_DIRECT, GOTERM_MF_DIRECT, and KEGG_PATHWAY were generated. Default parameters such as count threshold of 2 and <italic>P</italic>-value score of 0.1 were used. The same was done for <italic>E. coli</italic> wherein phylostrata 1&#x02013;5 and 6&#x02013;11 were grouped. For reference, the results of the analysis done for proteins in the individual phylostrata are presented (<xref ref-type="supplementary-material" rid="SM6">Supplementary Data Sheet 5</xref>).</p>
</sec>
</sec>
<sec id="s3">
<title>3 Results</title>
<p>The bacterial species studied were selected based on their differing habitats, with <italic>B. subtilis</italic> being a predominantly soil-dwelling, sporulating bacterium, and <italic>E. coli</italic> a facultative anaerobe associated with animal hosts. Additionally, they were chosen because they are among the best-characterized bacterial models, with early selection driven by their biological properties (such as sporulation and rapid growth, respectively), followed by extensive development of genetic tools and comprehensive genomic resources. For <italic>E. coli</italic>, we specifically focused on the non-pathogenic laboratory K-12 strain, which serves as a standard reference genome and minimizes confounding variation associated with pathogenicity.</p>
<sec>
<title>3.1 Phylostratigraphy maps of <italic>E. coli</italic> and <italic>B. subtilis</italic>: general features</title>
<p>Phylostratigraphy maps of <italic>B. subtilis</italic> and <italic>E. coli</italic> were constructed as previously described (Ravikumar et al., <xref ref-type="bibr" rid="B45">2018</xref>; Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>; Futo et al., <xref ref-type="bibr" rid="B18">2021</xref>; <xref ref-type="fig" rid="F1">Figures 1A, B</xref>). In both bacteria, a large majority of genes clusters in the oldest PS. This phenomenon was more pronounced in <italic>E. coli</italic>, where 87.0% of genes belonged to PS1, whereas in <italic>B. subtilis</italic> that fraction was considerably lower, only 71.8%. This was a higher fraction than reported in the domain of Eukarya, where for example in humans about 38% of proteins belong to PS1 (Domazet-Lo&#x00161;o and Tautz, <xref ref-type="bibr" rid="B14">2010a</xref>). Gene age was found to be inversely correlated to gene length in both bacteria (<xref ref-type="fig" rid="F1">Figures 1C, D</xref>), in accord with the theory of &#x0201C;gene birth&#x0201D; from short ORFs (Neme and Tautz, <xref ref-type="bibr" rid="B42">2014</xref>). A previous proteomics study indicated that &#x0201C;younger&#x0201D; bacterial genes are less expressed (Ravikumar et al., <xref ref-type="bibr" rid="B45">2018</xref>). It was possible for us to re-examine this proposition by using an exhaustive transcriptome dataset from Nicolas et al. (<xref ref-type="bibr" rid="B43">2012</xref>). This study investigated transcriptional responses of <italic>B. subtilis</italic> in &#x0003E;100 different growth conditions. Genes that consistently showed either the lowest or the highest expression levels in all conditions were identified. Using this criterion, total genes in each PS were then categorized as possessing a &#x0201C;high&#x0201D; expression level or &#x0201C;low expression level,&#x0201D; while those not belonging to these two categories were labeled as &#x0201C;other&#x0201D; (<xref ref-type="fig" rid="F1">Figure 1E</xref>). 
Based on these expression indicators from the transcriptome data (Nicolas et al., <xref ref-type="bibr" rid="B43">2012</xref>), it was evident that for all PS, the majority of genes belonged to the &#x0201C;other&#x0201D; category, indicating intermediate or variable transcription levels. However, a trend was observed in which PS 1&#x02013;4 contained a higher fraction of highly expressed genes, whereas PS 5&#x02013;14 had a greater proportion of genes in the low expression category. It should be noted that PS3 and PS4 that exhibited the highest fraction of highly expressed genes consist of only 24 and 6 proteins, respectively. The very small PS15 (comprising 0.05% of the genome) did not contain any genes in either the &#x0201C;high&#x0201D; or &#x0201C;low&#x0201D; categories.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Distribution, protein length and expression within the various phylostrata. Distribution of proteins in respective phylostrata for <bold>(A)</bold> <italic>B. subtilis</italic> and <bold>(B)</bold> <italic>E. coli</italic>. The fraction of all proteins in the specific phylostrata is given (%) and an indication of the evolutionary trajectory is given below the phylostratum number. The average protein length in respective phylostrata is shown in a box plot for <italic>B. subtilis</italic> <bold>(C)</bold> and <italic>E. coli</italic> <bold>(D)</bold>. In <bold>(E)</bold>, the distribution of <italic>B. subtilis</italic> proteins encoded by genes that exhibit high (green) and low (red) expression within each phylostratum is shown.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1512923-g0001.tif"/>
</fig>
</sec>
<sec>
<title>3.2 Location of new genes in bacterial genomes is not governed by distance to origin of replication</title>
<p>High expression levels and gene essentiality are known to gravitate to the origin of replication in the bacterial chromosomes (Ying et al., <xref ref-type="bibr" rid="B58">2014</xref>; Kosmidis et al., <xref ref-type="bibr" rid="B29">2020</xref>; Lato and Golding, <xref ref-type="bibr" rid="B33">2020</xref>). Since the newer genes are less strongly expressed and are most often involved in non-essential specialized functions (Ravikumar et al., <xref ref-type="bibr" rid="B45">2018</xref>), we asked whether they would cluster toward the opposite end of the chromosome, the replication terminus. Although the precise locations of the origin and terminus of replication are typically inferred from sequence composition features such as GC skew (Grigoriev, <xref ref-type="bibr" rid="B20">1998</xref>), these estimates are generally robust and sufficient for broad-scale analyses of gene distribution. In <xref ref-type="fig" rid="F2">Figures 2A, B</xref>, the location of each gene on the respective genome maps of <italic>B. subtilis</italic> and <italic>E. coli</italic> is shown, color coded with respect to PS. Average distances from replication origin for genes in each PS show a weak inverse correlation (Spearman&#x00027;s correlation coefficient of 0.22) with gene age (<xref ref-type="fig" rid="F2">Figures 2C, D</xref>). When prophage genes are excluded from the analysis, this weak correlation disappears completely (Spearman&#x00027;s correlation coefficient of &#x02212;0.19; <xref ref-type="supplementary-material" rid="SM7">Supplementary Figure 1</xref>). There is also no evident enrichment of younger genes in &#x0201C;Right&#x0201D; vs. &#x0201C;Left&#x0201D; chromosome arm, nor in non-structured regions vs. macrodomains. However, we identified prophages as one genomic feature that shows significant enrichment in more recent genes, both in <italic>E. coli</italic> and <italic>B. subtilis</italic> (<xref ref-type="fig" rid="F3">Figure 3</xref>). 
An alternative illustration of the genome organization, showing genes as boxes color-coded with respect to PS (<xref ref-type="supplementary-material" rid="SM8">Supplementary Figures 2A, B</xref>), illustrates the prophage regions (marked in red). Prophage genes constitute 8.3% and 5.2% of all genes in <italic>B. subtilis</italic> and <italic>E. coli</italic>, respectively. When quantifying the fraction of prophage genes in the different phylostrata (<xref ref-type="fig" rid="F3">Figures 3A, B</xref>), it should be noted that certain phylostrata, such as PS6, contain relatively few genes, which could exaggerate apparent enrichment. Nevertheless, it was evident that PS1 genes are underrepresented in prophages, and prophages are generally enriched in newer genes.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Genomic localization of genes. The genes of <italic>B. subtilis</italic> <bold>(A)</bold> and <italic>E. coli</italic> <bold>(B)</bold> are divided into 15 and 11 phylostrata (PS), respectively, depicted by concentric rings. Each dot in a phylostratum ring represents one protein-encoding gene. The rings are numbered 1&#x02013;15 for <italic>B. subtilis</italic> and 1&#x02013;11 for <italic>E. coli</italic>, to be read from outside to inside. The innermost ring indicates the coordinates on the genome. The origin of replication is positioned at the top center of each circular genome map. The average distance to the origin of replication of genes within each of the phylostrata is shown for <italic>B. subtilis</italic> <bold>(C)</bold> and <italic>E. coli</italic> <bold>(D)</bold>. The dotted red line represents the length corresponding to even distribution of genes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1512923-g0002.tif"/>
</fig>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Enrichment of genes belonging to newer phylostrata in prophages. The fraction of prophage genes in the different phylostrata for <italic>B. subtilis</italic> <bold>(A)</bold> and <italic>E. coli</italic> <bold>(B)</bold> is presented. While there is a general trend of increased enrichment in newer genes, some phylostrata (e.g., PS6) contain relatively few genes, which may exaggerate apparent enrichment patterns and should be interpreted with caution.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1512923-g0003.tif"/>
</fig>
</sec>
<sec>
<title>3.3 New genes tend to integrate into existing operons</title>
<p>Most bacterial genes are organized in operons, which are the basic transcription control units, with several genes expressed from one common promoter. With the data on evolutionary age of genes at hand, and given the operonic structure of bacterial genomes, we asked whether new genes typically form new transcriptional units, or if they get incorporated in pre-existing operons. To address this question, we defined heterogenous operons as operons consisting of genes from two or more different phylostrata. For <italic>B. subtilis</italic>, 862 operon structures were reported in SubtiWiki and for <italic>E. coli</italic>, 798 operons were found in RegulonDB. <xref ref-type="fig" rid="F4">Figures 4A, B</xref> show the fraction of heterogenous operons for <italic>B. subtilis</italic> and <italic>E. coli</italic>, distributed by operon length (expressed as the number of genes in the operon). A significant fraction of all operons contains genes from two or more phylostrata, and this fraction increases with operon length. This is particularly evident in <italic>B. subtilis</italic>, where all operons with 13 or more genes were found to be heterogenous. While we did not systematically analyze genomic islands in this study, we did assess gene enrichment within annotated prophage regions. Although prophages are indeed enriched in newer genes, they account for a relatively small portion of the genome. Thus, the majority of new genes are located outside of prophages and often appear within annotated operons, suggesting that integration into existing transcriptional units is a common fate for new genes in these species. We then tried to probe what functionalities were introduced in the pre-existing operons. 
When we considered the operons consisting of genes with known functions, it seemed evident that the most common event was the introduction of proteins that regulated one or more proteins in the operons by modulating either transcription or translation, or by protein-protein interaction (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). There were also some examples of newer genes that were more difficult to rationalize. These included subunits of heterooligomeric proteins, an antitoxin and even a protein reported to be essential. In general, most of the homogenous operons contain genes from PS1. Only 11 non-PS1 homogenous operons were found in <italic>B. subtilis</italic> and 13 in <italic>E. coli</italic>. Interestingly, several of these more recent homogenous operons appear to play a role in developmental phenomena such as sporulation in <italic>B. subtilis</italic> (<xref ref-type="table" rid="T1">Table 1</xref>) and biofilm formation in <italic>E. coli</italic> (<xref ref-type="table" rid="T2">Table 2</xref>).</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Transcriptional regulation and PS composition of operons. <bold>(A, B)</bold> Heterogenous operons as defined by presence of genes from different phylostrata in the same operon for <italic>B. subtilis</italic> <bold>(A)</bold> and <italic>E. coli</italic> <bold>(B)</bold>. <bold>(C&#x02013;F)</bold> Heatmaps showing the number of genes in each phylostratum regulated by sigma factors <bold>(C, E)</bold> and global transcription regulators <bold>(D, F)</bold> in <italic>B. subtilis</italic> <bold>(C, D)</bold> and <italic>E. coli</italic> <bold>(E, F)</bold>. For each regulatory group, the distribution of regulated proteins in respective phylostrata is shown. Cells are color-coded using a blue&#x02013;white&#x02013;red gradient to emphasize relative gene counts across categories. These heatmaps are intended as a visual summary of count data and do not represent statistical significance or enrichment. For <italic>B. subtilis</italic>, transcriptional regulators that do not belong to PS1 are indicated; in <italic>E. coli</italic>, all global regulators fall within PS1.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1512923-g0004.tif"/>
</fig>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Homogenous, non-PS1 operons in <italic>B. subtilis</italic>.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Operon</bold></th>
<th valign="top" align="center"><bold>Phylostratum</bold></th>
<th valign="top" align="left"><bold>Function</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>fbpA, fbpB</italic></td>
<td valign="top" align="center">12</td>
<td valign="top" align="left">Iron starvation</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yorB, yorC</italic></td>
<td valign="top" align="center">12</td>
<td valign="top" align="left">Unknown, SP-&#x003B2; prophage</td>
</tr>
<tr>
<td valign="top" align="left"><italic>spoIISA, spoIISB</italic></td>
<td valign="top" align="center">10</td>
<td valign="top" align="left">Toxin-antitoxin, sporulation</td>
</tr>
<tr>
<td valign="top" align="left"><italic>ykzB, ykoL</italic></td>
<td valign="top" align="center">10</td>
<td valign="top" align="left">Unknown</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yppD, yppE</italic></td>
<td valign="top" align="center">10</td>
<td valign="top" align="left">Unknown</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yxcE, yxcD</italic></td>
<td valign="top" align="center">10</td>
<td valign="top" align="left">Unknown</td>
</tr>
<tr>
<td valign="top" align="left"><italic>spoVID, ysxE</italic></td>
<td valign="top" align="center">8</td>
<td valign="top" align="left">Sporulation</td>
</tr>
<tr>
<td valign="top" align="left"><italic>cotB, ywrJ</italic></td>
<td valign="top" align="center">8</td>
<td valign="top" align="left">Sporulation</td>
</tr>
<tr>
<td valign="top" align="left"><italic>sigI, rsgI</italic></td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Sigma factor/anti-sigma factor, heat shock</td>
</tr>
<tr>
<td valign="top" align="left"><italic>gpr, spoIIP</italic></td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Sporulation/Germination</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yuzM, yusN</italic></td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Sporulation</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Homogenous, non-PS1 operons in <italic>E. coli</italic>.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Operon</bold></th>
<th valign="top" align="center"><bold>Phylostratum</bold></th>
<th valign="top" align="left"><bold>Function</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>yffM, yffN</italic></td>
<td valign="top" align="center">11</td>
<td valign="top" align="left">Unknown, Prophage cpz55</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yffQ, yffR</italic></td>
<td valign="top" align="center">11</td>
<td valign="top" align="left">Unknown, Prophage cpz55</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yadM, yadL, yadK, yadC</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Biofilm, cryptic under normal laboratory conditions</td>
</tr>
<tr>
<td valign="top" align="left"><italic>ydfA, ydfB, ydfC</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Unknown, Qin prophage</td>
</tr>
<tr>
<td valign="top" align="left"><italic>kilR, ydaE</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Inhibitor of FtsZ, killing protein, Rac prophage</td>
</tr>
<tr>
<td valign="top" align="left"><italic>ydaG, ydaF</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Unknown, Rac prophage</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yhaB, yhaC</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Unknown</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yjbL, yjbM</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Unknown</td>
</tr>
<tr>
<td valign="top" align="left"><italic>ymcE, gnsA</italic></td>
<td valign="top" align="center">9</td>
<td valign="top" align="left">Cold shock protein; predicted regulator of phosphatidylethanolamine synthesis</td>
</tr>
<tr>
<td valign="top" align="left"><italic>yfdP, yfdQ</italic></td>
<td valign="top" align="center">6</td>
<td valign="top" align="left">Unknown, Prophage CPS-53</td>
</tr>
<tr>
<td valign="top" align="left"><italic>ynfO, ydfO</italic></td>
<td valign="top" align="center">5</td>
<td valign="top" align="left">Unknown, Qin prophage</td>
</tr>
<tr>
<td valign="top" align="left"><italic>mokC, hokC</italic></td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Gef toxin, interferes with membrane function</td>
</tr>
<tr>
<td valign="top" align="left"><italic>mqsR, mqsA</italic></td>
<td valign="top" align="center">2</td>
<td valign="top" align="left">Toxin-antitoxin, biofilm, persistence</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>3.4 There is no age correlation between genes and regulators that govern their expression</title>
<p>The operonic organization of bacterial genomes depends on transcriptional regulators that govern expression from different promoters. Out of the 194 proteins annotated as transcriptional regulators in <italic>B. subtilis</italic> on SubtiWiki, a majority (163; 84%) belonged to PS1 (<xref ref-type="supplementary-material" rid="SM9">Supplementary Figure 3A</xref>). Similarly, in <italic>E. coli</italic> we observed that 193 of 207 (93%) transcriptional regulators reported in RegulonDB belonged to PS1 (<xref ref-type="supplementary-material" rid="SM9">Supplementary Figure 3C</xref>). Since it was clear from the operon structure that most of the new genes land in existing operons, we investigated how this reflects on the relationship between the evolutionary age of genes and the age of transcription regulators that govern their expression. Among the evolutionarily younger transcriptional regulators in <italic>B. subtilis</italic> were, for example, the competence regulator ComK (PS8; van Sinderen et al., <xref ref-type="bibr" rid="B54">1994</xref>), its repressor Rok (PS10; Hoa et al., <xref ref-type="bibr" rid="B22">2002</xref>), GerR involved in sporulation (PS8; Kuwana et al., <xref ref-type="bibr" rid="B31">2005</xref>) and the master activator of flagellar biosynthesis SwrAA/1 (PS10; Calvio et al., <xref ref-type="bibr" rid="B6">2005</xref>). We asked whether these evolutionarily younger transcriptional regulators would preferentially regulate other younger genes. To assess this, we divided regulated proteins into three groups based on whether they are regulated by a transcriptional regulator belonging to either phylostratum 1, 2, or above 2 (<xref ref-type="supplementary-material" rid="SM9">Supplementary Figure 3</xref>). There was a weak tendency for the transcriptional regulators belonging to PS1 to preferentially regulate evolutionarily older proteins (<xref ref-type="supplementary-material" rid="SM9">Supplementary Figures 3C, D</xref>). 
While these differences were statistically significant (Chi-square test <italic>p</italic>-value of 0.001 and 0.009 for <italic>B. subtilis</italic> and <italic>E. coli</italic>, respectively), it was observed that evolutionarily younger transcriptional regulators can also adopt transcriptional control of older genes. For major players in regulating gene expression, the sigma factors and global transcriptional regulators, we created an overview of age correlation with the genes under their regulation by making heat maps (<xref ref-type="fig" rid="F4">Figures 4C&#x02013;F</xref>). In <italic>B. subtilis</italic> (<xref ref-type="fig" rid="F4">Figures 4C, D</xref>), it became apparent that several sigma factors including SigE and SigK, known to be involved in regulation of sporulation (Haldenwang et al., <xref ref-type="bibr" rid="B21">1981</xref>; Stragier et al., <xref ref-type="bibr" rid="B50">1989</xref>), regulate a relatively higher number of proteins from newer phylostrata (PS2, 7, 8, 9 and PS8, 9, 12, respectively; <xref ref-type="fig" rid="F4">Figure 4C</xref>). Among the regulated genes with a known function in these phylostrata, most were involved in various aspects of the sporulation process. Among the global transcriptional regulators, especially AbrB and to a lesser extent SpoIIID stood out (<xref ref-type="fig" rid="F4">Figure 4D</xref>). For AbrB, regulated genes in the recent phylostrata were involved in sporulation and biosynthesis of antibacterials, and a substantial number of genes were of unknown function. For SpoIIID, mainly genes involved in sporulation were observed, consistent with the known regulatory role of SpoIIID (Kunkel et al., <xref ref-type="bibr" rid="B30">1989</xref>). For <italic>E. coli</italic>, the tendency of specific transcriptional regulators to preferentially regulate newer genes seemed less pronounced. That said, the sigma factor 38 (RpoS) showed an enrichment in PS5 genes (<xref ref-type="fig" rid="F4">Figure 4E</xref>). 
Sigma factor 38 is induced upon entry into stationary phase and functions as a master regulator of the general stress response (Weber et al., <xref ref-type="bibr" rid="B56">2005</xref>). Among PS5 genes regulated by RpoS were several involved in production of curli, an amyloid protein that functions as a structural component of biofilms (Salinas et al., <xref ref-type="bibr" rid="B47">2020</xref>), proteins associated with biofilm formation, and the utilization of DNA as sole carbon source. The two-component system response regulator PhoP involved in stress response exhibited an enrichment of regulated genes in newer phylostrata, specifically PS9 (<xref ref-type="fig" rid="F4">Figure 4F</xref>). In this set of proteins, we find MgrB and SafA (formerly B1500) that both regulate PhoP activity (Lippa and Goulian, <xref ref-type="bibr" rid="B36">2009</xref>; Eguchi et al., <xref ref-type="bibr" rid="B17">2007</xref>). This could thus indicate that these evolutionarily newer proteins have found a role in the fine-tuning of gene expression in the cell. Other proteins include ones with a role in acid stress response and regulation of intracellular magnesium ion concentration.</p>
</sec>
<sec>
<title>3.5 Younger genes tend to be related to developmental phenomena</title>
<p>To provide an overview of the cellular processes that were developed later in evolution, we performed an over-representation analysis of GO and KEGG terms for the two bacteria. This was done both at the level of individual phylostrata and for the grouped phylostrata 1&#x02013;5 and 6&#x02013;15/11. The rationale for the latter grouping of phylostrata is that the lineages of <italic>B. subtilis</italic> and <italic>E. coli</italic> diverged after PS5. Consequently, it could be argued that the genetic &#x0201C;innovation&#x0201D; differentiating between the two bacteria can be found primarily in phylostrata 6&#x02013;15 (20% of genes) in <italic>B. subtilis</italic> and 6&#x02013;11 (5.6% of genes) in <italic>E. coli</italic>. When performing the analysis on the set of older genes (PS1&#x02013;5), over-represented categories included proteins in universal housekeeping processes such as &#x0201C;metabolic pathways,&#x0201D; &#x0201C;biosynthesis of secondary metabolites,&#x0201D; and &#x0201C;microbial metabolism in diverse environments&#x0201D; (<xref ref-type="supplementary-material" rid="SM6">Supplementary Data Sheet 5</xref>). In the newer phylostrata, in both bacteria, developmental programs were enriched (<xref ref-type="fig" rid="F5">Figures 5A, B</xref>, <xref ref-type="supplementary-material" rid="SM6">Supplementary Data Sheet 5</xref>). In <italic>B. subtilis</italic>, genes involved in, for example, sporulation and genetic competence are over-represented. In the case of <italic>E. coli</italic>, different terms related to stress responses, cell division, as well as cell adhesion relevant for biofilm formation are over-represented. This prompted us to investigate development-related genes in more depth.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Over-representation analysis of proteins in later phylostrata. Functional over-representation based on the categories GO biological process (BP), Molecular function (MF), and KEGG (KG) terms for <italic>B. subtilis</italic> proteins in phylostrata 6&#x02013;15 <bold>(A)</bold>, and <italic>E. coli</italic> proteins in phylostrata 6&#x02013;11 <bold>(B)</bold>. GO terms showing very high enrichment but based on low gene counts (e.g., two or fewer genes) should be interpreted with caution as they may reflect chance associations rather than biologically significant trends. Phylostratigraphic age of proteins involved in developmental phenomena. Distribution in phylostrata of <italic>B. subtilis</italic> proteins involved in sporulation <bold>(C)</bold>, and genetic competence <bold>(D)</bold>. For biofilm formation, the distribution of proteins for <italic>B. subtilis</italic> and <italic>E. coli</italic> is shown <bold>(E, F)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1512923-g0005.tif"/>
</fig>
<p>Overall, the majority of sporulation proteins belong to PS1 (53.9%). As previously reported (Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>), we found a significant involvement of younger genes in sporulation, especially in PS7&#x02013;11 (29.9%; <xref ref-type="fig" rid="F5">Figure 5C</xref>). While sporulation presumably did not yet exist as a functional developmental program in PS1, a substantial number of older genes now participate in the process. These genes may have been functionally integrated into sporulation through regulatory or contextual changes, or possibly by undergoing sequence modification. A similar pattern was identified for other developmental phenomena, such as <italic>B. subtilis</italic> genetic competence (<xref ref-type="fig" rid="F5">Figure 5D</xref>) and biofilm development (<xref ref-type="fig" rid="F5">Figure 5E</xref>), as well as <italic>E. coli</italic> biofilm development (<xref ref-type="fig" rid="F5">Figure 5F</xref>). In all these cases, the majority of developmental genes are found in PS1, but a substantial fraction of younger genes also contributes to these phenomena.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Discussion</title>
<p>What can be learned from looking at bacterial genome organization from the perspective of evolutionary age of genes? The bulk of bacterial genes are dedicated to core functions, including housekeeping, and these are naturally encoded by the oldest genes, situated in PS1&#x02013;5 (<xref ref-type="fig" rid="F1">Figures 1A, B</xref>, <xref ref-type="supplementary-material" rid="SM2">Supplementary Data Sheets 1</xref>, <xref ref-type="supplementary-material" rid="SM3">2</xref>). A majority of the proteins in both species belonged to PS1 indicating that many of the core functions in present day bacteria also existed in the last universal common ancestor (LUCA). This contrasts with the situation in Eukarya, where the fraction of PS1 proteins is much lower, indicating that many more new genes appeared in this domain during the course of evolution. Developmental phenomena in bacteria, such as biofilm formation (Futo et al., <xref ref-type="bibr" rid="B18">2021</xref>) and sporulation (Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>) are known to be more recent inventions, and as suggested by our study, typically involve newer genes (<xref ref-type="fig" rid="F5">Figures 5C&#x02013;F</xref>). This is in line with bacterial developmental processes being cited as models for ongoing social evolution in bacteria (Boyle et al., <xref ref-type="bibr" rid="B3">2013</xref>). Since bacterial social interactions in biofilms are highly dynamic and evolvable, their intense evolution can be directly observed in adaptive evolution experiments (Martin et al., <xref ref-type="bibr" rid="B37">2016</xref>). <italic>B. subtilis</italic> is often cited for its rich repertoire of developmental and community-based genetic programs, involving biofilm and pellicle formation, sporulation, cannibalism and genetic competence (Ricci-Tam et al., <xref ref-type="bibr" rid="B46">2023</xref>). By contrast, <italic>E. 
coli</italic> has a less diverse lifestyle, with biofilm formation being the only developmental process it can do. This is very accurately reflected in the phylostratigraphy analysis, with only 5% of <italic>E. coli</italic> genes in PS6&#x02013;11 (<xref ref-type="fig" rid="F1">Figure 1B</xref>), and <italic>B. subtilis</italic> with 20% of all genes in PS6&#x02013;15. Does this mean that <italic>E. coli</italic> evolved fewer <italic>de novo</italic> genes than <italic>B. subtilis</italic> during its evolution? Not necessarily. This question just brings us to horizontal gene transfer, recognized as a major generator of novelty in bacterial genomes (Arnold et al., <xref ref-type="bibr" rid="B1">2022</xref>). The origins of novel bacterial genes likely involve a combination of evolutionary mechanisms. These include horizontal gene transfer from other bacteria or phages, gene duplication followed by divergence, frameshift-based innovation, and <italic>de novo</italic> emergence from previously non-coding sequences (Lang et al., <xref ref-type="bibr" rid="B32">2017</xref>; Tautz and Domazet-Lo&#x00161;o, <xref ref-type="bibr" rid="B51">2011</xref>; Neme and Tautz, <xref ref-type="bibr" rid="B42">2014</xref>; Xia et al., <xref ref-type="bibr" rid="B57">2025</xref>). While prophage regions represent identifiable hotspots for gene acquisition in both <italic>E. coli</italic> and <italic>B. subtilis</italic>, they account for only a fraction of the younger genes. Additional new genes may arise gradually within transcriptionally active regions, as proposed for proto-gene evolution in eukaryotes (Grandchamp et al., <xref ref-type="bibr" rid="B19">2022</xref>), although the relative importance of these mechanisms in bacteria remains difficult to quantify. In many cases, phages represent a large fraction of the strain-specific DNA sequences (Br&#x000FC;ssow et al., <xref ref-type="bibr" rid="B5">2003</xref>). Our results suggest that the prophage regions in <italic>E. 
coli</italic> and <italic>B. subtilis</italic> genomes represent hot spots for more recent genes (<xref ref-type="fig" rid="F3">Figure 3</xref>). Gene transfer is particularly intense in bacterial communities (Brito, <xref ref-type="bibr" rid="B4">2021</xref>). This means that any <italic>de novo</italic> genes that evolve in bacteria become more or less immediately available to all community members by means of horizontal gene transfer. Then, the genetic makeup of an individual bacterial species gets defined by the preferred niche and environmental challenges. The soil dwelling <italic>B. subtilis</italic>, having to cope with very variable and adverse environmental challenges, picked up more of the &#x0201C;novelty&#x0201D; tools for its adaptation toolbox. <italic>E. coli</italic>, adapting to a less challenging and more constant environment of a symbiont, does not require such a diverse developmental toolkit, hence the reduced proportion of novel, development-related genes. However, <italic>E. coli</italic> is fully capable of taking up additional functions when switching from commensal to pathogen lifestyle (Dobrindt et al., <xref ref-type="bibr" rid="B9">2010</xref>), a phenomenon that has been described as the &#x0201C;unexhausted potential&#x0201D; of <italic>E. coli</italic> (Blount, <xref ref-type="bibr" rid="B2">2015</xref>). Overall, it would appear that <italic>de novo</italic> genes are relatively accessible to bacteria, which are far less siloed than e.g., plants or metazoans, with strictly vertical evolution patterns.</p>
<p>Younger genes identified in bacteria follow the same pattern as those in more complex life forms (Neme and Tautz, <xref ref-type="bibr" rid="B42">2014</xref>): they tend to be short, non-essential, and expressed at a low level or only under certain specific conditions. This description, in conjunction with the known association of essential and highly expressed genes to the bacterial origin of replication (Ying et al., <xref ref-type="bibr" rid="B58">2014</xref>; Kosmidis et al., <xref ref-type="bibr" rid="B29">2020</xref>; Lato and Golding, <xref ref-type="bibr" rid="B33">2020</xref>), would suggest that the more recent genes should be expected to cluster closer to the replication terminus. Surprisingly, our analysis revealed a weak (with prophages included) or no correlation (with prophages excluded) between location of more recent genes and the replication origin (<xref ref-type="fig" rid="F2">Figure 2</xref>). One possible explanation for this finding is linked to constraints on placement of new genes in the genome. As noted by Grandchamp et al. (<xref ref-type="bibr" rid="B19">2022</xref>), <italic>de novo</italic> genes in humans are more likely to arise in transcribed regions, coupled to existing elements of transcriptional regulation. This makes sense from a probabilistic perspective, since there are fewer constraints for creating an open reading frame from a non-coding sequence than for creating a promoter region and other regulatory sequences required for ensuring gene expression in each organism. While bacteria do not have complex transcription regulation based on chromatin de-condensation to enable transcription, they do possess transcriptional regulatory units known as operons. 
We report an analogous observation that in bacteria new genes get predominantly inserted in preexisting operons (<xref ref-type="fig" rid="F4">Figures 4A, B</xref>) and thus get placed under control of available transcription regulators (<xref ref-type="fig" rid="F4">Figures 4C&#x02013;F</xref>). As a housekeeping process, transcription control is generally ensured by old genes, and we found very few examples of new transcription regulators, such as ComK, Rok, GerR and SwrAA/1 from <italic>B. subtilis</italic>. The evolution of new developmental phenomena seldom involves establishing completely novel operons. Only 11 such operons were found in <italic>B. subtilis</italic> and 13 in <italic>E. coli</italic> (<xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>). Rather, new developmental programs in bacteria seem to arise as a combination of new genes, and extensive repurposing of older genes and operons (<xref ref-type="fig" rid="F5">Figures 5C&#x02013;F</xref>). This is supported by our observation that many of the genes involved in processes such as sporulation, competence, and biofilm formation originate from the oldest PS, particularly PS1, and have likely been incorporated into newer regulatory frameworks. This aligns with the view that bacterial developmental systems can evolve as modular additions, but also indicates that these modules are, at least in part, constructed from pre-existing components whose original functions were retained or adapted to new roles. In this light, bacterial development reflects both evolutionary innovation and the reorganization of existing genetic material, rather than purely the acquisition of entirely novel gene modules.</p>
<p>Our analysis of the two model bacteria suggests that there are important lessons to be learned from looking at bacterial genome evolution and genome organization from the perspective of evolutionary age of new genes. More recent bacterial genes tend to be short, non-essential, and their level of expression is generally low. Despite these features, newer genes are surprisingly not preferentially located far from the origin of replication. Their genomic location is rather uniform, and they are only in some instances enriched in areas containing mobile genetic elements, such as prophages. While most bacterial transcription regulators belong to the oldest phylostrata, they were found to regulate expression of both older and more recent genes alike. This suggests that many newer genes get inserted in the existing operons under the control of conserved regulatory elements. In particular, <italic>E. coli</italic> contains significantly fewer novel genes than <italic>B. subtilis</italic>, and this is mirrored by its more limited repertoire of developmental processes. By contrast, <italic>B. subtilis</italic> exhibits a more eventful evolutionary past in terms of acquisition of new genes, and particularly in this species, which possesses highly elaborate developmental regulation (e.g., during sporulation), it is evident that new regulatory systems have also evolved. The observation that many genetic operons contain genes from different phylostrata indicates a layered evolutionary history, in which new genes are incorporated into both pre-existing and newly formed frameworks for gene expression.</p>
<p>Genomic features described above suggest that phylostratigraphy approaches could assist in genome mining efforts (Mijakovic, <xref ref-type="bibr" rid="B38">2020</xref>), not by predicting individual gene function directly, but by identifying phylostrata enriched in specific biological processes. For example, in the case of sporulation in <italic>B. subtilis</italic>, genes involved in this process were overrepresented in certain phylostrata, and a higher fraction of uncharacterized proteins from these phylostrata were experimentally shown to be involved in sporulation (Shi et al., <xref ref-type="bibr" rid="B49">2020</xref>), demonstrating how phylostratigraphy can highlight candidate genes for targeted validation, particularly in biological processes involving functionally related gene sets that emerged during the same evolutionary interval.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>CJ: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. HM: Writing &#x02013; review &#x00026; editing. VR: Writing &#x02013; review &#x00026; editing. AG: Writing &#x02013; review &#x00026; editing. DF: Writing &#x02013; review &#x00026; editing. TD-L: Writing &#x02013; review &#x00026; editing. IM: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by grants to IM from the Novo Nordisk Foundation (NNF20CC0035580) and the Independent Research Fund Denmark (9040-00075B).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s8">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmicb.2025.1512923/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmicb.2025.1512923/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_1.xlsx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_2.xlsx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_3.xlsx" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_4.xlsx" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_5.xlsx" id="SM6" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image_1.tif" id="SM7" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image_2.tif" id="SM8" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image_3.tif" id="SM9" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arnold</surname> <given-names>B. J.</given-names></name> <name><surname>Huang</surname> <given-names>I. T.</given-names></name> <name><surname>Hanage</surname> <given-names>W. P.</given-names></name></person-group> (<year>2022</year>). <article-title>Horizontal gene transfer and adaptive evolution in bacteria</article-title>. <source>Nat. Rev. Microbiol.</source> <volume>20</volume>, <fpage>206</fpage>&#x02013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-021-00650-4</pub-id><pub-id pub-id-type="pmid">34773098</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blount</surname> <given-names>Z. D.</given-names></name></person-group> (<year>2015</year>). <article-title>The unexhausted potential of <italic>E. coli</italic></article-title>. <source>Elife</source> <volume>4</volume>:<fpage>e05826</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.05826</pub-id><pub-id pub-id-type="pmid">25807083</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boyle</surname> <given-names>K. E.</given-names></name> <name><surname>Heilmann</surname> <given-names>S.</given-names></name> <name><surname>van Ditmarsch</surname> <given-names>D.</given-names></name> <name><surname>Xavier</surname> <given-names>J. B.</given-names></name></person-group> (<year>2013</year>). <article-title>Exploiting social evolution in biofilms</article-title>. <source>Curr. Opin. Microbiol.</source> <volume>16</volume>, <fpage>207</fpage>&#x02013;<lpage>212</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2013.01.003</pub-id><pub-id pub-id-type="pmid">23357558</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brito</surname> <given-names>I. L.</given-names></name></person-group> (<year>2021</year>). <article-title>Examining horizontal gene transfer in microbial communities</article-title>. <source>Nat. Rev. Microbiol.</source> <volume>19</volume>, <fpage>442</fpage>&#x02013;<lpage>453</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-021-00534-7</pub-id><pub-id pub-id-type="pmid">33846600</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Br&#x000FC;ssow</surname> <given-names>H.</given-names></name> <name><surname>Canchaya</surname> <given-names>C.</given-names></name> <name><surname>Hardt</surname> <given-names>W.-D.</given-names></name></person-group> (<year>2004</year>). <article-title>Phages and the evolution of bacterial pathogens: from genomic rearrangements to lysogenic conversion</article-title>. <source>Microbiol. Mol. Biol. Rev.</source> <volume>68</volume>, <fpage>560</fpage>&#x02013;<lpage>602</lpage>. <pub-id pub-id-type="doi">10.1128/MMBR.68.3.560-602.2004</pub-id><pub-id pub-id-type="pmid">15353570</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Calvio</surname> <given-names>C.</given-names></name> <name><surname>Celandroni</surname> <given-names>F.</given-names></name> <name><surname>Ghelardi</surname> <given-names>E.</given-names></name> <name><surname>Amati</surname> <given-names>G.</given-names></name> <name><surname>Salvetti</surname> <given-names>S.</given-names></name> <name><surname>Ceciliani</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2005</year>). <article-title>Swarming differentiation and swimming motility in <italic>Bacillus subtilis</italic> are controlled by <italic>swrA</italic>, a newly identified dicistronic operon</article-title>. <source>J. Bacteriol</source>. <volume>187</volume>, <fpage>5356</fpage>&#x02013;<lpage>5366</lpage>. <pub-id pub-id-type="doi">10.1128/JB.187.15.5356-5366.2005</pub-id><pub-id pub-id-type="pmid">16030230</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Casci</surname> <given-names>T.</given-names></name></person-group> (<year>2011</year>). <article-title>Hourglass theory gets molecular approval</article-title>. <source>Nat. Rev. Genet.</source> <volume>12</volume>:<fpage>76</fpage>. <pub-id pub-id-type="doi">10.1038/nrg2940</pub-id><pub-id pub-id-type="pmid">21173773</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Corak</surname> <given-names>N.</given-names></name> <name><surname>Anniko</surname> <given-names>S.</given-names></name> <name><surname>Daschkin-Steinborn</surname> <given-names>C.</given-names></name> <name><surname>Krey</surname> <given-names>V.</given-names></name> <name><surname>Koska</surname> <given-names>S.</given-names></name> <name><surname>Futo</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Pleomorphic variants of <italic>Borreliella</italic> (syn. <italic>Borrelia</italic>) <italic>burgdorferi</italic> express evolutionary distinct transcriptomes</article-title>. <source>Int. J. Mol. Sci.</source> <volume>24</volume>:<fpage>5594</fpage>. <pub-id pub-id-type="doi">10.3390/ijms24065594</pub-id><pub-id pub-id-type="pmid">36982667</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dobrindt</surname> <given-names>U.</given-names></name> <name><surname>Chowdary</surname> <given-names>M. G.</given-names></name> <name><surname>Krumbholz</surname> <given-names>G.</given-names></name> <name><surname>Hacker</surname> <given-names>J.</given-names></name></person-group> (<year>2010</year>). <article-title>Genome dynamics and its impact on evolution of <italic>Escherichia coli</italic></article-title>. <source>Med. Microbiol. Immunol.</source> <volume>199</volume>, <fpage>145</fpage>&#x02013;<lpage>154</lpage>. <pub-id pub-id-type="doi">10.1007/s00430-010-0161-2</pub-id><pub-id pub-id-type="pmid">20445988</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domazet-Lo&#x00161;o</surname> <given-names>M.</given-names></name> <name><surname>&#x00160;iroki</surname> <given-names>T.</given-names></name> <name><surname>&#x00160;imi&#x0010D;evi&#x00107;</surname> <given-names>K.</given-names></name> <name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name></person-group> (<year>2024</year>). <article-title>Macroevolutionary dynamics of gene family gain and loss along multicellular eukaryotic lineages</article-title>. <source>Nat. Commun</source>. <volume>15</volume>:<fpage>2663</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-024-47017-w</pub-id><pub-id pub-id-type="pmid">38531970</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name> <name><surname>Brajkovi&#x00107;</surname> <given-names>J.</given-names></name> <name><surname>Tautz</surname> <given-names>D.</given-names></name></person-group> (<year>2007</year>). <article-title>A phylostratigraphy approach to uncover the genomic history of major adaptations in metazoan lineages</article-title>. <source>Trends Genet.</source> <volume>23</volume>, <fpage>533</fpage>&#x02013;<lpage>539</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2007.08.014</pub-id><pub-id pub-id-type="pmid">18029048</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name> <name><surname>Carvunis</surname> <given-names>A. R.</given-names></name> <name><surname>Alb&#x000E0;</surname> <given-names>M. M.</given-names></name> <name><surname>&#x00160;estak</surname> <given-names>M. S.</given-names></name> <name><surname>Bakaric</surname> <given-names>R.</given-names></name> <name><surname>Neme</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>No evidence for phylostratigraphic bias impacting inferences on patterns of gene emergence and evolution</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume>, <fpage>843</fpage>&#x02013;<lpage>856</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msw284</pub-id><pub-id pub-id-type="pmid">28087778</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name> <name><surname>Tautz</surname> <given-names>D.</given-names></name></person-group> (<year>2010a</year>). <article-title>Phylostratigraphic tracking of cancer genes suggests a link to the emergence of multicellularity in metazoa</article-title>. <source>BMC Biol.</source> <volume>8</volume>:<fpage>66</fpage>. <pub-id pub-id-type="doi">10.1186/1741-7007-8-66</pub-id><pub-id pub-id-type="pmid">20492640</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name> <name><surname>Tautz</surname> <given-names>D.</given-names></name></person-group> (<year>2010b</year>). <article-title>A phylogenetically based transcriptome age index mirrors ontogenetic divergence patterns</article-title>. <source>Nature</source> <volume>468</volume>, <fpage>815</fpage>&#x02013;<lpage>818</lpage>. <pub-id pub-id-type="doi">10.1038/nature09632</pub-id><pub-id pub-id-type="pmid">21150997</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Duigou</surname> <given-names>S.</given-names></name> <name><surname>Boccard</surname> <given-names>F.</given-names></name></person-group> (<year>2017</year>). <article-title>Long range chromosome organization in <italic>Escherichia coli</italic>: the position of the replication origin defines the non-structured regions and the Right and Left macrodomains</article-title>. <source>PLoS Genet.</source> <volume>13</volume>:<fpage>e1006758</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1006758</pub-id><pub-id pub-id-type="pmid">28486476</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eguchi</surname> <given-names>Y.</given-names></name> <name><surname>Itou</surname> <given-names>J.</given-names></name> <name><surname>Yamane</surname> <given-names>M.</given-names></name> <name><surname>Demizu</surname> <given-names>R.</given-names></name> <name><surname>Yamato</surname> <given-names>F.</given-names></name> <name><surname>Okada</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>B1500, a small membrane protein, connects the two-component systems EvgS/EvgA and PhoQ/PhoP in <italic>Escherichia coli</italic></article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>104</volume>, <fpage>18712</fpage>&#x02013;<lpage>18717</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0705768104</pub-id><pub-id pub-id-type="pmid">17998538</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Futo</surname> <given-names>M.</given-names></name> <name><surname>Opa&#x00161;i&#x00107;</surname> <given-names>L.</given-names></name> <name><surname>Koska</surname> <given-names>S.</given-names></name> <name><surname>Corak</surname> <given-names>N.</given-names></name> <name><surname>&#x00160;iroki</surname> <given-names>T.</given-names></name> <name><surname>Ravikumar</surname> <given-names>V.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Embryo-like features in developing <italic>Bacillus subtilis</italic> biofilms</article-title>. <source>Mol. Biol. Evol.</source> <volume>38</volume>, <fpage>31</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msaa217</pub-id><pub-id pub-id-type="pmid">32871001</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grandchamp</surname> <given-names>A.</given-names></name> <name><surname>Berk</surname> <given-names>K.</given-names></name> <name><surname>Dohmen</surname> <given-names>E.</given-names></name> <name><surname>Bornberg-Bauer</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>New genomic signals underlying the emergence of human proto-genes</article-title>. <source>Genes</source> <volume>13</volume>:<fpage>284</fpage>. <pub-id pub-id-type="doi">10.3390/genes13020284</pub-id><pub-id pub-id-type="pmid">35205330</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grigoriev</surname> <given-names>A.</given-names></name></person-group> (<year>1998</year>). <article-title>Analyzing genomes with cumulative skew diagrams</article-title>. <source>Nucl. Acids Res</source>. <volume>26</volume>, <fpage>2286</fpage>&#x02013;<lpage>2290</lpage>. <pub-id pub-id-type="doi">10.1093/nar/26.10.2286</pub-id><pub-id pub-id-type="pmid">9580676</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haldenwang</surname> <given-names>W. G.</given-names></name> <name><surname>Lang</surname> <given-names>N.</given-names></name> <name><surname>Losick</surname> <given-names>R.</given-names></name></person-group> (<year>1981</year>). <article-title>A sporulation-induced sigma-like regulatory protein from <italic>B. subtilis</italic></article-title>. <source>Cell</source> <volume>23</volume>, <fpage>615</fpage>&#x02013;<lpage>624</lpage>. <pub-id pub-id-type="doi">10.1016/0092-8674(81)90157-4</pub-id><pub-id pub-id-type="pmid">6781761</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoa</surname> <given-names>T. T.</given-names></name> <name><surname>Tortosa</surname> <given-names>P.</given-names></name> <name><surname>Albano</surname> <given-names>M.</given-names></name> <name><surname>Dubnau</surname> <given-names>D.</given-names></name></person-group> (<year>2002</year>). <article-title>Rok (YkuW) regulates genetic competence in <italic>Bacillus subtilis</italic> by directly repressing <italic>comK</italic></article-title>. <source>Mol. Microbiol</source>. <volume>43</volume>, <fpage>15</fpage>&#x02013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1046/j.1365-2958.2002.02727.x</pub-id><pub-id pub-id-type="pmid">11849533</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang da</surname> <given-names>W.</given-names></name> <name><surname>Sherman</surname> <given-names>B. T.</given-names></name> <name><surname>Lempicki</surname> <given-names>R. A.</given-names></name></person-group> (<year>2009a</year>). <article-title>Systematic and integrative analysis of large gene lists using DAVID bioinformatics resources</article-title>. <source>Nat. Protoc.</source> <volume>4</volume>, <fpage>44</fpage>&#x02013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1038/nprot.2008.211</pub-id><pub-id pub-id-type="pmid">19131956</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang da</surname> <given-names>W.</given-names></name> <name><surname>Sherman</surname> <given-names>B. T.</given-names></name> <name><surname>Lempicki</surname> <given-names>R. A.</given-names></name></person-group> (<year>2009b</year>). <article-title>Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists</article-title>. <source>Nucl. Acids Res.</source> <volume>37</volume>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn923</pub-id><pub-id pub-id-type="pmid">19033363</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iyengar</surname> <given-names>B. R.</given-names></name> <name><surname>Bornberg-Bauer</surname> <given-names>E.</given-names></name></person-group> (<year>2023</year>). <article-title>Neutral models of <italic>de novo</italic> gene emergence suggest that gene evolution has a preferred trajectory</article-title>. <source>Mol. Biol. Evol.</source> <volume>40</volume>:<fpage>msad079</fpage>. <pub-id pub-id-type="doi">10.1093/molbev/msad079</pub-id><pub-id pub-id-type="pmid">37011142</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Karp</surname> <given-names>P. D.</given-names></name> <name><surname>Billington</surname> <given-names>R.</given-names></name> <name><surname>Caspi</surname> <given-names>R.</given-names></name> <name><surname>Fulcher</surname> <given-names>C. A.</given-names></name> <name><surname>Latendresse</surname> <given-names>M.</given-names></name> <name><surname>Kothari</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>The BioCyc collection of microbial genomes and metabolic pathways</article-title>. <source>Brief. Bioinf.</source> <volume>20</volume>, <fpage>1085</fpage>&#x02013;<lpage>1093</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbx085</pub-id><pub-id pub-id-type="pmid">29447345</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Knopp</surname> <given-names>M.</given-names></name> <name><surname>Gudmundsdottir</surname> <given-names>J. S.</given-names></name> <name><surname>Nilsson</surname> <given-names>T.</given-names></name> <name><surname>K&#x000F6;nig</surname> <given-names>F.</given-names></name> <name><surname>Warsi</surname> <given-names>O.</given-names></name> <name><surname>Rajer</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title><italic>De Novo</italic> emergence of peptides that confer antibiotic resistance</article-title>. <source>mBio</source> <volume>10</volume>:<fpage>e00837-19</fpage>. <pub-id pub-id-type="doi">10.1128/mBio.00837-19</pub-id><pub-id pub-id-type="pmid">31164464</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koska</surname> <given-names>S.</given-names></name> <name><surname>Leljak-Levani&#x00107;</surname> <given-names>D.</given-names></name> <name><surname>Malenica</surname> <given-names>N.</given-names></name> <name><surname>Villi</surname> <given-names>K. B.</given-names></name> <name><surname>Futo</surname> <given-names>M.</given-names></name> <name><surname>Corak</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Somatic embryogenesis of grapevine (<italic>Vitis vinifera</italic>) expresses a transcriptomic hourglass</article-title>. <source>bioRxiv</source> <fpage>2024.04.08.588272</fpage>. <pub-id pub-id-type="doi">10.1101/2024.04.08.588272</pub-id><pub-id pub-id-type="pmid">39972184</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kosmidis</surname> <given-names>K.</given-names></name> <name><surname>Jablonski</surname> <given-names>K. P.</given-names></name> <name><surname>Muskhelishvili</surname> <given-names>G.</given-names></name> <name><surname>H&#x000FC;tt</surname> <given-names>M.-T.</given-names></name></person-group> (<year>2020</year>). <article-title>Chromosomal origin of replication coordinates logically distinct types of bacterial genetic regulation</article-title>. <source>NPJ Syst. Biol. Appl</source>. <volume>6</volume>:<fpage>5</fpage>. <pub-id pub-id-type="doi">10.1038/s41540-020-0124-1</pub-id><pub-id pub-id-type="pmid">32066730</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kunkel</surname> <given-names>B.</given-names></name> <name><surname>Kroos</surname> <given-names>L.</given-names></name> <name><surname>Poth</surname> <given-names>H.</given-names></name> <name><surname>Youngman</surname> <given-names>P.</given-names></name> <name><surname>Losick</surname> <given-names>R.</given-names></name></person-group> (<year>1989</year>). <article-title>Temporal and spatial control of the mother-cell regulatory gene <italic>spoIIID</italic> of <italic>Bacillus subtilis</italic></article-title>. <source>Genes Dev</source>. <volume>3</volume>, <fpage>1735</fpage>&#x02013;<lpage>1744</lpage>. <pub-id pub-id-type="doi">10.1101/gad.3.11.1735</pub-id><pub-id pub-id-type="pmid">2514119</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kuwana</surname> <given-names>R.</given-names></name> <name><surname>Okumura</surname> <given-names>T.</given-names></name> <name><surname>Takamatsu</surname> <given-names>H.</given-names></name> <name><surname>Watabe</surname> <given-names>K.</given-names></name></person-group> (<year>2005</year>). <article-title>The <italic>ylbO</italic> gene product of <italic>Bacillus subtilis</italic> is involved in the coat development and lysozyme resistance of spore</article-title>. <source>FEMS Microbiol. Lett</source>. <volume>242</volume>, <fpage>51</fpage>&#x02013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1016/j.femsle.2004.10.038</pub-id><pub-id pub-id-type="pmid">15621419</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lang</surname> <given-names>A.</given-names></name> <name><surname>Beatty</surname> <given-names>J. T.</given-names></name> <name><surname>Rice</surname> <given-names>P. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Guest editorial: mobile genetic elements and horizontal gene transfer in prokaryotes</article-title>. <source>Curr. Opin. Microbiol.</source> <volume>38</volume>, <fpage>v</fpage>&#x02013;<lpage>vii</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2017.09.018</pub-id><pub-id pub-id-type="pmid">29173837</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lato</surname> <given-names>D. F.</given-names></name> <name><surname>Golding</surname> <given-names>G. B.</given-names></name></person-group> (<year>2020</year>). <article-title>Spatial patterns of gene expression in bacterial genomes</article-title>. <source>J. Mol. Evol.</source> <volume>88</volume>, <fpage>510</fpage>&#x02013;<lpage>520</lpage>. <pub-id pub-id-type="doi">10.1007/s00239-020-09951-3</pub-id><pub-id pub-id-type="pmid">32506154</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lato</surname> <given-names>D. F.</given-names></name> <name><surname>Golding</surname> <given-names>G. B.</given-names></name></person-group> (<year>2021</year>). <article-title>The location of substitutions and bacterial genome arrangements</article-title>. <source>Genome Biol. Evol.</source> <volume>13</volume>:<fpage>evaa260</fpage>. <pub-id pub-id-type="doi">10.1093/gbe/evaa260</pub-id><pub-id pub-id-type="pmid">33320172</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Light</surname> <given-names>S.</given-names></name> <name><surname>Basile</surname> <given-names>W.</given-names></name> <name><surname>Elofsson</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>Orphans and new gene origination, a structural and evolutionary perspective</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>26</volume>, <fpage>73</fpage>&#x02013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2014.05.006</pub-id><pub-id pub-id-type="pmid">24934869</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lippa</surname> <given-names>A. M.</given-names></name> <name><surname>Goulian</surname> <given-names>M.</given-names></name></person-group> (<year>2009</year>). <article-title>Feedback inhibition in the PhoQ/PhoP signaling system by a membrane peptide</article-title>. <source>PLoS Genet.</source> <volume>5</volume>:<fpage>e1000788</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1000788</pub-id><pub-id pub-id-type="pmid">20041203</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Martin</surname> <given-names>M.</given-names></name> <name><surname>H&#x000F6;lscher</surname> <given-names>T.</given-names></name> <name><surname>Drago&#x00161;</surname> <given-names>A.</given-names></name> <name><surname>Cooper</surname> <given-names>V. S.</given-names></name> <name><surname>Kov&#x000E1;cs</surname> <given-names>&#x000C1;. T.</given-names></name></person-group> (<year>2016</year>). <article-title>Laboratory evolution of microbial interactions in bacterial biofilms</article-title>. <source>J. Bacteriol.</source> <volume>198</volume>, <fpage>2564</fpage>&#x02013;<lpage>2571</lpage>. <pub-id pub-id-type="doi">10.1128/JB.01018-15</pub-id><pub-id pub-id-type="pmid">27044625</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mijakovic</surname> <given-names>I.</given-names></name></person-group> (<year>2020</year>). <article-title>Evolutionary age of genes can assist in genome mining</article-title>. <source>Period. Biol.</source> <volume>121&#x02013;122</volume>, <fpage>3</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.18054/pb.v121-122i1-2.10737</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moyers</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Phylostratigraphic bias creates spurious patterns of genome evolution</article-title>. <source>Mol. Biol. Evol</source>. <volume>33</volume>:<fpage>3031</fpage>. <pub-id pub-id-type="doi">10.1093/molbev/msw202</pub-id><pub-id pub-id-type="pmid">27738272</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moyers</surname> <given-names>B. A.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Phylostratigraphic bias creates spurious patterns of genome evolution</article-title>. <source>Mol. Biol. Evol</source>. <volume>32</volume>, <fpage>258</fpage>&#x02013;<lpage>267</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msu286</pub-id><pub-id pub-id-type="pmid">25312911</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mushegian</surname> <given-names>A.</given-names></name></person-group> (<year>2008</year>). <article-title>Gene content of LUCA, the last universal common ancestor</article-title>. <source>Front. Biosci.</source> <volume>13</volume>, <fpage>4657</fpage>&#x02013;<lpage>4666</lpage>. <pub-id pub-id-type="doi">10.2741/3031</pub-id><pub-id pub-id-type="pmid">18508537</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neme</surname> <given-names>R.</given-names></name> <name><surname>Tautz</surname> <given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>Evolution: dynamics of de novo gene emergence</article-title>. <source>Curr. Biol.</source> <volume>24</volume>, <fpage>R238</fpage>&#x02013;<lpage>R240</lpage>. <pub-id pub-id-type="doi">10.1016/j.cub.2014.02.016</pub-id><pub-id pub-id-type="pmid">24650912</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nicolas</surname> <given-names>P.</given-names></name> <name><surname>M&#x000E4;der</surname> <given-names>U.</given-names></name> <name><surname>Dervyn</surname> <given-names>E.</given-names></name> <name><surname>Rochat</surname> <given-names>T.</given-names></name> <name><surname>Leduc</surname> <given-names>A.</given-names></name> <name><surname>Pigeonneau</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Condition-dependent transcriptome reveals high-level regulatory architecture in <italic>Bacillus subtilis</italic></article-title>. <source>Science</source> <volume>335</volume>, <fpage>1103</fpage>&#x02013;<lpage>1106</lpage>. <pub-id pub-id-type="doi">10.1126/science.1206848</pub-id><pub-id pub-id-type="pmid">22383849</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Overmars</surname> <given-names>L.</given-names></name> <name><surname>van Hijum</surname> <given-names>S. A.</given-names></name> <name><surname>Siezen</surname> <given-names>R. J.</given-names></name> <name><surname>Francke</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). <article-title>CiVi: circular genome visualization with unique features to analyze sequence elements</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>2867</fpage>&#x02013;<lpage>2869</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv249</pub-id><pub-id pub-id-type="pmid">25910699</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ravikumar</surname> <given-names>V.</given-names></name> <name><surname>Nalpas</surname> <given-names>N. C.</given-names></name> <name><surname>Anselm</surname> <given-names>V.</given-names></name> <name><surname>Krug</surname> <given-names>K.</given-names></name> <name><surname>Lenuzzi</surname> <given-names>M.</given-names></name> <name><surname>&#x00160;estak</surname> <given-names>M. S.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>In-depth analysis of <italic>Bacillus subtilis</italic> proteome identifies new ORFs and traces the evolutionary history of modified proteins</article-title>. <source>Sci. Rep.</source> <volume>8</volume>:<fpage>17246</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-35589-9</pub-id><pub-id pub-id-type="pmid">30467398</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ricci-Tam</surname> <given-names>C.</given-names></name> <name><surname>Kuipa</surname> <given-names>S.</given-names></name> <name><surname>Kostman</surname> <given-names>M. P.</given-names></name> <name><surname>Aronson</surname> <given-names>M. S.</given-names></name> <name><surname>Sgro</surname> <given-names>A. E.</given-names></name></person-group> (<year>2023</year>). <article-title>Microbial models of development: inspiration for engineering self-assembled synthetic multicellularity</article-title>. <source>Semin. Cell Dev. Biol.</source> <volume>141</volume>, <fpage>50</fpage>&#x02013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1016/j.semcdb.2022.04.014</pub-id><pub-id pub-id-type="pmid">35537929</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salinas</surname> <given-names>N.</given-names></name> <name><surname>Povolotsky</surname> <given-names>T. L.</given-names></name> <name><surname>Landau</surname> <given-names>M.</given-names></name> <name><surname>Kolodkin-Gal</surname> <given-names>I.</given-names></name></person-group> (<year>2020</year>). <article-title>Emerging roles of functional bacterial amyloids in gene regulation, toxicity, and immunomodulation</article-title>. <source>Microbiol. Mol. Biol. Rev.</source> <volume>85</volume>:<fpage>e00062-20</fpage>. <pub-id pub-id-type="doi">10.1128/MMBR.00062-20</pub-id><pub-id pub-id-type="pmid">33239434</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Santos-Zavaleta</surname> <given-names>A.</given-names></name> <name><surname>Salgado</surname> <given-names>H.</given-names></name> <name><surname>Gama-Castro</surname> <given-names>S.</given-names></name> <name><surname>S&#x000E1;nchez-P&#x000E9;rez</surname> <given-names>M.</given-names></name> <name><surname>G&#x000F3;mez-Romero</surname> <given-names>L.</given-names></name> <name><surname>Ledezma-Tejeida</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>RegulonDB v 10.5: tackling challenges to unify classic and high throughput knowledge of gene regulation in <italic>E. coli</italic> K-12</article-title>. <source>Nucl. Acids Res.</source> <volume>47</volume>, <fpage>D212</fpage>&#x02013;<lpage>D220</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky1077</pub-id><pub-id pub-id-type="pmid">30395280</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>L.</given-names></name> <name><surname>Derouiche</surname> <given-names>A.</given-names></name> <name><surname>Pandit</surname> <given-names>S.</given-names></name> <name><surname>Rahimi</surname> <given-names>S.</given-names></name> <name><surname>Kalantari</surname> <given-names>A.</given-names></name> <name><surname>Futo</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Evolutionary analysis of the <italic>Bacillus subtilis</italic> genome reveals new genes involved in sporulation</article-title>. <source>Mol. Biol. Evol.</source> <volume>37</volume>, <fpage>1667</fpage>&#x02013;<lpage>1678</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msaa035</pub-id><pub-id pub-id-type="pmid">32061128</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stragier</surname> <given-names>P.</given-names></name> <name><surname>Kunkel</surname> <given-names>B.</given-names></name> <name><surname>Kroos</surname> <given-names>L.</given-names></name> <name><surname>Losick</surname> <given-names>R.</given-names></name></person-group> (<year>1989</year>). <article-title>Chromosomal rearrangement generating a composite gene for a developmental transcription factor</article-title>. <source>Science</source> <volume>243</volume>, <fpage>507</fpage>&#x02013;<lpage>512</lpage>. <pub-id pub-id-type="doi">10.1126/science.2536191</pub-id><pub-id pub-id-type="pmid">2536191</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tautz</surname> <given-names>D.</given-names></name> <name><surname>Domazet-Lo&#x00161;o</surname> <given-names>T.</given-names></name></person-group> (<year>2011</year>). <article-title>The evolutionary origin of orphan genes</article-title>. <source>Nat. Rev. Genet</source>. <volume>12</volume>, <fpage>692</fpage>&#x02013;<lpage>702</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3053</pub-id><pub-id pub-id-type="pmid">21878963</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><collab>UniProt Consortium</collab></person-group> (<year>2023</year>). <article-title>UniProt: the universal protein knowledgebase in 2023</article-title>. <source>Nucl. Acids Res.</source> <volume>51</volume>, <fpage>D523</fpage>&#x02013;<lpage>D531</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkac1052</pub-id><pub-id pub-id-type="pmid">36408920</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Valens</surname> <given-names>M.</given-names></name> <name><surname>Penaud</surname> <given-names>S.</given-names></name> <name><surname>Rossignol</surname> <given-names>M.</given-names></name> <name><surname>Cornet</surname> <given-names>F.</given-names></name> <name><surname>Boccard</surname> <given-names>F.</given-names></name></person-group> (<year>2004</year>). <article-title>Macrodomain organization of the <italic>Escherichia coli</italic> chromosome</article-title>. <source>EMBO J.</source> <volume>23</volume>, <fpage>4330</fpage>&#x02013;<lpage>4341</lpage>. <pub-id pub-id-type="doi">10.1038/sj.emboj.7600434</pub-id><pub-id pub-id-type="pmid">15470498</pub-id></citation></ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>van Sinderen</surname> <given-names>D.</given-names></name> <name><surname>ten Berge</surname> <given-names>A.</given-names></name> <name><surname>Hayema</surname> <given-names>B. J.</given-names></name> <name><surname>Hamoen</surname> <given-names>L.</given-names></name> <name><surname>Venema</surname> <given-names>G.</given-names></name></person-group> (<year>1994</year>). <article-title>Molecular cloning and sequence of <italic>comK</italic>, a gene required for genetic competence in <italic>Bacillus subtilis</italic></article-title>. <source>Mol. Microbiol</source>. <volume>11</volume>, <fpage>695</fpage>&#x02013;<lpage>703</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2958.1994.tb00347.x</pub-id><pub-id pub-id-type="pmid">8196543</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Kim</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>Q.</given-names></name> <name><surname>Hong</surname> <given-names>S. H.</given-names></name> <name><surname>Pokusaeva</surname> <given-names>K.</given-names></name> <name><surname>Sturino</surname> <given-names>J. M.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Cryptic prophages help bacteria cope with adverse environments</article-title>. <source>Nat. Commun.</source> <volume>1</volume>:<fpage>147</fpage>. <pub-id pub-id-type="doi">10.1038/ncomms1146</pub-id><pub-id pub-id-type="pmid">21266997</pub-id></citation></ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weber</surname> <given-names>H.</given-names></name> <name><surname>Polen</surname> <given-names>T.</given-names></name> <name><surname>Heuveling</surname> <given-names>J.</given-names></name> <name><surname>Wendisch</surname> <given-names>V. F.</given-names></name> <name><surname>Hengge</surname> <given-names>R.</given-names></name></person-group> (<year>2005</year>). <article-title>Genome-wide analysis of the general stress response network in <italic>Escherichia coli</italic>: sigmaS-dependent genes, promoters, and sigma factor selectivity</article-title>. <source>J. Bacteriol.</source> <volume>187</volume>, <fpage>1591</fpage>&#x02013;<lpage>1603</lpage>. <pub-id pub-id-type="doi">10.1128/JB.187.5.1591-1603.2005</pub-id><pub-id pub-id-type="pmid">15716429</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>S.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Arsala</surname> <given-names>D.</given-names></name> <name><surname>Emerson</surname> <given-names>J. J.</given-names></name> <name><surname>Long</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>Functional innovation through new genes as a general evolutionary process</article-title>. <source>Nat. Genet</source>. <volume>57</volume>, <fpage>295</fpage>&#x02013;<lpage>309</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-024-02059-0</pub-id><pub-id pub-id-type="pmid">39875578</pub-id></citation></ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ying</surname> <given-names>B. W.</given-names></name> <name><surname>Tsuru</surname> <given-names>S.</given-names></name> <name><surname>Seno</surname> <given-names>S.</given-names></name> <name><surname>Matsuda</surname> <given-names>H.</given-names></name> <name><surname>Yomo</surname> <given-names>T.</given-names></name></person-group> (<year>2014</year>). <article-title>Gene expression scaled by distance to the genome replication site</article-title>. <source>Mol. Biosyst</source>. <volume>10</volume>, <fpage>375</fpage>&#x02013;<lpage>379</lpage>. <pub-id pub-id-type="doi">10.1039/C3MB70254E</pub-id><pub-id pub-id-type="pmid">24336896</pub-id></citation></ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>B.</given-names></name> <name><surname>St&#x000FC;lke</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>SubtiWiki in 2018: from genes and proteins to functional network annotation of the model organism <italic>Bacillus subtilis</italic></article-title>. <source>Nucl. Acids Res.</source> <volume>46</volume>, <fpage>D743</fpage>&#x02013;<lpage>D748</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx908</pub-id><pub-id pub-id-type="pmid">29788229</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zupancic</surname> <given-names>M. L.</given-names></name> <name><surname>Tran</surname> <given-names>H.</given-names></name> <name><surname>Hofmeister</surname> <given-names>A. E.</given-names></name></person-group> (<year>2001</year>). <article-title>Chromosomal organization governs the timing of cell type-specific gene expression required for spore formation in <italic>Bacillus subtilis</italic></article-title>. <source>Mol. Microbiol</source>. <volume>39</volume>, <fpage>1471</fpage>&#x02013;<lpage>1481</lpage>. <pub-id pub-id-type="doi">10.1046/j.1365-2958.2001.02331.x</pub-id><pub-id pub-id-type="pmid">11260465</pub-id></citation></ref>
</ref-list>
</back>
</article>