<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2021.654256</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Evolutionary Perspective and Expression Analysis of Intronless Genes Highlight the Conservation of Their Regulatory Role</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Avi&#x00F1;a-Padilla</surname> <given-names>Katia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/970530/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ram&#x00ED;rez-Rafael</surname> <given-names>Jos&#x00E9; Antonio</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1249593/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Herrera-Oropeza</surname> <given-names>Gabriel Emilio</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1376950/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Muley</surname> <given-names>Vijaykumar Yogesh</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/534929/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Valdivia</surname> <given-names>Dulce I.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1255114/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>D&#x00ED;az-Valenzuela</surname> <given-names>Erik</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1234756/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Garc&#x00ED;a-Garc&#x00ED;a</surname> <given-names>Andr&#x00E9;s</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Varela-Echavarr&#x00ED;a</surname> <given-names>Alfredo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/36185/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Hern&#x00E1;ndez-Rosales</surname> <given-names>Maribel</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/631695/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Instituto de Neurobiolog&#x00ED;a, Universidad Nacional Aut&#x00F3;noma de M&#x00E9;xico</institution>, <addr-line>Quer&#x00E9;taro</addr-line>, <country>Mexico</country></aff>
<aff id="aff2"><sup>2</sup><institution>Centro de Investigaci&#x00F3;n y de Estudios Avanzados del IPN, Unidad Irapuato</institution>, <addr-line>Guanajuato</addr-line>, <country>Mexico</country></aff>
<aff id="aff3"><sup>3</sup><institution>Centro de F&#x00ED;sica Aplicada y Tecnolog&#x00ED;a Avanzada, Universidad Nacional Aut&#x00F3;noma de M&#x00E9;xico</institution>, <addr-line>Quer&#x00E9;taro</addr-line>, <country>Mexico</country></aff>
<aff id="aff4"><sup>4</sup><institution>Centre for Developmental Neurobiology, Institute of Psychiatry, Psychology, and Neuroscience, King&#x2019;s College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Katja Nowick, Freie Universit&#x00E4;t Berlin, Germany</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Jaros&#x0142;aw Bryk, University of Huddersfield, United Kingdom; Scott William Roy, San Francisco State University, United States; Ekaterina Shelest, University of Portsmouth, United Kingdom</p></fn>
<corresp id="c001">&#x002A;Correspondence: Alfredo Varela-Echavarr&#x00ED;a, <email>avarela@unam.mx</email></corresp>
<corresp id="c002">Maribel Hern&#x00E1;ndez-Rosales, <email>maribel.hr@cinvestav.mx</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Evolutionary and Population Genetics, a section of the journal Frontiers in Genetics</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>07</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>654256</elocation-id>
<history>
<date date-type="received">
<day>15</day>
<month>01</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>06</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2021 Avi&#x00F1;a-Padilla, Ram&#x00ED;rez-Rafael, Herrera-Oropeza, Muley, Valdivia, D&#x00ED;az-Valenzuela, Garc&#x00ED;a-Garc&#x00ED;a, Varela-Echavarr&#x00ED;a and Hern&#x00E1;ndez-Rosales.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Avi&#x00F1;a-Padilla, Ram&#x00ED;rez-Rafael, Herrera-Oropeza, Muley, Valdivia, D&#x00ED;az-Valenzuela, Garc&#x00ED;a-Garc&#x00ED;a, Varela-Echavarr&#x00ED;a and Hern&#x00E1;ndez-Rosales</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The structure of eukaryotic genes is generally a combination of exons interrupted by intragenic non-coding DNA regions (introns) removed by RNA splicing to generate the mature mRNA. A fraction of genes, however, comprise a single coding exon with introns in their untranslated regions or are intronless genes (IGs), lacking introns entirely. The latter code for essential proteins involved in development, growth, and cell proliferation and their expression has been proposed to be highly specialized for neuro-specific functions and linked to cancer, neuropathies, and developmental disorders. The abundant presence of introns in eukaryotic genomes is pivotal for the precise control of gene expression. Notwithstanding, IGs exempting splicing events entail a higher transcriptional fidelity, making them even more valuable for regulatory roles. This work aimed to infer the functional role and evolutionary history of IGs centered on the mouse genome. IGs consist of a subgroup of genes with one exon including coding genes, non-coding genes, and pseudogenes, which conform approximately 6% of a total of 21,527 genes. To understand their prevalence, biological relevance, and evolution, we identified and studied 1,116 IG functional proteins validating their differential expression in transcriptomic data of embryonic mouse telencephalon. Our results showed that overall expression levels of IGs are lower than those of MEGs. However, strongly up-regulated IGs include transcription factors (TFs) such as the class 3 of POU (HMG Box), <italic>Neurog1, Olig1</italic>, and <italic>BHLHe22, BHLHe23</italic>, among other essential genes including the &#x03B2;-cluster of protocadherins. Most striking was the finding that IG-encoded <italic>BHLH</italic> TFs fit the criteria to be classified as microproteins. 
Finally, predicted protein orthologs in six other genomes confirmed high conservation of IGs associated with regulating neural processes and with chromatin organization and epigenetic regulation in <italic>Vertebrata</italic>. Moreover, this study highlights that IGs are essential modulators of regulatory processes, such as the Wnt signaling pathway and biological processes as pivotal as sensory organ development at a transcriptional and post-translational level. Overall, our results suggest that IG proteins have specialized, prevalent, and unique biological roles and that functional divergence between IGs and MEGs is likely to be the result of specific evolutionary constraints.</p>
</abstract>
<kwd-group>
<kwd>intronless genes</kwd>
<kwd>exon-intron architecture</kwd>
<kwd>embryonic telencephalon</kwd>
<kwd>protocadherins</kwd>
<kwd>histones</kwd>
<kwd>transcription factors</kwd>
<kwd>microproteins</kwd>
<kwd>evolutionary histories</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="75"/>
<page-count count="18"/>
<word-count count="0"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1">
<title>Introduction</title>
<p>Most eukaryotic genes contain introns, nucleotide DNA sequences that after transcription as part of the messenger RNA are removed by splicing during its maturation. Since the introns interrupt the multiple exonic sequences, these genes are thus termed multiple exon genes (MEGs). Eukaryotic genomes, however, also contain an important proportion of genes in which the coding sequence is contained within a single exon. Diverse studies of genes of this type have been performed over the past decades and have been variously referred to as &#x201C;single-exon genes&#x201D; (SEGs) and &#x201C;intronless genes&#x201D; (IGs), both terms carrying some ambiguity as genes containing an intron in their 5&#x2032; UTR are often included among them (<xref ref-type="bibr" rid="B61">Sunahara et al., 1990</xref>; <xref ref-type="bibr" rid="B16">Gentles and Karlin, 1999</xref>; <xref ref-type="bibr" rid="B53">Sakharkar et al., 2002</xref>, <xref ref-type="bibr" rid="B52">2005a</xref>, <xref ref-type="bibr" rid="B51">2006</xref>; <xref ref-type="bibr" rid="B65">Tine et al., 2011</xref>; <xref ref-type="bibr" rid="B75">Zou et al., 2011</xref>; <xref ref-type="bibr" rid="B72">Yan et al., 2014</xref>). For example, a recent ontology defines SEGs as nuclear genes with functional protein-coding capacity whose coding sequence comprises only one exon, thus including genes with introns in their untranslated regions termed uiSEGs, as well as those lacking introns entirely, termed &#x201C;<italic>Intronless Genes</italic>&#x201D; (<xref ref-type="bibr" rid="B26">Jorquera et al., 2018</xref>). Pseudogenes, functional RNAs, tRNA, rRNA, ribozyme long non-coding RNAs, and miRNAs are excluded from this definition. To avoid any possible ambiguity, in this article we use the term &#x201C;Intronless genes&#x201D; in the narrow definition of <xref ref-type="bibr" rid="B26">Jorquera et al. (2018)</xref> as protein-coding nuclear genes completely devoid of introns.</p>
<p>Owing to their prokaryotic-like architecture, IGs in eukaryotic genomes provide interesting datasets for computational analysis in comparative genomics and for the study of evolutionary trajectories. Comparative analysis of their sequences in different genomes could allow the identification of their unique and conserved features, thus providing insights into the role of introns in gene evolution leading to a better understanding of genome architecture and arrangement.</p>
<p>The abundant presence of introns in most genes of multicellular organisms entails regulatory processes associated with the generation of multiple splice variants missing in intronless genes. The absence of splicing events in IGs represents a higher transcriptional fidelity, making them even more valuable for regulatory roles. To date, more than 2000 genes with a single coding exon in the human genome have been classified (<xref ref-type="bibr" rid="B27">Jorquera et al., 2016</xref>). Among them, a considerable fraction of IGs encode G-protein-coupled receptors (GPCRs), core canonical histones which are an integral part of nucleosomes and often confer specific structural and functional features, transcription factors, proteins involved in signal transduction, regulation of development, growth, and cell proliferation (<xref ref-type="bibr" rid="B52">Sakharkar et al., 2005a</xref>; <xref ref-type="bibr" rid="B18">Grzybowska, 2012</xref>).</p>
<p>The expression of IGs has been proposed to be highly specialized for neural functions and linked to diseases such as cancer, neuropathies, and developmental disorders. Examples of IGs with clinical relevance are the <italic>RPRM</italic> gene related to gastric cancer which causes increased cell proliferation and possesses tumor suppression activity (<xref ref-type="bibr" rid="B1">Amigo et al., 2018</xref>) and the protein kinase <italic>CK2</italic>&#x03B1; gene which is up-regulated in all human cancers (<xref ref-type="bibr" rid="B24">Hung et al., 2010</xref>). Other IGs linked to cancer include <italic>CLDN8</italic> in colorectal carcinoma and renal cell tumors, <italic>ARLTS1</italic> in melanoma, and <italic>PURA</italic> and <italic>TAL2</italic> in leukemia (<xref ref-type="bibr" rid="B18">Grzybowska, 2012</xref>). IGs have also been associated with neuropathies, such as <italic>ECDR1</italic>, a cerebellar degeneration-related protein, and <italic>NPBWR2</italic>, a neuropeptide B/W receptor type (<xref ref-type="bibr" rid="B39">Louhichi et al., 2011</xref>).</p>
<p>Regarding their role in the diseases described above, IGs in humans are potential clinical biomarkers and drug targets that deserve careful consideration (<xref ref-type="bibr" rid="B46">Ohki et al., 2000</xref>; <xref ref-type="bibr" rid="B18">Grzybowska, 2012</xref>; <xref ref-type="bibr" rid="B36">Liu et al., 2017</xref>). Their functional role and their evolutionary conservation in other genomes, however, remain poorly understood. Furthermore, there is a current debate between related theories that place the origin of introns early or late during the evolutionary history of eukaryotes (<xref ref-type="bibr" rid="B9">de Souza, 2003</xref>; <xref ref-type="bibr" rid="B14">Fedorova and Fedorov, 2003</xref>).</p>
<p>With this backdrop, our work aimed to characterize the functional role of mouse IGs and to infer their evolutionary pattern across six additional vertebrate genomes. We have analyzed their expression, particularly during brain development at early embryonic stages, and their potential as transcriptional as well as post-translational modulators.</p>
<p>Overall, this study sheds light on the concerted role played by this peculiar group of genes and helps contrast the functional features of intron-containing and intronless genes across vertebrate species and their collective evolutionary roadmaps.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S2.SS1">
<title>Data Extraction and Curation for MEGs and IGs</title>
<p>Data were extracted using Python scripts<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> and Ensembl APIs. Seven vertebrate genomes including <italic>Mus musculus, Homo sapiens, Pan troglodytes, Monodelphis domestica, Rattus norvegicus, Gallus gallus</italic>, and <italic>Danio rerio</italic> assembled at a chromosome level were accessed at the Ensembl REST API platform<sup><xref ref-type="fn" rid="footnote2">2</xref></sup> (using Python with the ensembl_rest package). For an explanation of the species choice see section &#x201C;<italic>Search for Orthologs of Mouse IGs.</italic>&#x201D; The pipeline process was as follows: protein-coding genes with CDS identifiers for transcripts for all chromosomes were retrieved and classified into a temporary dataset that contained genes with a single coding exon (temp-DS1) and a dataset containing &#x201C;multiple exon genes&#x201D; (MEGs) depending on exon and transcript count (<xref ref-type="fig" rid="F1">Figure 1</xref>). The former was then submitted to the Intron DB<sup><xref ref-type="fn" rid="footnote3">3</xref></sup> to filter out genes with UTR introns. The output of the pipeline was a second temporary dataset containing protein-coding genes that did not contain introns in their entire length (temp-DS2). After data extraction, a manual curation step was performed to assess their nuclear nature and protein-coding transcript biotype, which allowed us to discard proteins encoded in the mitochondrial genome, hence yielding the final dataset containing only protein-coding nuclear genes completely devoid of introns, or &#x201C;Intronless genes&#x201D; (IG) (<xref ref-type="fig" rid="F1">Figure 1</xref>). The final MEG dataset contained 20,694 protein-coding genes with two or more exons and the IG dataset contained 1,116 protein-coding genes with only one exon and one transcript.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Bioinformatics pipeline for generating IG and MEG datasets. Automatized steps (represented as 1&#x2013;4): all protein-coding genes were retrieved for each species by accessing the Ensembl REST API platform. The number of exons for each gene were counted, and genes with two or more exons were identified as MEGs. Genes with one single exon were identified as SEGs and were subsequently filtered by their number of transcripts, keeping only those with one transcript. Using the IntronDB, we then classified these genes according to the presence of introns within their UTRs. Genes with introns within these regions were identified as MEGs and those without, as IGs. Manual curation was performed to filter out mitochondrial genes and genes with incomplete protein annotations (step 5). Output files are depicted as green squares.</p></caption>
<graphic xlink:href="fgene-12-654256-g001.tif"/>
</fig>
</sec>
<sec id="S2.SS2">
<title>Computational Prediction of Mouse Intronless Gene Function</title>
<p>The mouse IG and MEG datasets were used to perform an over-representation analysis of functional assignment using the following databases: SUPERFAMILY<sup><xref ref-type="fn" rid="footnote4">4</xref></sup> (proteins of known three-dimensional structure); Pfam<sup><xref ref-type="fn" rid="footnote5">5</xref></sup> (protein domains); and PROSITE<sup><xref ref-type="fn" rid="footnote6">6</xref></sup> (biologically meaningful signatures or motifs). All tests used MEGs as controls to determine unique, shared or overrepresented features among both types of genes. For data visualization of SUPERFAMILY and Pfam results we employed the ClusterProfiler R package (<xref ref-type="bibr" rid="B73">Yu et al., 2012</xref>), and Python scripts using a hypergeometric test for PROSITE enrichment.</p>
</sec>
<sec id="S2.SS3">
<title>Functional Enrichment Analysis of IG and MEG Proteins</title>
<p>The functional enrichment analyses were performed using Metascape<sup><xref ref-type="fn" rid="footnote7">7</xref></sup> for the biological process category, including KEGG and Reactome pathways. First, the functional enrichment of the 1,116 mouse IG proteins was performed using all mouse proteins as a background &#x201C;universe&#x201D; (selecting input as species: <italic>M. musculus</italic>, analysis as species: <italic>M. musculus</italic>). Then, in a second approach the meta-analysis workflow was used to compare enriched terms for the list of orthologs of mouse IGs regarding their grouping into five <italic>&#x201C;ages,&#x201D;</italic> each of them corresponding to one of the taxonomic categories: <italic>Vertebrata</italic>, <italic>Tetrapoda</italic>, <italic>Theria</italic>, <italic>Eutheria</italic>, and <italic>Muridae</italic>.</p>
<p>Out of the 1,116 IGs, 543 have orthologs across the analyzed species. Mouse IG orthologs that conserved the IG structure were compared against MEGs conserved as MEGs in other organisms. The meta-analysis workflow was used to compare enriched terms for the list of mouse orthologs in the aforementioned genomes to the pathways of three random samples of the same size of multi-exon genes to confirm that we obtained similar results. For this analysis, orthologs were clustered regarding their previously inferred <italic>&#x201C;age&#x201D;</italic> in five groups (selecting input as species: any species, analysis as species: <italic>M. musculus</italic>).</p>
<p>Finally, we performed a third approach to determine the conservation of the functional role of IGs. First, we determined the overrepresented GO terms for biological processes and molecular function of the orthologs from the seven genomes using AmiGO2<sup><xref ref-type="fn" rid="footnote8">8</xref></sup>. Then, GO terms with their corresponding <italic>p</italic>-values were clustered using REVIGO, which finds a representative subset of the terms using an algorithm that relies on semantic similarity measures (<xref ref-type="bibr" rid="B62">Supek et al., 2011</xref>).</p>
</sec>
<sec id="S2.SS4">
<title>Data Source and Differential Expression Analysis</title>
<p>Read counts from a previous transcriptomic analysis of mouse embryonic telencephalon were used to identify differentially expressed genes (<xref ref-type="bibr" rid="B43">Muley et al., 2020</xref>). The transcriptomes were obtained using the Illumina HiSeq RNA sequencing (RNA-seq) platform. The procedures for read-count normalization and differential expression analysis are described in <ext-link ext-link-type="uri" xlink:href="https://data.mendeley.com/datasets/rdt5757cbw/1">https://data.mendeley.com/datasets/rdt5757cbw/1</ext-link>. A gene was considered expressed if its count-per-million (CPM) value was above 5.66-7. Mouse IG and MEG datasets were submitted to analysis to determine the directionality of the change in expression at developmental stage A (E.9.5) compared to stage B (E.10.5). Genes having significant <italic>p</italic>-values with positive log2-fold change represent an increased expression (UP), those with negative log2-fold change are considered with decreased expression (DN), while gene expression with <italic>p</italic>-values above 0.05 represents no change between stages (NC), and read-count lower than five in less than four samples out of eight are considered not expressed (NE).</p>
</sec>
<sec id="S2.SS5">
<title>Functional Enrichment Analysis of Differentially Expressed IGs and MEGs</title>
<p>The functional enrichment was assessed using the over-representation analysis of the functional assignment. Genes with differential expression up to two log2-fold change values were considered as up-regulated with a <italic>p</italic>- and <italic>q</italic>-value set at 0.05 and 0.10, respectively. The ClusterProfiler R package (<xref ref-type="bibr" rid="B73">Yu et al., 2012</xref>) was employed for data analysis and visualization.</p>
</sec>
<sec id="S2.SS6">
<title>Post-translational Modifications and Regulatory Assignment of IG Proteins</title>
<p>For post-translational modification assignments of IG and MEG proteins, the dbPTM<sup><xref ref-type="fn" rid="footnote9">9</xref></sup> was used. A two proportion <italic>Z</italic>-test was used to assess whether the proportions of each post-translational modification among IG and MEG proteins were similar. The <italic>p</italic>-value was set at 0.05. When the resulting <italic>p</italic>-value was not significant, meaning that the proportions of IG and MEG proteins were similar for a specific post-translational modification, this was classified as &#x201C;similar.&#x201D; On the other hand, when the resulting <italic>p</italic>-value was &#x003C;0.05 the post-translational modification was classified as more abundant in &#x201C;IG&#x201D; or &#x201C;MEG&#x201D; depending on which one had a higher relative percentage of such modification. Post-translational modifications exclusive of either IG or MEG proteins were classified as &#x201C;unique.&#x201D; Then, using the miPFinder program<sup><xref ref-type="fn" rid="footnote10">10</xref></sup>, we determined the mouse gene candidates for IG-encoding microproteins.</p>
</sec>
<sec id="S2.SS7">
<title>Search for Orthologs of Mouse IGs</title>
<p>Mouse peptide sequences were submitted to Proteinortho (<xref ref-type="bibr" rid="B31">Lechner et al., 2011</xref>, <xref ref-type="bibr" rid="B32">2014</xref>) to infer orthologous genes in the genomes of rat, human, chimp, opossum, chicken, and zebrafish. As a first step, Proteinortho performs sequence comparison between each pair of genomes and reports best bidirectional hits (BBHs) for alignments with equal or above fifty percent of sequence identity. In a second step, it represents each gene or protein as a node of a graph and places an edge between two genes if they were identified as a BBH, then, it applies a clustering algorithm and finally reports orthogroups and the orthology relations as pairs of genes in two different genomes.</p>
<p>Each of the species used in this study is a model organism of a different taxonomic level, and therefore, the conservation of mouse orthologs in close or distant related species reveals the &#x201C;age&#x201D; of the gene. The conservation was measured by gradually including more species, and the resulting groups were labeled with the name of the largest taxonomical category that includes all species within a group. Therefore, for orthologs of mouse IG genes that were identified in rat, they are said to be conserved in <italic>Muridae;</italic> those present in <italic>Muridae</italic>, in human, and in chimp are said to be conserved in the group <italic>Eutheria;</italic> those found in all the previous species and in opossum as well are conserved in the group <italic>Theria;</italic> those also present in chick are conserved in <italic>Tetrapoda</italic>; and finally, those also conserved in zebrafish are conserved in <italic>Vertebrata</italic>. This classification, however, is only used to refer to the conservation among the species analyzed in the present study.</p>
</sec>
<sec id="S2.SS8">
<title>Reconstruction of the Evolutionary History of Mouse IGs and Their Conservation in Other Organisms</title>
<p>From the Proteinortho predictions, orthology graphs were constructed, and an in-house developed method for the evolutionary reconstruction of gene families was used. This method implements the theory reported in <xref ref-type="bibr" rid="B23">Hernandez-Rosales et al. (2012)</xref> and <xref ref-type="bibr" rid="B22">Hellmuth et al. (2013)</xref>, and it can be found at <ext-link ext-link-type="uri" xlink:href="https://gitlab.com/jarr.tecn/revolutionh-tl">https://gitlab.com/jarr.tecn/revolutionh-tl</ext-link>. This tool starts by performing a modular decomposition (<xref ref-type="bibr" rid="B64">Tedder et al., 2008</xref>) on orthology graphs and then inferring the corresponding gene trees. Each internal node of these trees represents an evolutionary event: duplication or speciation. Subsequently, the gene trees are reconciled with the species tree to determine in which branch of the species tree duplication events occur and, at the same time, infer gene losses. This method allows us to infer how ancestral a gene is, determined by its orthologs in the other species, as well as to identify species-specific genes. Finally, we identified the orthologs of the 1,116 mouse IG proteins that were conserved as IGs, or that were identified as MEGs in the abovementioned genomes.</p>
</sec>
<sec id="S2.SS9">
<title>Syntenic Conservation of the &#x03B2;-Protocadherin Cluster</title>
<p>To determine the syntenic conservation of the mouse protocadherin IG members of the beta cluster across the selected genomes, the genomic coordinates of orthologous genes were retrieved from GTF files employing custom R scripts and plotted using the genoPlotR R package.</p>
</sec>
</sec>
<sec id="S3">
<title>Results</title>
<sec id="S3.SS1">
<title>Functional Assignment of Protein Coding IGs in the Mouse Genome</title>
<p>The origin of IGs has been explained mostly by retrotransposition, which occurs by homologous recombination between the genomic copy of a gene and an intronless cDNA (<xref ref-type="bibr" rid="B28">Kaessmann et al., 2009</xref>). Mouse IGs represent 6% of the total number of one-exon genes, while retrotransposed single-exon pseudogenes with lost molecular function constitute almost half of them (coding, non-coding, pseudogenes) (<xref ref-type="supplementary-material" rid="FS1">Supplementary Figure 1</xref>).</p>
<p>Computational analysis was performed to identify mouse IGs. Then, based on the comparative annotation of IG and MEG datasets, a study was performed to identify their unique and shared molecular and biological features.</p>
<p>The grouping of IGs by protein domains that have an evolutionary relationship (SUPERFAMILY database) revealed a higher enrichment of the histone fold, 4-helical cytokine family of signal transducers, and transcription factor families including the Poxvirus and Zinc finger (POZ) domain, &#x201C;Winged helix&#x201D; DNA-binding domain, High Mobility Box group (HMG-box), and A DNA-binding domain in eukaryotes, as well as the transmembrane protein families Cadherin-like, and Frizzled cysteine-rich domain. MEG-encoded proteins, in contrast, are enriched in protein kinase-like, immunoglobulin, Kr&#x00FC;ppel associated box (KRAB) domain, and Armadillo repeat motifs (ARM) repeat families. The top enriched structural families of IG and MEG groups are shown in <xref ref-type="supplementary-material" rid="FS2">Supplementary Figure 2</xref>.</p>
<p>The analysis of the conserved functional domains (Pfam database) among the enriched protein families encoded by mouse IGs, revealed 598 hits of three main classes: 253 were transmembrane protein receptors, 101 core histones, 84 transcription factors, and 160 that belong to other classes (<xref ref-type="fig" rid="F2">Figure 2A</xref>). Among the transmembrane protein receptors, the most enriched domains were Taste 2 receptors (TAS2R), Vomer-nasal 1 receptors (V1R), and seven transmembrane group 1 (7tm_1), common in GPCR and vomeronasal receptors (<xref ref-type="fig" rid="F2">Figure 2B</xref>). Other domains identified were cadherin, Peripheral myelin protein 22 (PMP22-claudin), Disintegrin and metalloproteinase domain (ADAM), Disintegrin, and Frizzled (FZ). In the transcription factor group, Broad-Complex, tramtrack, and bric-&#x00E0;-brac (BTB), Myb DNA-binding, HMG-Box, and forkhead were enriched protein domains in the mouse IGs compared to MEGs (<xref ref-type="fig" rid="F2">Figure 2C</xref>). Meanwhile, in the histone group, four domains were observed: Histone, Histone H2A 1363 C-terminal (H2AC), Centromere kinetochore component CENP-T histone (CENPTC), and Linker histone (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Finally, in the other groups we found among others Keratin, Interferon, Ubiquitin, Actin, and FYTT enriched domains (<xref ref-type="fig" rid="F2">Figure 2E</xref>). The classification of IGs in functional groups mentioned above was then used for the transcriptional analysis (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Enrichment of the Pfam protein domains in mouse IG proteins. <bold>(A)</bold> Enriched domains of proteins grouped into four main classes, transmembrane proteins (red), histones (blue), transcription factors (yellow), and others (gray). Enrichment of domain terms was calculated with a significant background list associated with the Pfam domain and p-adjusted values above 0.05. <bold>(B)</bold> Pfam domains grouped in the <italic>transmembrane proteins</italic> class, <bold>(C)</bold> Pfam domains grouped in the <italic>histone</italic> class, <bold>(D)</bold> Pfam domains grouped in the <italic>transcription factors</italic> class, <bold>(E)</bold> Pfam domains considered as <italic>others</italic>.</p></caption>
<graphic xlink:href="fgene-12-654256-g002.tif"/>
</fig>
<p>Analysis of biologically significant motifs (PROSITE database) among MEG and IG proteins identified a total of 1,239 (12,546 hits) and 144 (634 hits) distinct protein signatures, respectively. Interestingly, among the most abundant motifs in the mouse IGs are GPCR, leucine-rich repeat, histone, transcription factor Forkhead domain, Myc-type basic helix-loop-helix (bhlh) motifs, ankyrins, and cadherin domains which were found to be infrequent in MEG proteins (<xref ref-type="fig" rid="F3">Figure 3A</xref>). It is noteworthy, however, that among the top motifs that were unique to IG proteins H2B signature was the largest group (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Hence, these results show that most of the top predictions of IGs signatures are characteristic of transmembrane receptors, histones, and specific transcription factors, having a unique signature for histones.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Enrichment of the PROSITE signatures in mouse IG proteins compared to MEG proteins. <bold>(A)</bold> The 15 most abundant PROSITE signatures of IG proteins. <bold>(B)</bold> PROSITE signatures exclusive of IG proteins (signatures belonging to the <italic>Histones</italic> class are shown in blue).</p></caption>
<graphic xlink:href="fgene-12-654256-g003.tif"/>
</fig>
<p>Finally, the functional enrichment of mouse IGs revealed biological pathways associated with genetic and protein regulatory processes including detection of chemical stimulus involved in sensory perception of the bitter taste, chromatin silencing, positive regulation of peptidyl-serine phosphorylation of STAT proteins, and nucleosome positioning (&#x2013;log10&#x2013;34.23 &#x003E; &#x2212;3.27). Other functions detected were immune, neuro-specific, and development processes such as mmu05322-Systemic lupus erythematosus, R-MMU-6805567 Keratinization, R-MMU-500792 GPCR ligand binding, R-MMU-1266695 Interleukin 7 signaling, hard palate development, and noradrenergic neuron differentiation (&#x2013;log10&#x2013;29.94 &#x003E; &#x2212;3.75) (<xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Functional enrichment of mouse IG proteins. Gene ontology enrichment and pathways for IG proteins. The color key from yellow to brown indicates high to low <italic>p</italic>-values, respectively.</p></caption>
<graphic xlink:href="fgene-12-654256-g004.tif"/>
</fig>
<p>Altogether, functional assignment analysis suggests that IGs have distinct biological roles in comparison to MEGs.</p>
</sec>
<sec id="S3.SS2">
<title>Up-Regulation of IGs Reveal Their Regulatory Role on Neural Functions Through Mouse Development</title>
<p>Previous studies detected enrichment of neural-related functions among IGs (<xref ref-type="bibr" rid="B18">Grzybowska, 2012</xref>). Moreover, since the expression of MEGs is modulated by the balance between the rate of transcription elongation and the alternative splicing of exons (<xref ref-type="bibr" rid="B15">Fong et al., 2014</xref>), we hypothesized that the natural absence of splicing on IG mRNAs could confer them differential regulatory roles in complex biological processes. Therefore, it was our interest to identify and analyze IGs that are expressed in mice during brain development. For that purpose, we analyzed expression data from the developing mouse telencephalon at stages in which its patterning is taking place (E9.5 and E10.5) (<xref ref-type="bibr" rid="B43">Muley et al., 2020</xref>).</p>
<p>Overall, the expression of IGs was lower than that of MEGs (<xref ref-type="fig" rid="F5">Figure 5A</xref>), which is consistent with previous <italic>in silico</italic> observations (<xref ref-type="bibr" rid="B52">Sakharkar et al., 2005a</xref>). Out of 1,116 transcripts, differential expression analysis was performed for 1,087, with 37 of them (3.4%) showing up-regulation and nine down-regulation (0.82%) from gestational day E9.5 to E10.5. Moreover, 387 (35.63%) did not change expression, and 653 (60.12%) were not expressed during the analyzed stages (<xref ref-type="fig" rid="F5">Figure 5B</xref>). Meanwhile, among MEGs, 1247 were up-regulated (6.13%), 789 were down-regulated (3.88%), 13,198 had no expression changes (64.93%), and 5090 did not show expression (25.04%) (<xref ref-type="fig" rid="F5">Figure 5B</xref>). It is noteworthy that an inverse expression pattern of genes with no expression changes (a higher percentage of MEGs than of IGs) and those not expressed (a higher percentage of IGs than of MEGs) was found in this comparison.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Expression levels of IGs in mouse embryonic telencephalon compared to MEGs. <bold>(A)</bold> IG and MEG normalized expression at 9.5 and 10.5 embryonic stages; <bold>(B)</bold> IG and MEG, gene differential expression groups in 9.5 and 10.5 stages, (UP) upregulated, (DN) downregulated, (NC) no expression changes, (NE) not expressed. Enrichment of functions in upregulated IGs <bold>(C)</bold> and MEGs <bold>(D)</bold>. Each barplot shows significantly enriched gene ontology (GO) terms on the y-axis, colored by their <italic>p</italic>-adjusted values, while gene count is represented on the x-axis. Test gene sets for enrichment analysis were the up-regulated genes in each dataset, and the background set was all up-regulated genes. GO terms are grouped by BP (biological process), CC (cellular component), and MF (molecular function).</p></caption>
<graphic xlink:href="fgene-12-654256-g005.tif"/>
</fig>
<p>Our analysis revealed that all up-regulated IGs are exclusively enriched in biological pathways in eye and sensory organ development processes compared to MEGs also involved in other developmental and neural function pathways (<xref ref-type="fig" rid="F5">Figures 5C,D</xref>). Moreover, significantly enriched terms in molecular function found for up-regulated IGs are consistent with their regulatory role, including rRNA methyltransferase and DNA-binding transcription repressor activities. In contrast, in the up-regulated MEGs the molecular function terms are highly enriched for transmembrane transporters and channel voltage activities (<xref ref-type="fig" rid="F5">Figure 5D</xref>).</p>
<p>From the IG transmembrane protein group, transcripts for <italic>Tram1l1, Cdk5r2, Nrarp, Kcnf1, Fzd7, Fzd8, Fzd10</italic>, and <italic>Cldn5</italic> were up-regulated. Strikingly, from the cluster of 22 &#x03B2;<italic>-</italic>protocadherins (pcdhbs), which contains 18 IGs, 9 of these were among those up-regulated in the E10.5 telencephalon (<italic>Pcdhb3, Pcdhb4, Pcdhb7, Pcdhb10, Pcdhb11, Pcdhb17, Pcdhb19, Pcdhb20</italic>, and <italic>Pcdhb21</italic>) (<xref ref-type="fig" rid="F6">Figures 6A</xref>, <xref ref-type="fig" rid="F7">7A</xref>). It is important to note that all but one of all protocadherins of this cluster were expressed in the developing telencephalon. Our expression analysis additionally revealed up-regulation of <italic>Olig1, Bhlhe22, Bhlhe23, Pou3f1, Pou3f2, Pou3f4, Foxq1</italic>, and <italic>Neurog1</italic>, most of which are BHLH transcription factors crucial for the regulation of brain development and neuro-specific functions (<xref ref-type="fig" rid="F6">Figure 6B</xref>). Moreover, regarding IGs within the histone group, <italic>H2bc21, H2bu2, H2aw</italic> were up-regulated during the mouse embryonic stages (<xref ref-type="fig" rid="F6">Figure 6C</xref>). Finally, IGs from other groups with up-regulation were also observed (<xref ref-type="fig" rid="F6">Figure 6D</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>Differentially expressed IGs in the mouse embryonic telencephalon. Expression of genes grouped by their Pfam assignment, determined by their log2-fold change values. Up-regulated genes are highlighted in green, while downregulated genes are colored in red. <bold>(A)</bold> Gene expression of transmembrane proteins, <bold>(B)</bold> histones, <bold>(C)</bold> transcription factors, and <bold>(D)</bold> other protein families.</p></caption>
<graphic xlink:href="fgene-12-654256-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>Expression and evolution analysis of the mouse cluster of &#x03B2;-protocadherins. <bold>(A)</bold> Cluster of the 22 &#x03B2;-protocadherins in the chromosome 18 depicting in black up-regulated genes (names highlighted in green) in telencephalon between embryonic stages 9.5 and 10.5, in gray expressed genes with no relative changes between stages, and in white not expressed genes; <bold>(B)</bold> Syntenic map of the approximately 300 kb &#x03B2;-protocadherin locus across five mammalian lineages and chicken as an outgroup. Colored lines depict orthology relationships across the phylogenetic tree. Genes connected by the same color belong to the same orthogroup identified by ProteinOrtho. Genes are shown as black squares for single-copy orthologs and gray squares for expanded genes; <bold>(C)</bold> Reconciliation tree of the protocadherin gene trees and the species tree. Each gene tree represents an orthogroup, internal nodes represent evolutionary events (blue squares represent duplications, red bullets represent speciations) and black crosses in leaves represent inferred gene loss events; <bold>(D)</bold> Pairwise syntenic comparisons of the mouse &#x03B2;-protocadherin locus to four mammalian genomes and chick highlighting the lineage-specific loss and expansions of the <italic>Pcdhb18</italic> gene.</p></caption>
<graphic xlink:href="fgene-12-654256-g007.tif"/>
</fig>
</sec>
<sec id="S3.SS3">
<title>The &#x03B2;-Protocadherin Gene Cluster Displays a High Degree of Syntenic Conservation Across Mammalian Genomes</title>
<p>To gain further insight into the evolutionary conservation of IGs with a functional role in telencephalon patterning, we studied the syntenic conservation of the ortholog genes of the 18 mouse IG &#x03B2;<italic>-</italic>protocadherins (<xref ref-type="fig" rid="F7">Figure 7A</xref>) in our set of seven species. We determined that human, chimp, rat, opossum, and chick contain orthologs of the mouse single exon <italic>pcdhb</italic> genes, which are absent in zebrafish. Overall, all the orthologous genes of the cluster are located in a single locus in their respective genomes, with varying lengths ranging from &#x223C;128 to 310 kb, displaying a few local inversions (<xref ref-type="fig" rid="F7">Figure 7B</xref>). These results are consistent with previous studies that have explored the syntenic conservation of <italic>pcdhb</italic> genes across other vertebrate species (<xref ref-type="bibr" rid="B45">Noonan et al., 2004</xref>; <xref ref-type="bibr" rid="B74">Yu et al., 2008</xref>). Even though we found syntenic conservation of some members of the &#x03B2;<italic>-</italic>protocadherin cluster, we observed slight disruptions of the order of genes due to gene expansions, which can be either gene duplications or <italic>de novo</italic> formation. These gains are most notable in the mammalian genomes (<xref ref-type="fig" rid="F7">Figure 7B</xref>), suggesting gene expansion of the intronless &#x03B2;<italic>-</italic>protocadherins could be relevant for their role in neurogenesis, as well as other neuro-specific functions associated with the Wnt canonical pathway.</p>
<p>We looked in more detail at the evolutionary histories of &#x03B2;<italic>-</italic>protocadherins, by reconciling the gene trees of this gene cluster with the taxonomic species tree (<xref ref-type="fig" rid="F7">Figure 7C</xref>). First, we observed that none of these genes is conserved in zebrafish. Moreover, some genes are gained in specific lineages, for example, <italic>Pcdhb17</italic> is only observed in mice and rats, while eight genes are shared across the mammals in the study. Three &#x03B2;-protocadherins appear to be shared among primates and the marsupial opossum, while only <italic>Pcdhb7, Pcdhb15</italic>, and <italic>Pcdhb19</italic> are shared between mouse and chick, and across other intermediate species, suggesting that these are the oldest &#x03B2;<italic>-</italic>protocadherins that give origin to the rest of them.</p>
<p>Then, by assessing syntenic conservation in a pair-wise fashion, we found relevant lineage-specific gene losses (<xref ref-type="fig" rid="F7">Figure 7D</xref>). For instance, <italic>Pcdhb18</italic> is absent in rats while it is present in mice and duplicated in primates. This evidence suggests that the complexity of nervous system characteristic of mammals could also be associated with the duplication of single exon genes besides splicing-derived protein isoform diversity.</p>
</sec>
<sec id="S3.SS4">
<title>Characterization and Functional Role of Post-translational Modifications in Mouse IG Proteins</title>
<p>In addition to alternative splicing, and mRNA editing, post-translational modifications (PTMs) constitute a defining factor of the complexity of proteomes by increasing structural and functional diversity of each proteoform, the set of multiple protein molecules encoded by one gene. Hence, protein PTMs have an essential role in protein structure-function, including activity, stability, folding, and turnover (<xref ref-type="bibr" rid="B66">Uversky, 2015</xref>). Since IGs fit the &#x201C;one gene&#x2014;one protein&#x201D; concept we aimed to determine whether PTMs represent exclusive mechanisms of regulation for these genes. In our analysis, we observed that Succinylation and S-nitrosylation had similar prevalence in IG and MEG groups. These were followed with much lower frequency by Glutathionylation, Glutarylation, Palmitoylation, and Oxidation (<xref ref-type="fig" rid="F8">Figure 8A</xref>). In contrast, PTMs with a unique presence in MEGs were Nitration, Myristoylation, Sulfation, Carboxylation, GPI-anchor, and Pyrrolidone carboxylic acid (<xref ref-type="fig" rid="F8">Figure 8B</xref>).</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption><p>Distribution of post-translational modifications in mouse IG proteins compared to MEG proteins. <bold>(A)</bold> PTMs with similar distribution between IG and MEG proteins. <bold>(B)</bold> PTMs exclusive to MEGs. <bold>(C)</bold> PTMs predominant in IG, and <bold>(D)</bold> in MEG proteins.</p></caption>
<graphic xlink:href="fgene-12-654256-g008.tif"/>
</fig>
<p>Notably, in accordance with their protein assignment, a differential enrichment for IG proteins was observed of Acetylation, Crotonylation, Methylation, Malonylation, and Hydroxylation (<xref ref-type="fig" rid="F8">Figure 8C</xref>) which are characteristic features of the core histones. Other PTMs of IG proteins, albeit at lower frequency, were Citrullination, Sumoylation, and Amidation (<xref ref-type="fig" rid="F8">Figure 8C</xref>). The complementary group of PTMs more enriched in MEG proteins included Phosphorylation, followed by O-linked Glycosylation, Ubiquitination, and N-linked Glycosylation (<xref ref-type="fig" rid="F8">Figure 8D</xref>). PTMs classified based on the modification-enabled functionality for membrane localization such as Myristoylation and GPI-anchor were found to be more frequent for MEGs (<xref ref-type="fig" rid="F8">Figure 8D</xref>).</p>
</sec>
<sec id="S3.SS5">
<title>Functional Assignment of IGs as Microproteins</title>
<p>The regulation of multidomain proteins at the post-translational level can be mediated by microproteins (miPs) (<xref ref-type="bibr" rid="B57">Staudt and Wenkel, 2011</xref>) which are small proteins containing a single domain that form heterodimers with their targets and exert dominant-negative regulatory effects (<xref ref-type="bibr" rid="B8">de Klein et al., 2015</xref>; <xref ref-type="bibr" rid="B13">Eguen et al., 2015</xref>). In <italic>Eukarya</italic>, microproteins have been found to have a remarkable influence on diverse biological processes.</p>
<p>Aware of the differential occurrence of PTMs on IG proteins, the DNA binding repressor activity molecular function of up-regulated IGs during mouse brain development, and due to the remarkable role of miPs, we assessed whether this group of genes encoded proteins fitting the miP definition. Characteristic features of miPs are the short length of their primary structure, a homodimer domain, and negative modulating activity of protein multi-complexes. Our first approach was to analyze the peptide length of IG and MEG proteins. The highest length-frequency for IG peptides was in the range of 200&#x2013;400 amino acids, compared to that of MEG peptides which was 300&#x2013;500. Then, using the miPfinder tool (<xref ref-type="bibr" rid="B60">Straub and Wenkel, 2017</xref>), we identified the following IGs as microprotein candidates: the BHLH transcription factors <italic>Bhlha9</italic>, <italic>Msg1</italic>, <italic>Ferd3l</italic>, <italic>Bhlhe23</italic>, and <italic>Ascl5</italic> (<italic>e</italic>-value 4.6E&#x2013;30), as well as the histones <italic>H1f0, H1f1, Hils1, H1f2, H1f3, H1f4, H1f5, H1f6</italic>, and <italic>H1f10</italic> (<italic>e</italic>-value 7.4E&#x2013;09), corresponding to the H1 linker histone group.</p>
</sec>
<sec id="S3.SS6">
<title>Conservation and Evolution of IGs Across Vertebrata</title>
<p>To infer the evolutionary age of genes we implemented a bioinformatics method to assess the extent and patterns of distribution of each gene&#x2019;s orthologs and paralogs in different species. The rationale of this approach is that widespread conservation of the orthologs of a gene in the different vertebrate taxa is an indication of old age for that particular gene. This approach allowed us to determine the conservation of IGs across 7 genomes, as well as to identify species-specific mouse IGs (<xref ref-type="fig" rid="F9">Figure 9</xref>). In this analysis, we found that 543 out of the 1,116 mouse IGs have orthologs in at least one of the other species. For the mammalian genomes, we found 442 genes conserved as IGs out of 501 orthologs in the rat genome, 335 orthologs in chimp with 250 conserved as IGs, 397 with 262 IGs in human, and 258 with 167 IGs in opossum (<xref ref-type="table" rid="T1">Table 1</xref>). Meanwhile, we found 133 orthologs in chick with 78 conserved as IGs, and 220 in zebrafish with 91 conserved as IGs (<xref ref-type="table" rid="T1">Table 1</xref>). We also identified out-paralogs of mouse IGs (genes that arose via duplication before a speciation) that are conserved in the other species: 36 in rat, 16 in human, 11 in chimp, 9 in opossum, 2 in chick and none in zebrafish. Finally, we identified 573 IGs with no orthologs in the other species, suggesting that these are species-specific mouse IGs.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Summary of mouse IG orthologs in selected genomes.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Genome</td>
<td valign="top" align="left">IGs</td>
<td valign="top" align="left">MEGs</td>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">Total</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Zebrafish</td>
<td valign="top" align="left">91</td>
<td valign="top" align="left">90</td>
<td valign="top" align="left">39</td>
<td valign="top" align="left">220</td>
</tr>
<tr>
<td valign="top" align="left">Chick</td>
<td valign="top" align="left">78</td>
<td valign="top" align="left">55</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">133</td>
</tr>
<tr>
<td valign="top" align="left">Opossum</td>
<td valign="top" align="left">167</td>
<td valign="top" align="left">87</td>
<td valign="top" align="left">4</td>
<td valign="top" align="left">258</td>
</tr>
<tr>
<td valign="top" align="left">Chimp</td>
<td valign="top" align="left">250</td>
<td valign="top" align="left">59</td>
<td valign="top" align="left">26</td>
<td valign="top" align="left">335</td>
</tr>
<tr>
<td valign="top" align="left">Human</td>
<td valign="top" align="left">262</td>
<td valign="top" align="left">97</td>
<td valign="top" align="left">38</td>
<td valign="top" align="left">397</td>
</tr>
<tr>
<td valign="top" align="left">Rat</td>
<td valign="top" align="left">442</td>
<td valign="top" align="left">57</td>
<td valign="top" align="left">2</td>
<td valign="top" align="left">501</td>
</tr>
</tbody>
</table></table-wrap>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption><p>Clusterization of conserved mouse IG and MEG orthologs. Reconciliation trees representing the evolutionary history of mouse IG <bold>(A)</bold>, and MEG <bold>(B)</bold> gene families. Blue numbers at each internal node represent the number of ancestral genes found in that clade that were inherited by an older ancestor. Blue numbers in brackets represent ancestral genes that might be generated at that evolutionary point, since they are not found in an outgroup of the clade. Green numbers represent gene gains due to duplication events and red numbers represent gene losses. Numbers at the tip of the branches represent the number of orthologous genes in other species, and numbers in square brackets represent the number of out-paralogs (genes that arose via duplication before speciation) in other species in the study. Orthologous genes are grouped by age determined by the clade they are conserved in; the histogram on top of the upset plot shows the number of genes that are specific to each clade; the histogram to the left of the upset plot represents the number of mouse orthologs for each species. Moreover, the bar for mouse shows the number of IGs for which an ortholog in another species was found.</p></caption>
<graphic xlink:href="fgene-12-654256-g009.tif"/>
</fig>
<p>Overall, we found that 70% of the IG orthologs are IGs as well, and 30% are MEGs (<xref ref-type="table" rid="T1">Table 1</xref>). As for MEGs (<xref ref-type="fig" rid="F9">Figure 9</xref>), less than 5% of their orthologs are IGs and the rest are MEGs (<xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>). As expected, due to its evolutionary closeness with the mouse, the genome with the highest conservation in gene architecture is the rat, with approximately 88% of conserved IGs, while the largest difference was found for the zebrafish genome with only 41% of conserved IGs. Furthermore, at the superfamily level as identified by SUPERFAMILY, we found that 24% of the IG superfamilies are conserved as IG-only, while 76% are predominantly IGs but contain at least one MEG ortholog in another species. Similarly, for MEG superfamilies, 35% were conserved as MEG-only, while 65% were predominantly MEGs with at least one IG ortholog. Hence, these analyses revealed that most of the IGs identified in the mouse genome remained with this genetic structure in other species thus supporting their high conservation across vertebrate genomes.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Summary of mouse MEG orthologs in selected genomes.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Genome</td>
<td valign="top" align="left">IGs</td>
<td valign="top" align="left">MEGs</td>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">Total</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Zebrafish</td>
<td valign="top" align="left">108</td>
<td valign="top" align="left">7,602</td>
<td valign="top" align="left">1,584</td>
<td valign="top" align="left">9,294</td>
</tr>
<tr>
<td valign="top" align="left">Chick</td>
<td valign="top" align="left">122</td>
<td valign="top" align="left">7,137</td>
<td valign="top" align="left">7</td>
<td valign="top" align="left">7,266</td>
</tr>
<tr>
<td valign="top" align="left">Opossum</td>
<td valign="top" align="left">159</td>
<td valign="top" align="left">10,304</td>
<td valign="top" align="left">966</td>
<td valign="top" align="left">11,429</td>
</tr>
<tr>
<td valign="top" align="left">Chimp</td>
<td valign="top" align="left">523</td>
<td valign="top" align="left">8,040</td>
<td valign="top" align="left">184</td>
<td valign="top" align="left">8,747</td>
</tr>
<tr>
<td valign="top" align="left">Human</td>
<td valign="top" align="left">603</td>
<td valign="top" align="left">9,520</td>
<td valign="top" align="left">374</td>
<td valign="top" align="left">10,497</td>
</tr>
<tr>
<td valign="top" align="left">Rat</td>
<td valign="top" align="left">1,171</td>
<td valign="top" align="left">10,457</td>
<td valign="top" align="left">59</td>
<td valign="top" align="left">11,687</td>
</tr>
</tbody>
</table></table-wrap>
<p>From the previous analysis we clusterized IGs and MEGs into five age-groups named by the taxonomic category that includes all the species of each group and thus represents the most recent common ancestor (MRCA) of each ortholog as inferred from the extant species analyzed. These groups were: <italic>Vertebrata</italic>, <italic>Tetrapoda</italic>, <italic>Theria</italic>, <italic>Eutheria</italic>, and <italic>Muridae</italic> (<xref ref-type="fig" rid="F9">Figure 9</xref>). From the reconstruction of the evolutionary history of the mouse IGs, our results revealed that their conservation is more marked in the <italic>Muridae</italic> as it contains the largest number of orthologs common to its members (<xref ref-type="fig" rid="F9">Figure 9A</xref>) followed in abundance by <italic>Vertebrata</italic>. This indicates that a large number of IGs are sufficiently old to have orthologs in all the vertebrates analyzed, and that the clades that include the closest relatives to mice have increasing IG ortholog abundance. In contrast, the highest conservation of MEGs in gene numbers is among <italic>Vertebrata</italic> thereby revealing a much older age than that of <italic>Muridae</italic> IGs (<xref ref-type="fig" rid="F9">Figure 9B</xref>). For both IG and MEG orthologs, the number of paralog-related genes increases with gene age consistently with the rate of duplication of the edges of each clade (<xref ref-type="fig" rid="F9">Figure 9</xref>). Moreover, a significant number of in-paralogous genes in the zebrafish genome, generated via duplication after speciation, have an ortholog in the mouse genome.</p>
<p>With the purpose of determining whether there was a differential functional enrichment of IGs according to their evolutionary age, we analyzed the enrichment of molecular pathway GO terms in both IGs and MEGs. In agreement with a specialized role of IGs, our results show that IG and MEG orthologs are involved in different biological pathways although some shared pathways were detected as well (<xref ref-type="fig" rid="F10">Figure 10</xref>). Conserved IG proteins with the MRCA among <italic>Vertebrata</italic> are histones highly enriched in negative regulation of megakaryocyte differentiation (&#x2013;log10, &#x2013;20.82). Other orthologs conserved to this group are linked in a lower level to thermogenesis, basal cell carcinoma, positive regulation of protein kinase A signaling, ribosomal large subunit assembly, wound healing, and vascular process in the circulatory system, platelet aggregation and development process such as cell-fate specification, negative regulation of animal organ morphogenesis, pituitary gland development, and regulation of bicellular tight junction assembly (&#x2013;log10, &#x2013;9.33 &#x003E; &#x2212;2.81). For <italic>Theria</italic> we found G alpha signaling events (&#x2013;log10, &#x2013;8.86), while in the <italic>Eutheria</italic> group peptidyl-serine phosphorylation of STAT protein, and chromatin silencing were enriched (&#x2013;log10, &#x2013;6.47;&#x2013;5.69). Noticeably, the most recent genes which belong to the <italic>Muridae</italic> group are exclusively enriched in intracellular sequestering of iron, complement receptor-mediated signaling pathway, and histone deubiquitination (&#x2013;log10, &#x2013;8.64 &#x003E; &#x2212;3.63).</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption><p>Functional enrichment of conserved mouse IG and MEG orthologs. <bold>(A)</bold> Gene ontology enrichment and pathways for mouse IG conserved as IG in the selected species, orthologs are clustered regarding their &#x201C;age&#x201D; group. <bold>(B)</bold> Gene ontology enrichment and pathways for a sample of 1,116 mouse MEGs conserved as MEG in the selected species, orthologs are clustered regarding their &#x201C;age&#x201D; group. The color key from yellow to brown indicates high to low <italic>p</italic>-values, respectively.</p></caption>
<graphic xlink:href="fgene-12-654256-g010.tif"/>
</fig>
<p>Our analysis also identified IG proteins with enriched pathways shared among the various age groups. Detection of chemical stimulus involved in sensory perception of bitter taste, and keratinization are GO terms shared among <italic>Muridae</italic> (&#x2013;log10, &#x2013;22.91;&#x2013;7.07), <italic>Eutheria</italic> (&#x2013;log10, &#x2013;13.36;&#x2013;2.10) and <italic>Theria</italic> (&#x2013;log10, &#x2013;12.37;&#x2013;2.39). Meanwhile, GPCR ligand binding is shared among <italic>Vertebrata</italic> (&#x2013;log10, &#x2013;5.59), <italic>Theria</italic> (&#x2013;log10, &#x2013;6.11), and <italic>Eutheria</italic> (&#x2013;log10, &#x2013;3.05) (<xref ref-type="fig" rid="F10">Figure 10A</xref>).</p>
<p>Then we focused on determining the conservation of the biological role of IG orthologs among the different genomes. GO terms that are highly enriched in the seven genomes analyzed were detection of chemical stimulus involved in sensory perception of smell, organic substance metabolism, DNA packaging, signaling, multicellular organismal process, cell communication, transport, and localization, while GO molecular function terms enriched are olfactory receptor activity, Wnt protein binding, odorant binding, protein binding, catalytic, and molecular transducer activity.</p>
</sec>
</sec>
<sec id="S4">
<title>Discussion</title>
<p>The mechanism of alternative splicing is a pivotal contributor to the diversity of proteins and the functional complexity of eukaryotic genomes. Intron-containing genes are capable of generating multiple protein isoforms by this process by which exons can be removed, lengthened, or shortened (<xref ref-type="bibr" rid="B54">Sakharkar et al., 2005b</xref>). In contrast, protein coding genes lacking introns produce a single peptide of predictable sequence which may undergo post-translational fine tuning. The availability of detailed annotation of sequenced genomes for many organisms contributes toward a better understanding of their structure which has been shaped by flexible evolutionary pressure (<xref ref-type="bibr" rid="B6">Bult et al., 2019</xref>). Studying the evolutionary dynamics of exon-intron patterns at the genomic level is likely to shed light on their role in genome structure and gene architecture.</p>
<p>To further our insight into the structure and the evolution of mouse IGs, we examined their function, differential expression in the developing brain, the signatures for post-translational modifications of their encoded proteins, their potential as modulators of multiprotein complexes, as well as their evolutionary dynamics in comparison to their orthologs in other vertebrates.</p>
<p>Our work revealed that, in accordance with previous studies, IGs and MEGs appear to specialize in different functions, which is supported by their enrichment in distinct biological pathways and differential abundance of post-translational modifications. As an additional indication of this specialization, IGs that are up-regulated during the development of the mouse telencephalon are associated with specific developmental programs in this structure and display a functional enrichment profile that differs from that of up-regulated MEGs. Moreover, mouse IGs, some of which fit the criteria to be defined as regulatory microproteins, appear to be of more recent origin than MEGs in vertebrates. Consistent with this notion, about half of IGs do not appear to have orthologs in other genomes, thus suggesting a relevant role in mouse evolution. The synteny of the &#x03B2;-protocadherins, however, points to a conserved mammalian function of these IGs, although some species-specific changes in this gene cluster were observed for the various species analyzed.</p>
<sec id="S4.SS1">
<title>Functional Assignment of IGs Highlights Prevalent and Unique Biological Roles</title>
<p>In the mouse genome, coding genes are predominantly of MEG type<sup><xref ref-type="fn" rid="footnote11">11</xref></sup> (80% of a total of 22,481). However, a considerable number of IGs are present in this genome, and the conservation of this fraction has been reported for other mammalian genomes (<xref ref-type="bibr" rid="B51">Sakharkar et al., 2006</xref>).</p>
<p>Our comparative analysis of the types of proteins encoded by IGs and MEGs revealed that these two populations have very divergent functional profiles. The most abundant types of proteins found among the former were the chromatin components histones and centromere proteins, transmembrane proteins of the G-protein coupled receptor family 1 and cadherins, and transcription factors containing BTB, forkhead, and HLH domains. In contrast, among MEGs, the most abundant proteins were those containing zinc finger, Pkinase (Protein-kinase), and PH (Pleckstrin homology) domains. These observations are consistent with previous findings that IGs are highly enriched in GPCRs, and seven transmembrane domain proteins and reveal a functional divergence between IGs and MEGs likely to be the result of differential evolutionary constraints (<xref ref-type="bibr" rid="B51">Sakharkar et al., 2006</xref>).</p>
<p>Among the transmembrane IG proteins, vomeronasal and taste receptors stand out as the most abundant. These proteins play a highly relevant role in chemoreception, which is the most salient means of the interaction of <italic>Muridae</italic> with their environment, with conspecifics, and with potential prey or predators. These findings suggest that some olfaction and taste receptors are required to be constantly transcribed in an efficient and rapid process, which may be a factor that favors their overrepresentation among IGs. Moreover, the taste receptor cells of vertebrates are continually renewed throughout the organism&#x2019;s life, which suggests a high demand for the housekeeping expression of these genes.</p>
<p>A considerable enrichment was also observed among IGs of BTB, Forkhead, HLH, HMG-Box transcription factors, and the chromatin components core histones H2AC, CENP-TC, and the histone Linker cluster. This indicates that IGs also play an important role in packaging, transcription, and chromatin assembly.</p>
<p>Altogether, these results suggest that intronless genes have specialized roles, and a strong link to gene expression regulation and chromatin structure. Overall, our results suggest that IG proteins have specialized, prevalent, and unique biological roles.</p>
</sec>
<sec id="S4.SS2">
<title>Differential Expression of IGs in Telencephalon Development, Key Genes for Wnt Signaling</title>
<p>Embryonic development relies on the complex interplay of fundamental cellular processes, including proliferation, differentiation, and apoptosis. Regulation of these events is essential for the establishment of structures and organ development. The formation of the telencephalic architecture results from the interaction of the signaling centers located on the edges of the pallium. In this process, the Wnt signaling pathway plays an essential role in the dorsomedial pattern, where signals from the cortical hem direct the morphogenesis of the hippocampus, the corpus callosum, and the generation of migratory Cajal-Retzius cells. From the lateral pallium, the anti-hem signals, EGF, FGF, Frizzled, and Sfrp determine the development of the olfactory cortex.</p>
<p>In a previous study, we highlighted the up-regulation of Wnt signaling genes of the canonical pathway in the early stages of the developing telencephalon in mice (<xref ref-type="bibr" rid="B43">Muley et al., 2020</xref>). The main receptors of the Wnt/beta-catenin signaling pathway are Frizzled domain proteins (Fzd), a family of seven-transmembrane G-protein coupled receptors that also possess a large extracellular cysteine-rich domain.</p>
<p>In this work, we found <italic>Fzd7, Fzd8</italic>, and <italic>Fzd10</italic> among the IG transmembrane proteins that were developmentally regulated in the embryonic telencephalon, as well as a group of 11 IGs of the protocadherin &#x03B2;-cluster. Previous studies have described <italic>Fzd8</italic> as an essential receptor of the Wnt pathway implicated in brain development and size (<xref ref-type="bibr" rid="B3">Boyd et al., 2015</xref>). This gene is also highly expressed in two human cancer cell lines, indicating that it may play a role in tumorigenesis (<xref ref-type="bibr" rid="B33">Li et al., 2017</xref>; <xref ref-type="bibr" rid="B44">Murillo-Garz&#x00F3;n et al., 2018</xref>). <italic>Fzd10</italic> functions in the canonical Wnt/beta-catenin signaling pathway which may be involved in signal transduction during tissue morphogenesis (<xref ref-type="bibr" rid="B69">Wang et al., 2005</xref>). In keeping with this, protocadherins have also been described as regulators in the Wnt signaling pathway (<xref ref-type="bibr" rid="B41">Mah and Weiner, 2017</xref>). More specifically, protocadherins of the &#x03B2;-cluster, along with those of the &#x03B1; and &#x03B3; clusters, act cooperatively in mice in olfactory-axon targeting, in the formation of diverse neural circuits, and in neuronal survival (<xref ref-type="bibr" rid="B21">Hasegawa et al., 2016</xref>, <xref ref-type="bibr" rid="B20">2017</xref>). These functions, however, correspond to developmental stages that occur later than the one addressed in this study. Hence, our striking finding that half of the 22 &#x03B2;-protocadherins are up-regulated along with Wnt receptors during the development of the telencephalon suggests that this group of mostly IGs has a differential function, thus far unknown, related to Wnt signaling at this early stage. Consistent with this idea, the Wnt binding molecular function GO term is one of the most conserved among IGs in the genomes analyzed in this study.</p>
<p>Our expression analysis additionally revealed up-regulation of <italic>Olig1, Bhlhe22, Bhlhe23, Pou3f1, Pou3f2, Pou3f4, Foxq1</italic>, and <italic>Neurog1</italic>. Notably, this represents the up-regulation of three of the four members of the <italic>Pou3</italic> class of transcription factors present in mouse. The <italic>Pou</italic> genes encode a broad family of 6 classes (<italic>Pou1f</italic>&#x2013;<italic>Pou6f</italic>) which are involved in developmental processes, mainly cell fate determination and differentiation (<xref ref-type="bibr" rid="B63">Tantin, 2013</xref>). Among those, the four members of the <italic>Pou3f</italic> class are preferentially expressed in ectodermal derivatives such as the developing mammalian nervous system (<xref ref-type="bibr" rid="B2">Bally-Cuif and Hammerschmidt, 2003</xref>). The human <italic>Pou3f3</italic> is an intronless gene also named <italic>Brain-1</italic>, which is a well-known transcription factor involved in the development of the central nervous system and its variant alleles have been associated with intellectual disability and language neurodevelopmental disorders (<xref ref-type="bibr" rid="B55">Snijders Blok et al., 2019</xref>). Furthermore, an important role of <italic>Neurog1</italic> is as a promoter of proliferation or neuronal differentiation, while <italic>Olig1</italic> is involved in the generation and maturation of specific neural cells during the development of the spinal cord (<xref ref-type="bibr" rid="B48">Qi et al., 2016</xref>; <xref ref-type="bibr" rid="B56">Song et al., 2017</xref>). <italic>Bhlhe22</italic> and <italic>Bhlhe23</italic> in turn, are among those that were up-regulated the most in mice during telencephalon development. 
In humans, <italic>Bhlhe22</italic> has been identified as a highly methylated gene in endometrial cancer with potential epigenetic biomarkers in cervical scrapings (<xref ref-type="bibr" rid="B35">Liew et al., 2019</xref>), while <italic>Bhlhe23</italic> has been linked to mammalian retinal development (<xref ref-type="bibr" rid="B71">Woods et al., 2018</xref>).</p>
<p>Finally, among the histone group, <italic>H2bc21 (Hist2h2be), H2bu2 (Hist3h2ba), H2aw (Hist3h2a)</italic> were also up-regulated. The <italic>H2b</italic> histone family members are responsible for the chromosomal fiber nucleosome structure in eukaryotes. <italic>H2bc21/Hist2h2be</italic> has been described in mouse as expressed in olfactory epithelium, while <italic>H2bu2 (Hist3h2ba)</italic> in neocortex and lens of camera type-eye, and <italic>H2aw (Hist3h2a)</italic> in retina<sup><xref ref-type="fn" rid="footnote12">12</xref></sup>. In humans, <italic>Hist2h2be</italic> is a hub gene related to poor prognosis in rhabdomyosarcoma tumors in pediatric patients (<xref ref-type="bibr" rid="B34">Li et al., 2019</xref>).</p>
<p>In summary, IGs appear to play crucial roles in the mouse telencephalon, being involved in gliogenesis, eye and sensory organ development, canonical Wnt signaling, and nucleosome organization, and to have molecular regulatory roles. Therefore, in accordance with the functional assignment, our expression analysis supports that IGs play a critical role during mammalian brain development.</p>
</sec>
<sec id="S4.SS3">
<title>IG Proteins in the Histone Category Have Unique Signatures and Undergo Specific PTMs</title>
<p>In agreement with the link of IGs to chromatin structure found in this and previous works, we also identified unique and highly represented PTM signatures in the histone protein category. Proteins encoded by mouse IGs have enriched signatures for histone <italic>H2A</italic> and <italic>H2B</italic>, characteristic of key core histones involved in chromatin structure in eukaryotic cells, as well as linker histone <italic>H1/H5</italic> and the <italic>CENB-type HTH</italic>. In addition to the identification of exclusive signatures, we compared potential regulatory mechanisms of IG and MEG PTMs. Although the variability of PTMs is high, these modifications are typically very specific and, altogether, 300 types are known to occur in proteins (<xref ref-type="bibr" rid="B70">Witze et al., 2007</xref>). Among all PTMs, we found that the most abundant (Phosphorylation and Acetylation) are differentially represented among IG and MEG proteins.</p>
<p>As could be expected from the observed enrichment in chromatin remodeling protein domains, our results show that proteins encoded by IGs undergo specific PTMs for histones such as crotonylation, methylation, citrullination, and sumoylation. Notably, our results suggest that IG-encoded histones have high specificity for lysine crotonylation, which is a recently identified post-translational modification associated with active promoters to directly stimulate transcription. Moreover, PTMs that change the physicochemical properties of amino acids, such as citrullination and amidation, are a characteristic feature highly enriched in IG proteins.</p>
</sec>
<sec id="S4.SS4">
<title>Potential Role of IGs as miPs in Neural Development and Function</title>
<p>When we assessed the potential role of IGs as microproteins, we found proteins with strong potential to be modulators of multi-protein complexes. The targets of microproteins are mostly transcription factors that bind DNA as dimers. In this study, we found potential miPs encoded by intronless genes that are <italic>bHLH</italic> transcription factors with a regulatory role during critical events such as neural development and function. For example, <italic>Ferd3l</italic>, an evolutionarily conserved <italic>bHLH</italic> protein, is expressed in the developing central nervous system and functions as a transcriptional inhibitor. Other examples are <italic>Bhlhe23</italic>, a transcriptional regulator in the pancreas and brain that marks the dimesencephalic boundary (<xref ref-type="bibr" rid="B4">Bramblett et al., 2002</xref>), <italic>Bhlha9</italic>, a regulator of apical ectodermal ridge formation during limb development (<xref ref-type="bibr" rid="B29">Kataoka et al., 2018</xref>), <italic>Msg1</italic>, which is predominantly expressed in nascent mesoderm, the heart tube, limb bud, and sclerotome during mouse embryogenesis (<xref ref-type="bibr" rid="B12">Dunwoodie et al., 1998</xref>), and <italic>Ascl5</italic>, a member of the ASCL family of proneural transcription factors that control the development of the nervous system, particularly neuroblast cell fate determination (<xref ref-type="bibr" rid="B19">Guillemot et al., 1993</xref>). Moreover, its potential role in tumorigenesis has been described with up-regulation in lung cancer and down-regulation in brain tumors such as glioblastoma, anaplastic oligoastrocytoma, anaplastic oligodendroglioma, and oligodendroglioma (<xref ref-type="bibr" rid="B68">Wang et al., 2017</xref>). Additionally, consistent with the potential role in the development of IG-encoded miPs, we identified members of the <italic>H1</italic> linker histone group that fit the criteria to be classified as miPs. 
These histone proteins belong to a complex family with distinct specificity for tissues, developmental stages, and organisms in which they are expressed (<xref ref-type="bibr" rid="B25">Izzo et al., 2008</xref>).</p>
</sec>
<sec id="S4.SS5">
<title>Patterns of Evolution of IGs Differ From Those of MEGs in Vertebrates</title>
<p>In accordance with earlier studies that reported the high conservation of mouse IG orthologs among other eukaryotic genomes, our analysis across seven species belonging to three classes of vertebrates revealed that the most numerous orthologs in each species were also IGs. This high rate of genetic structure conservation has been previously associated with their essential role in cell housekeeping functions, particularly those functionally pivotal proteins involved in molecular and biological roles such as transcription, translation, energy metabolism, amino-acid biosynthesis, and binding, which must be highly conserved (<xref ref-type="bibr" rid="B51">Sakharkar et al., 2006</xref>). Individually, eutherians (rat, chimp, human) are the species with the most orthologs. Moreover, <italic>Muridae</italic>, the clade that includes the mouse and rat, has the largest number of conserved orthologs and this number decreases as the clades gradually include the more distantly related species. In addition, a distinctly large number was also found common to all species analyzed (<italic>Vertebrata</italic>), which is consistent with previous findings that identified functional and evolutionary conservation of eukaryotic IGs with highly distant genomes such as bacteria (<xref ref-type="bibr" rid="B51">Sakharkar et al., 2006</xref>). The higher abundance of conserved IG orthologs among species more closely related to mouse, and their lower abundance in groups including more distant species, could be the result of the gradual loss of IG orthologs during the divergence of the diverse vertebrate branches. This abundance, however, could also be due to an increased rate of IG generation among mammals. Evidence supporting the latter possibility comes from our finding that, in stark contrast, an increase of conserved MEG orthologs among species more closely related to mouse was not observed.</p>
<p>The clade <italic>Vertebrata</italic> contains the older IGs that are involved in diverse functions, among which nucleosome structure stands out. Histone IGs are conserved among all species, with some losses in opossum and chick. As histones are basic proteins known to be conserved across eukaryotes, it is not surprising that they are found to be some of the oldest IGs. In some cases, histones are conserved in the genome as clusters, and in some others, they appear to have been generated in a specific lineage, due to multiple gene duplications. <italic>Muridae</italic>, in contrast to <italic>Vertebrata</italic>, contains the more recent IGs which are involved in keratinization, GPCRs, and chemodetection by GPCRs. We can conclude that <italic>Muridae</italic> IGs are younger and have more specific functions, whereas IGs conserved in all vertebrates are more ancient and have more general or basic functions, as expected due to the high conservation of sequence and function among vertebrates.</p>
<p>Gene duplication is an important mechanism for the acquisition of new genes, frequently providing specialized or new gene functions (<xref ref-type="bibr" rid="B40">Magadum et al., 2013</xref>). Known mechanisms of gene duplication include retroposition, tandem duplication, and genome duplication (<xref ref-type="bibr" rid="B47">Pan and Zhang, 2008</xref>). Our analysis shows that a large fraction (48%) of single-exon genes in the mouse genome are a consequence of retroposition. Moreover, regarding the duplication events, we found clear examples of IG tandem repeat cluster organization. For example, the syntenic conservation of the tandem cluster of &#x03B2;-protocadherins and their neural tissue-specific expression suggest that some aspects of the nervous system characteristic of mammals could be associated with the duplication of intronless genes, such as olfactory-axon targeting, the formation of neural circuits, neuronal survival, or neurite self-avoidance during development (<xref ref-type="bibr" rid="B10">Dennis et al., 2016</xref>; <xref ref-type="bibr" rid="B20">Hasegawa et al., 2017</xref>; <xref ref-type="bibr" rid="B5">Brasch et al., 2019</xref>). Similar to the single exon &#x03B2;-cluster of protocadherins, we also observed that IG histones in the mouse genome are present as tandem families with a tendency to cluster in their chromosome organization. An example of this is the <italic>H2A</italic> histone family member <italic>L1J</italic>, a family of ten IG members in the mouse X chromosome, with only one ortholog (<italic>H2AL1RP</italic>) in human and one (<italic>H2A-beta</italic>) in opossum. Almost all of the tandem repeat genes have parallel transcription orientation, which means they are encoded on the same strand.</p>
<p>The disrupted gene structure of most eukaryotic genes has led to a long-lasting debate regarding the origin of introns. The &#x201C;exon theory of genes&#x201D; also known as &#x201C;introns-early,&#x201D; proposed the presence of introns in prokaryotic primordial genes (<xref ref-type="bibr" rid="B17">Gilbert, 1987</xref>; <xref ref-type="bibr" rid="B49">Roy and Gilbert, 2006</xref>), while in the &#x201C;insertional theory of introns&#x201D; or &#x201C;introns-late theory,&#x201D; introns are a eukaryotic innovation (<xref ref-type="bibr" rid="B11">Doolittle and Stoltzfus, 1993</xref>; <xref ref-type="bibr" rid="B58">Stoltzfus, 1994</xref>; <xref ref-type="bibr" rid="B59">Stoltzfus et al., 1994</xref>; <xref ref-type="bibr" rid="B42">Mattick, 1994</xref>; <xref ref-type="bibr" rid="B37">Logsdon, 1998</xref>). Recent genomic evidence supports a view that combines aspects of both theories but still places the invasion of eukaryotic genes by introns at the emergence of eukaryotic cells (<xref ref-type="bibr" rid="B30">Koonin, 2007</xref>). In accordance with this combined view, the comparison of mouse intron-bearing and intron-lacking IG orthologs among the analyzed organisms suggests that IGs are more recent than MEGs. This is also consistent with findings that have revealed that intron-exon gene structure is highly stable among vertebrates and that individual intron losses outnumber intron gains in diverse vertebrate lineages (<xref ref-type="bibr" rid="B50">Roy et al., 2003</xref>; <xref ref-type="bibr" rid="B7">Coulombe-Huntington and Majewski, 2007</xref>; <xref ref-type="bibr" rid="B38">Loh et al., 2008</xref>; <xref ref-type="bibr" rid="B67">Venkatesh et al., 2014</xref>). This evolutionary stability also suggests that the observed increase in IG abundance in <italic>Muridae</italic> is due to gene duplication of IGs rather than intron loss of MEG orthologs. 
Our findings, however, also revealed some interesting gene superfamilies of a few members each, containing only IGs which were restricted to one or two species. These IGs are also likely to have been generated recently in the branches leading to the species analyzed herein but the mechanism involved remains to be studied further.</p>
<p>The present study aimed to identify the conservation of the role of intronless genes in mammals and other vertebrate genomes. A comprehensive understanding of their biological function is essential to compare and contrast their evolution with that of intron-containing genes. Hence, we studied the complex regulatory role of intronless genes and their conservation in cellular environments using computational functional assignment, gene expression analysis, and evolutionary reconstruction. First, we determined that the functions associated with IGs are very different from those associated with MEGs. Expression analysis of the developing telencephalon also revealed specific up-regulation of IGs that encode proteins involved in Wnt signaling, <italic>bHLH</italic> and <italic>Pou</italic> transcription factors, as well as chromatin proteins. Among Wnt signaling-related proteins, it was striking to detect up-regulation of half of all protocadherins of the &#x03B2;-cluster. Moreover, some IG transcription factors meet the criteria to be considered microproteins and thus appear to have modulatory properties of protein complex formation. Overall, our results highlight a role for IGs as essential modulators of diverse biological processes as pivotal as cortical development, chemosensory functions, chromatin condensation, and gene silencing. In fact, specific modifications of IG proteins indicate that their regulatory roles extend to the post-translational level. Notably, some of the IGs highlighted in this study also have potential clinical relevance in humans. For example, <italic>Fzd8</italic> and <italic>pcdhs</italic>, which are associated with Wnt signaling, an evolutionarily conserved regulatory pathway related to cell fate determination and proliferation during development, have also been identified as part of a key mechanism in cancer biology. 
Other IGs discussed in this study and linked with cancer and neurodevelopmental disorders were <italic>Pou3f3, Bhlhe22, Ascl5</italic>, and <italic>Hist2h2be</italic>.</p>
<p>Furthermore, the analysis of the evolutionary patterns of IGs revealed a large fraction of genes that appear to be of more recent generation as compared to the older and more conserved MEGs. Overall, this analysis reveals specific functions of IGs that distinguish them from MEGs and therefore strengthen the notion suggested by previous observations that these two groups are under differential evolutionary constraints.</p>
</sec>
</sec>
<sec id="S5">
<title>Data Availability Statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://data.mendeley.com/datasets/rdt5757cbw/1">https://data.mendeley.com/datasets/rdt5757cbw/1</ext-link>.</p>
</sec>
<sec id="S6">
<title>Author Contributions</title>
<p>KA-P: project design, performed data collection, manuscript writing, proofreading, carried out bioinformatic analyses, prepared figures, and their interpretation. JR-R: expertise in bioinformatic analysis methods, performed evolutionary reconstruction, prepared figures. GH-O: literature search, bioinformatic analyses, and prepared figures. DV: writing, bioinformatic analysis, and prepared figures. ED-V: writing, bioinformatic analysis, and prepared figures. AG-G: expertise in data analysis methods for the API-REST and data collection, prepared figures. VM: proofreading, performed DEG analysis, prepared figures. AV-E: supervised the study, provided advice on the research strategy, and participated in manuscript writing. MH-R: co-director of the study and project development, performed bioinformatic analysis and interpretation, writing, and proofreading. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="financial-disclosure">
<p><bold>Funding.</bold> This project was supported by research funding provided by CONACyT grants QRO-2018-01-01-88344, 314869, 254206, 267749, and 315802 as well as DGAPA IN229620. KA-P received financial support from the DGAPA program for a postdoctoral fellowship at the INB UNAM and is a current holder of support from CONACyT (CVU: 227919).</p>
</fn>
</fn-group>
<ack>
<p>KA-P acknowledges the CABANA program for training in bioinformatics. For technical support we thank Luis Alberto Aguilar Bautista, Alejandro de Le&#x00F3;n Cuevas, Carlos Sair Flores Bautista, and Jair Garc&#x00ED;a of the Laboratorio Nacional de Visualizaci&#x00F3;n Cient&#x00ED;fica Avanzada (LAVIS). Critical comments and suggestions to this project development were received from Roddy Jorquera, Carolina Gonzalez, Michael Jerzioski, and Carlos Lozano Flores.</p>
</ack>
<sec id="S9" sec-type="supplementary material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2021.654256/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2021.654256/full#supplementary-material</ext-link></p>
<supplementary-material id="TS1">
<label>Supplementary Code</label>
<caption><p><ext-link ext-link-type="uri" xlink:href="https://github.com/GEmilioHO/intronless_genes">https://github.com/GEmilioHO/intronless_genes</ext-link>.</p></caption>
</supplementary-material>
<supplementary-material id="TS2">
<label>Supplementary Tool</label>
<caption><p><ext-link ext-link-type="uri" xlink:href="https://gitlab.com/jarr.tecn/revolutionh-tl">https://gitlab.com/jarr.tecn/revolutionh-tl</ext-link>.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Image_1.TIFF" id="FS1" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 1</label>
<caption><p>Prevalence of intronless protein-coding genes among single-exon genes in the mouse genome. All mouse genes having one exon are classified regarding their gene biotype, proportion of protein-coding intronless genes (IGs) is highlighted in pink.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Image_2.TIFF" id="FS2" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 2</label>
<caption><p>Enrichment of SUPERFAMILY assignments of mouse IG and MEG proteins. <bold>(A)</bold> Enriched scop families in IG proteins: The scop families with the largest gene ratios are plotted in order of gene ratio. The size of the dots represents the number of genes in the significant background list associated with the scop family, while the color of the dots represents the adjusted p-values. <bold>(B)</bold> Enriched scop families in MEG proteins: The scop families with the largest gene ratios are plotted in order of gene ratio. The size of the dots represents the number of genes in the significant background list associated with the scop family, while the color of the dots represents the adjusted p-values.</p></caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Amigo</surname> <given-names>J. D.</given-names></name> <name><surname>Opazo</surname> <given-names>J. C.</given-names></name> <name><surname>Jorquera</surname> <given-names>R.</given-names></name> <name><surname>Wichmann</surname> <given-names>I. A.</given-names></name> <name><surname>Garcia-Bloj</surname> <given-names>B. A.</given-names></name> <name><surname>Alarcon</surname> <given-names>M. A.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>The reprimo gene family: a novel gene lineage in gastric cancer with tumor suppressive properties.</article-title> <source><italic>Int. J. Mol. Sci.</italic></source> <volume>19</volume>:<issue>1862</issue>. <pub-id pub-id-type="doi">10.3390/ijms19071862</pub-id> <pub-id pub-id-type="pmid">29941787</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bally-Cuif</surname> <given-names>L.</given-names></name> <name><surname>Hammerschmidt</surname> <given-names>M.</given-names></name></person-group> (<year>2003</year>). <article-title>Induction and patterning of neuronal development, and its connection to cell cycle control.</article-title> <source><italic>Curr. Opin. Neurobiol.</italic></source> <volume>13</volume> <fpage>16</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/s0959-4388(03)00015-1</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boyd</surname> <given-names>J. L.</given-names></name> <name><surname>Skove</surname> <given-names>S. L.</given-names></name> <name><surname>Rouanet</surname> <given-names>J. P.</given-names></name> <name><surname>Pilaz</surname> <given-names>L. J.</given-names></name> <name><surname>Bepler</surname> <given-names>T.</given-names></name> <name><surname>Gord&#x00E2;n</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Human-chimpanzee differences in a FZD8 enhancer alter cell-cycle dynamics in the developing neocortex.</article-title> <source><italic>Curr. Biol.</italic></source> <volume>25</volume> <fpage>772</fpage>&#x2013;<lpage>779</lpage>. <pub-id pub-id-type="doi">10.1016/j.cub.2015.01.041</pub-id> <pub-id pub-id-type="pmid">25702574</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bramblett</surname> <given-names>D. E.</given-names></name> <name><surname>Copeland</surname> <given-names>N. G.</given-names></name> <name><surname>Jenkins</surname> <given-names>N. A.</given-names></name> <name><surname>Tsai</surname> <given-names>M. J.</given-names></name></person-group> (<year>2002</year>). <article-title>BHLHB4 Is a BHLH transcriptional regulator in pancreas and brain that marks the dimesencephalic boundary.</article-title> <source><italic>Genomics</italic></source> <volume>79</volume> <fpage>402</fpage>&#x2013;<lpage>412</lpage>. <pub-id pub-id-type="doi">10.1006/geno.2002.6708</pub-id> <pub-id pub-id-type="pmid">11863370</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brasch</surname> <given-names>J.</given-names></name> <name><surname>Goodman</surname> <given-names>K. M.</given-names></name> <name><surname>Noble</surname> <given-names>A. J.</given-names></name> <name><surname>Rapp</surname> <given-names>M.</given-names></name> <name><surname>Mannepalli</surname> <given-names>S.</given-names></name> <name><surname>Bahna</surname> <given-names>F.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Visualization of clustered protocadherin neuronal self-recognition complexes.</article-title> <source><italic>Nature</italic></source> <volume>569</volume> <fpage>280</fpage>&#x2013;<lpage>283</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-019-1089-3</pub-id> <pub-id pub-id-type="pmid">30971825</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bult</surname> <given-names>C. J.</given-names></name> <name><surname>Blake</surname> <given-names>J. A.</given-names></name> <name><surname>Smith</surname> <given-names>C. L.</given-names></name> <name><surname>Kadin</surname> <given-names>J. A.</given-names></name> <name><surname>Richardson</surname> <given-names>J. E.</given-names></name></person-group> (<year>2019</year>). <article-title>Mouse Genome Database (MGD) 2019.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>47</volume> <fpage>D801</fpage>&#x2013;<lpage>D806</lpage>.</citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Coulombe-Huntington</surname> <given-names>J.</given-names></name> <name><surname>Majewski</surname> <given-names>J.</given-names></name></person-group> (<year>2007</year>). <article-title>Characterization of intron loss events in mammals.</article-title> <source><italic>Genome Res.</italic></source> <volume>17</volume> <fpage>23</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1101/gr.5703406</pub-id> <pub-id pub-id-type="pmid">17108319</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Klein</surname> <given-names>N.</given-names></name> <name><surname>Magnani</surname> <given-names>E.</given-names></name> <name><surname>Banf</surname> <given-names>M.</given-names></name> <name><surname>Rhee</surname> <given-names>S. Y.</given-names></name></person-group> (<year>2015</year>). <article-title>MicroProtein Prediction Program (MiP3): a software for predicting microproteins and their target transcription factors.</article-title> <source><italic>Int. J. Genomics</italic></source> <volume>2015</volume>:<issue>734147</issue>.</citation></ref>
<ref id="B9"><citation citation-type="book"><person-group person-group-type="author"><name><surname>de Souza</surname> <given-names>S. J.</given-names></name></person-group> (<year>2003</year>). &#x201C;<article-title>The emergence of a synthetic theory of intron evolution</article-title>,&#x201D; in <source><italic>Origin and Evolution of New Gene Functions</italic></source>, <role>ed.</role> <person-group person-group-type="editor"><name><surname>Long</surname> <given-names>M.</given-names></name></person-group> (<publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>117</fpage>&#x2013;<lpage>121</lpage>. <pub-id pub-id-type="doi">10.1007/978-94-010-0229-5_2</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dennis</surname> <given-names>D.</given-names></name> <name><surname>Picketts</surname> <given-names>D.</given-names></name> <name><surname>Slack</surname> <given-names>R. S.</given-names></name> <name><surname>Schuurmans</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>Forebrain neurogenesis: from embryo to adult.</article-title> <source><italic>Trends Dev. Biol.</italic></source> <volume>9</volume>:<issue>77</issue>.</citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doolittle</surname> <given-names>W. F.</given-names></name> <name><surname>Stoltzfus</surname> <given-names>A.</given-names></name></person-group> (<year>1993</year>). <article-title>Genes-in-pieces revisited.</article-title> <source><italic>Nature</italic></source> <volume>361</volume>:<issue>403</issue>. <pub-id pub-id-type="doi">10.1038/361403a0</pub-id> <pub-id pub-id-type="pmid">8429878</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dunwoodie</surname> <given-names>S. L.</given-names></name> <name><surname>Rodriguez</surname> <given-names>T. A.</given-names></name> <name><surname>Beddington</surname> <given-names>R. S.</given-names></name></person-group> (<year>1998</year>). <article-title>Msg1 and Mrg1, founding members of a gene family, show distinct patterns of gene expression during mouse embryogenesis.</article-title> <source><italic>Mech. Dev.</italic></source> <volume>72</volume> <fpage>27</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1016/s0925-4773(98)00011-2</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eguen</surname> <given-names>T.</given-names></name> <name><surname>Straub</surname> <given-names>D.</given-names></name> <name><surname>Graeff</surname> <given-names>M.</given-names></name> <name><surname>Wenkel</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>MicroProteins: small size&#x2013;big impact.</article-title> <source><italic>Trends Plant Sci.</italic></source> <volume>20</volume> <fpage>477</fpage>&#x2013;<lpage>482</lpage>. <pub-id pub-id-type="doi">10.1016/j.tplants.2015.05.011</pub-id> <pub-id pub-id-type="pmid">26115780</pub-id></citation></ref>
<ref id="B14"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Fedorova</surname> <given-names>L.</given-names></name> <name><surname>Fedorov</surname> <given-names>A.</given-names></name></person-group> (<year>2003</year>). &#x201C;<article-title>Introns in gene evolution</article-title>,&#x201D; in <source><italic>Origin and Evolution of New Gene Functions</italic></source>, <role>ed.</role> <person-group person-group-type="editor"><name><surname>Long</surname> <given-names>M.</given-names></name></person-group> (<publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>123</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1007/978-94-010-0229-5_3</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fong</surname> <given-names>N.</given-names></name> <name><surname>Kim</surname> <given-names>H.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Ji</surname> <given-names>X.</given-names></name> <name><surname>Qiu</surname> <given-names>J.</given-names></name> <name><surname>Saldi</surname> <given-names>T.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Pre-mRNA splicing is facilitated by an optimal RNA polymerase II elongation rate.</article-title> <source><italic>Genes Dev.</italic></source> <volume>28</volume> <fpage>2663</fpage>&#x2013;<lpage>2676</lpage>. <pub-id pub-id-type="doi">10.1101/gad.252106.114</pub-id> <pub-id pub-id-type="pmid">25452276</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gentles</surname> <given-names>A. J.</given-names></name> <name><surname>Karlin</surname> <given-names>S.</given-names></name></person-group> (<year>1999</year>). <article-title>Why are human G-Protein-coupled receptors predominantly intronless?</article-title> <source><italic>Trends Genet.</italic></source> <volume>15</volume> <fpage>47</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/s0168-9525(98)01648-5</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gilbert</surname> <given-names>W.</given-names></name></person-group> (<year>1987</year>). <article-title>The exon theory of genes.</article-title> <source><italic>Cold Spring Harb. Symp. Quant. Biol.</italic></source> <volume>52</volume> <fpage>901</fpage>&#x2013;<lpage>905</lpage>.</citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grzybowska</surname> <given-names>E. A.</given-names></name></person-group> (<year>2012</year>). <article-title>Human intronless genes: functional groups, associated diseases, evolution, and mRNA processing in absence of splicing.</article-title> <source><italic>Biochem. Biophys. Res. Commun.</italic></source> <volume>424</volume> <fpage>1</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2012.06.092</pub-id> <pub-id pub-id-type="pmid">22732409</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guillemot</surname> <given-names>F.</given-names></name> <name><surname>Lo</surname> <given-names>L. C.</given-names></name> <name><surname>Johnson</surname> <given-names>J. E.</given-names></name> <name><surname>Auerbach</surname> <given-names>A.</given-names></name> <name><surname>Anderson</surname> <given-names>D. J.</given-names></name> <name><surname>Joyner</surname> <given-names>A. L.</given-names></name></person-group> (<year>1993</year>). <article-title>Mammalian achaete-scute homolog 1 is required for the early development of olfactory and autonomic neurons.</article-title> <source><italic>Cell</italic></source> <volume>75</volume> <fpage>463</fpage>&#x2013;<lpage>476</lpage>. <pub-id pub-id-type="doi">10.1016/0092-8674(93)90381-y</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hasegawa</surname> <given-names>S.</given-names></name> <name><surname>Kobayashi</surname> <given-names>H.</given-names></name> <name><surname>Kumagai</surname> <given-names>M.</given-names></name> <name><surname>Nishimaru</surname> <given-names>H.</given-names></name> <name><surname>Tarusawa</surname> <given-names>E.</given-names></name> <name><surname>Kanda</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Clustered protocadherins are required for building functional neural circuits.</article-title> <source><italic>Front. Mol. Neurosci.</italic></source> <volume>10</volume>:<issue>114</issue>. <pub-id pub-id-type="doi">10.3389/fnmol.2017.00114</pub-id> <pub-id pub-id-type="pmid">28484370</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hasegawa</surname> <given-names>S.</given-names></name> <name><surname>Kumagai</surname> <given-names>M.</given-names></name> <name><surname>Hagihara</surname> <given-names>M.</given-names></name> <name><surname>Nishimaru</surname> <given-names>H.</given-names></name> <name><surname>Hirano</surname> <given-names>K.</given-names></name> <name><surname>Kaneko</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Distinct and cooperative functions for the protocadherin-&#x03B1;, -&#x03B2; and -&#x03B3; clusters in neuronal survival and axon targeting.</article-title> <source><italic>Front. Mol. Neurosci.</italic></source> <volume>9</volume>:<issue>155</issue>. <pub-id pub-id-type="doi">10.3389/fnmol.2016.00155</pub-id> <pub-id pub-id-type="pmid">28066179</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hellmuth</surname> <given-names>M.</given-names></name> <name><surname>Hernandez-Rosales</surname> <given-names>M.</given-names></name> <name><surname>Huber</surname> <given-names>K. T.</given-names></name> <name><surname>Moulton</surname> <given-names>V.</given-names></name> <name><surname>Stadler</surname> <given-names>P. F.</given-names></name> <name><surname>Wieseke</surname> <given-names>N.</given-names></name></person-group> (<year>2013</year>). <article-title>Orthology relations, symbolic ultrametrics, and cographs.</article-title> <source><italic>J. Math. Biol.</italic></source> <volume>66</volume> <fpage>399</fpage>&#x2013;<lpage>420</lpage>. <pub-id pub-id-type="doi">10.1007/s00285-012-0525-x</pub-id> <pub-id pub-id-type="pmid">22456957</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hernandez-Rosales</surname> <given-names>M.</given-names></name> <name><surname>Hellmuth</surname> <given-names>M.</given-names></name> <name><surname>Wieseke</surname> <given-names>N.</given-names></name> <name><surname>Huber</surname> <given-names>K. T.</given-names></name> <name><surname>Moulton</surname> <given-names>V.</given-names></name> <name><surname>Stadler</surname> <given-names>P. F.</given-names></name></person-group> (<year>2012</year>). <article-title>From event-labeled gene trees to species trees.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>13</volume>:<issue>S6</issue>.</citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hung</surname> <given-names>M. S.</given-names></name> <name><surname>Lin</surname> <given-names>Y. C.</given-names></name> <name><surname>Mao</surname> <given-names>J. H.</given-names></name> <name><surname>Kim</surname> <given-names>I. J.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>C. T.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>Functional polymorphism of the CK2&#x03B1; intronless gene plays oncogenic roles in lung cancer.</article-title> <source><italic>PLoS One</italic></source> <volume>5</volume>:<issue>e11418</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0011418</pub-id> <pub-id pub-id-type="pmid">20625391</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Izzo</surname> <given-names>A.</given-names></name> <name><surname>Kamieniarz</surname> <given-names>K.</given-names></name> <name><surname>Schneider</surname> <given-names>R.</given-names></name></person-group> (<year>2008</year>). <article-title>The histone H1 family: specific members, specific functions?</article-title> <source><italic>Biol. Chem.</italic></source> <volume>389</volume> <fpage>333</fpage>&#x2013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1515/bc.2008.037</pub-id> <pub-id pub-id-type="pmid">18208346</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jorquera</surname> <given-names>R.</given-names></name> <name><surname>Gonz&#x00E1;lez</surname> <given-names>C.</given-names></name> <name><surname>Clausen</surname> <given-names>P.</given-names></name> <name><surname>Petersen</surname> <given-names>B.</given-names></name> <name><surname>Holmes</surname> <given-names>D. S.</given-names></name></person-group> (<year>2018</year>). <article-title>Improved ontology for eukaryotic single-exon coding sequences in biological databases.</article-title> <source><italic>Database</italic></source> <volume>2018</volume> <fpage>1</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1093/database/bay089</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jorquera</surname> <given-names>R.</given-names></name> <name><surname>Ortiz</surname> <given-names>R.</given-names></name> <name><surname>Ossandon</surname> <given-names>F.</given-names></name> <name><surname>Cardenas</surname> <given-names>J. P.</given-names></name> <name><surname>Sepulveda</surname> <given-names>R.</given-names></name> <name><surname>Gonzalez</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>SinEx DB: a database for single exon coding sequences in mammalian genomes.</article-title> <source><italic>Database</italic></source> <volume>2016</volume>:<issue>baw095</issue>. <pub-id pub-id-type="doi">10.1093/database/baw095</pub-id> <pub-id pub-id-type="pmid">27278816</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kaessmann</surname> <given-names>H.</given-names></name> <name><surname>Vinckenbosch</surname> <given-names>N.</given-names></name> <name><surname>Long</surname> <given-names>M.</given-names></name></person-group> (<year>2009</year>). <article-title>RNA-based gene duplication: mechanistic and evolutionary insights.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>10</volume> <fpage>19</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2487</pub-id> <pub-id pub-id-type="pmid">19030023</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kataoka</surname> <given-names>K.</given-names></name> <name><surname>Matsushima</surname> <given-names>T.</given-names></name> <name><surname>Ito</surname> <given-names>Y.</given-names></name> <name><surname>Sato</surname> <given-names>T.</given-names></name> <name><surname>Yokoyama</surname> <given-names>S.</given-names></name> <name><surname>Asahara</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Bhlha9 regulates apical ectodermal ridge formation during limb development.</article-title> <source><italic>J. Bone Miner. Metab.</italic></source> <volume>36</volume> <fpage>64</fpage>&#x2013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1007/s00774-017-0820-0</pub-id> <pub-id pub-id-type="pmid">28324176</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2007</year>). <article-title>The biological big bang model for the major transitions in evolution.</article-title> <source><italic>Biol. Direct</italic></source> <volume>2</volume>:<issue>21</issue>. <pub-id pub-id-type="doi">10.1186/1745-6150-2-21</pub-id> <pub-id pub-id-type="pmid">17708768</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lechner</surname> <given-names>M.</given-names></name> <name><surname>Findei&#x00DF;</surname> <given-names>S.</given-names></name> <name><surname>Steiner</surname> <given-names>L.</given-names></name> <name><surname>Marz</surname> <given-names>M.</given-names></name> <name><surname>Stadler</surname> <given-names>P. F.</given-names></name> <name><surname>Prohaska</surname> <given-names>S. J.</given-names></name></person-group> (<year>2011</year>). <article-title>Proteinortho: detection of (Co-) orthologs in large-scale analysis.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>12</volume>:<issue>124</issue>.</citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lechner</surname> <given-names>M.</given-names></name> <name><surname>Hernandez-Rosales</surname> <given-names>M.</given-names></name> <name><surname>Doerr</surname> <given-names>D.</given-names></name> <name><surname>Wieseke</surname> <given-names>N.</given-names></name> <name><surname>Th&#x00E9;venin</surname> <given-names>A.</given-names></name> <name><surname>Stoye</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Orthology detection combining clustering and synteny for very large datasets.</article-title> <source><italic>PLoS One</italic></source> <volume>9</volume>:<issue>e105015</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0105015</pub-id> <pub-id pub-id-type="pmid">25137074</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Q.</given-names></name> <name><surname>Ye</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name> <name><surname>Lin</surname> <given-names>C.</given-names></name> <name><surname>Huang</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>FZD8, a Target of P53, promotes bone metastasis in prostate cancer by activating canonical Wnt/&#x03B2;-Catenin signaling.</article-title> <source><italic>Cancer Lett.</italic></source> <volume>402</volume> <fpage>166</fpage>&#x2013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/j.canlet.2017.05.029</pub-id> <pub-id pub-id-type="pmid">28602974</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Q.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Jiang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Q.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>CDK1 and CCNB1 as potential diagnostic markers of rhabdomyosarcoma: validation following bioinformatics analysis.</article-title> <source><italic>BMC Med. Genomics</italic></source> <volume>12</volume>:<issue>198</issue>.</citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liew</surname> <given-names>P. L.</given-names></name> <name><surname>Huang</surname> <given-names>R. L.</given-names></name> <name><surname>Wu</surname> <given-names>T. I.</given-names></name> <name><surname>Liao</surname> <given-names>C. C.</given-names></name> <name><surname>Chen</surname> <given-names>C. W.</given-names></name> <name><surname>Su</surname> <given-names>P. H.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Combined genetic mutations and DNA-methylated genes as biomarkers for endometrial cancer detection from cervical scrapings.</article-title> <source><italic>Clin. Epigenetics</italic></source> <volume>11</volume>:<issue>170</issue>.</citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X. Y.</given-names></name> <name><surname>Fan</surname> <given-names>Y. C.</given-names></name> <name><surname>Gao</surname> <given-names>S.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>L. Y.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Methylation of SOX1 and VIM promoters in serum as potential biomarkers for hepatocellular carcinoma.</article-title> <source><italic>Neoplasma</italic></source> <volume>64</volume> <fpage>745</fpage>&#x2013;<lpage>753</lpage>. <pub-id pub-id-type="doi">10.4149/neo_2017_513</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Logsdon</surname> <given-names>J. M.</given-names> <suffix>Jr.</suffix></name></person-group> (<year>1998</year>). <article-title>The recent origins of spliceosomal introns revisited.</article-title> <source><italic>Curr. Opin. Genet. Dev.</italic></source> <volume>8</volume> <fpage>637</fpage>&#x2013;<lpage>648</lpage>. <pub-id pub-id-type="doi">10.1016/s0959-437x(98)80031-2</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loh</surname> <given-names>Y. H.</given-names></name> <name><surname>Brenner</surname> <given-names>S.</given-names></name> <name><surname>Venkatesh</surname> <given-names>B.</given-names></name></person-group> (<year>2008</year>). <article-title>Investigation of loss and gain of introns in the compact genomes of pufferfishes (Fugu and Tetraodon).</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>25</volume> <fpage>526</fpage>&#x2013;<lpage>535</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msm278</pub-id> <pub-id pub-id-type="pmid">18089580</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Louhichi</surname> <given-names>A.</given-names></name> <name><surname>Fourati</surname> <given-names>A.</given-names></name> <name><surname>Reba&#x00EF;</surname> <given-names>A.</given-names></name></person-group> (<year>2011</year>). <article-title>IGD: a resource for intronless genes in the human genome.</article-title> <source><italic>Gene</italic></source> <volume>488</volume> <fpage>35</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1016/j.gene.2011.08.013</pub-id> <pub-id pub-id-type="pmid">21914464</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Magadum</surname> <given-names>S.</given-names></name> <name><surname>Banerjee</surname> <given-names>U.</given-names></name> <name><surname>Murugan</surname> <given-names>P.</given-names></name> <name><surname>Gangapur</surname> <given-names>D.</given-names></name> <name><surname>Ravikesavan</surname> <given-names>R.</given-names></name></person-group> (<year>2013</year>). <article-title>Gene duplication as a major force in evolution.</article-title> <source><italic>J. Genet.</italic></source> <volume>92</volume> <fpage>155</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1007/s12041-013-0212-8</pub-id> <pub-id pub-id-type="pmid">23640422</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mah</surname> <given-names>K. M.</given-names></name> <name><surname>Weiner</surname> <given-names>J. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Regulation of Wnt signaling by protocadherins.</article-title> <source><italic>Semin. Cell Dev. Biol.</italic></source> <volume>69</volume> <fpage>158</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1016/j.semcdb.2017.07.043</pub-id> <pub-id pub-id-type="pmid">28774578</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mattick</surname> <given-names>J. S.</given-names></name></person-group> (<year>1994</year>). <article-title>Introns: evolution and function.</article-title> <source><italic>Curr. Opin. Genet. Dev.</italic></source> <volume>4</volume> <fpage>823</fpage>&#x2013;<lpage>831</lpage>. <pub-id pub-id-type="doi">10.1016/0959-437x(94)90066-3</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muley</surname> <given-names>V. Y.</given-names></name> <name><surname>L&#x00F3;pez-Victorio</surname> <given-names>C. J.</given-names></name> <name><surname>Ayala-Sumuano</surname> <given-names>J. T.</given-names></name> <name><surname>Gonz&#x00E1;lez-Gallardo</surname> <given-names>A.</given-names></name> <name><surname>Gonz&#x00E1;lez-Santos</surname> <given-names>L.</given-names></name> <name><surname>Lozano-Flores</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Conserved and divergent expression dynamics during early patterning of the telencephalon in mouse and chick embryos.</article-title> <source><italic>Prog. Neurobiol.</italic></source> <volume>186</volume>:<issue>101735</issue>. <pub-id pub-id-type="doi">10.1016/j.pneurobio.2019.101735</pub-id> <pub-id pub-id-type="pmid">31846713</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murillo-Garz&#x00F3;n</surname> <given-names>V.</given-names></name> <name><surname>Gorro&#x00F1;o-Etxebarria</surname> <given-names>I.</given-names></name> <name><surname>&#x00C5;kerfelt</surname> <given-names>M.</given-names></name> <name><surname>Puustinen</surname> <given-names>M. C.</given-names></name> <name><surname>Sistonen</surname> <given-names>L.</given-names></name> <name><surname>Nees</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Frizzled-8 integrates Wnt-11 and transforming growth factor-&#x03B2; signaling in prostate cancer.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>9</volume>:<issue>1747</issue>.</citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Noonan</surname> <given-names>J. P.</given-names></name> <name><surname>Grimwood</surname> <given-names>J.</given-names></name> <name><surname>Schmutz</surname> <given-names>J.</given-names></name> <name><surname>Dickson</surname> <given-names>M.</given-names></name> <name><surname>Myers</surname> <given-names>R. M.</given-names></name></person-group> (<year>2004</year>). <article-title>Gene conversion and the evolution of protocadherin gene cluster diversity.</article-title> <source><italic>Genome Res.</italic></source> <volume>14</volume> <fpage>354</fpage>&#x2013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1101/gr.2133704</pub-id> <pub-id pub-id-type="pmid">14993203</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ohki</surname> <given-names>R.</given-names></name> <name><surname>Nemoto</surname> <given-names>J.</given-names></name> <name><surname>Murasawa</surname> <given-names>H.</given-names></name> <name><surname>Oda</surname> <given-names>E.</given-names></name> <name><surname>Inazawa</surname> <given-names>J.</given-names></name> <name><surname>Tanaka</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2000</year>). <article-title>Reprimo, a new candidate mediator of the P53-mediated cell cycle arrest at the G2 phase.</article-title> <source><italic>J. Biol. Chem.</italic></source> <volume>275</volume> <fpage>22627</fpage>&#x2013;<lpage>22630</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.c000235200</pub-id> <pub-id pub-id-type="pmid">10930422</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pan</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name></person-group> (<year>2008</year>). <article-title>Tandemly arrayed genes in vertebrate genomes.</article-title> <source><italic>Comp. Funct. Genomics</italic></source> <volume>2008</volume>:<issue>545269</issue>.</citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>Q.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Shen</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Zhou</surname> <given-names>J.</given-names></name> <name><surname>L&#x00FC;</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Olig1 expression pattern in neural cells during rat spinal cord development.</article-title> <source><italic>Neuropsychiatr. Dis. Treat.</italic></source> <volume>12</volume> <fpage>909</fpage>&#x2013;<lpage>916</lpage>. <pub-id pub-id-type="doi">10.2147/ndt.s99257</pub-id> <pub-id pub-id-type="pmid">27143892</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roy</surname> <given-names>S. W.</given-names></name> <name><surname>Gilbert</surname> <given-names>W.</given-names></name></person-group> (<year>2006</year>). <article-title>The evolution of spliceosomal introns: patterns, puzzles and progress.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>7</volume> <fpage>211</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1038/nrg1807</pub-id> <pub-id pub-id-type="pmid">16485020</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roy</surname> <given-names>S. W.</given-names></name> <name><surname>Fedorov</surname> <given-names>A.</given-names></name> <name><surname>Gilbert</surname> <given-names>W.</given-names></name></person-group> (<year>2003</year>). <article-title>Large-scale comparison of intron positions in mammalian genes shows intron loss but no gain.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>100</volume> <fpage>7158</fpage>&#x2013;<lpage>7162</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1232297100</pub-id> <pub-id pub-id-type="pmid">12777620</pub-id></citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakharkar</surname> <given-names>K. R.</given-names></name> <name><surname>Sakharkar</surname> <given-names>M. K.</given-names></name> <name><surname>Culiat</surname> <given-names>C. T.</given-names></name> <name><surname>Chow</surname> <given-names>V. T.</given-names></name> <name><surname>Pervaiz</surname> <given-names>S.</given-names></name></person-group> (<year>2006</year>). <article-title>Functional and evolutionary analyses on expressed intronless genes in the mouse genome.</article-title> <source><italic>FEBS Lett.</italic></source> <volume>580</volume> <fpage>1472</fpage>&#x2013;<lpage>1478</lpage>. <pub-id pub-id-type="doi">10.1016/j.febslet.2006.01.070</pub-id> <pub-id pub-id-type="pmid">16469316</pub-id></citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakharkar</surname> <given-names>M. K.</given-names></name> <name><surname>Chow</surname> <given-names>V. T.</given-names></name> <name><surname>Ghosh</surname> <given-names>K.</given-names></name> <name><surname>Chaturvedi</surname> <given-names>I.</given-names></name> <name><surname>Lee</surname> <given-names>P. C.</given-names></name> <name><surname>Bagavathi</surname> <given-names>S. P.</given-names></name><etal/></person-group> (<year>2005a</year>). <article-title>Computational prediction of SEG (Single Exon Gene) function in humans.</article-title> <source><italic>Front. Biosci.</italic></source> <volume>10</volume> <fpage>1382</fpage>&#x2013;<lpage>1395</lpage>. <pub-id pub-id-type="doi">10.2741/1627</pub-id> <pub-id pub-id-type="pmid">15769633</pub-id></citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakharkar</surname> <given-names>M. K.</given-names></name> <name><surname>Kangueane</surname> <given-names>P.</given-names></name> <name><surname>Petrov</surname> <given-names>D. A.</given-names></name> <name><surname>Kolaskar</surname> <given-names>A. S.</given-names></name> <name><surname>Subbiah</surname> <given-names>S.</given-names></name></person-group> (<year>2002</year>). <article-title>SEGE: a database on &#x2018;intron less/single exonic&#x2019; genes from eukaryotes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>18</volume> <fpage>1266</fpage>&#x2013;<lpage>1267</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/18.9.1266</pub-id> <pub-id pub-id-type="pmid">12217920</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakharkar</surname> <given-names>M. K.</given-names></name> <name><surname>Perumal</surname> <given-names>B. S.</given-names></name> <name><surname>Lim</surname> <given-names>Y. P.</given-names></name> <name><surname>Chern</surname> <given-names>L. P.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Kangueane</surname> <given-names>P.</given-names></name></person-group> (<year>2005b</year>). <article-title>Alternatively spliced human genes by exon skipping&#x2013;a database (ASHESdb).</article-title> <source><italic>In Silico Biol.</italic></source> <volume>5</volume> <fpage>221</fpage>&#x2013;<lpage>225</lpage>.</citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Snijders Blok</surname> <given-names>L.</given-names></name> <name><surname>Kleefstra</surname> <given-names>T.</given-names></name> <name><surname>Venselaar</surname> <given-names>H.</given-names></name> <name><surname>Maas</surname> <given-names>S.</given-names></name> <name><surname>Kroes</surname> <given-names>H. Y.</given-names></name> <name><surname>Lachmeijer</surname> <given-names>A. M. A.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>De novo variants disturbing the transactivation capacity of POU3F3 cause a characteristic neurodevelopmental disorder.</article-title> <source><italic>Am. J. Hum. Genet.</italic></source> <volume>105</volume> <fpage>403</fpage>&#x2013;<lpage>412</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2019.06.007</pub-id> <pub-id pub-id-type="pmid">31303265</pub-id></citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>Z.</given-names></name> <name><surname>Jadali</surname> <given-names>A.</given-names></name> <name><surname>Fritzsch</surname> <given-names>B.</given-names></name> <name><surname>Kwan</surname> <given-names>K. Y.</given-names></name></person-group> (<year>2017</year>). <article-title>NEUROG1 regulates CDK2 to promote proliferation in otic progenitors.</article-title> <source><italic>Stem Cell Rep.</italic></source> <volume>9</volume> <fpage>1516</fpage>&#x2013;<lpage>1529</lpage>. <pub-id pub-id-type="doi">10.1016/j.stemcr.2017.09.011</pub-id> <pub-id pub-id-type="pmid">29033307</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Staudt</surname> <given-names>A. C.</given-names></name> <name><surname>Wenkel</surname> <given-names>S.</given-names></name></person-group> (<year>2011</year>). <article-title>Regulation of protein function by &#x2018;MicroProteins&#x2019;.</article-title> <source><italic>EMBO Rep.</italic></source> <volume>12</volume> <fpage>35</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1038/embor.2010.196</pub-id> <pub-id pub-id-type="pmid">21151039</pub-id></citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stoltzfus</surname> <given-names>A.</given-names></name></person-group> (<year>1994</year>). <article-title>Origin of introns-early or late?</article-title> <source><italic>Nature</italic></source> <volume>369</volume> <fpage>526</fpage>&#x2013;<lpage>527</lpage>. <pub-id pub-id-type="doi">10.1038/369526b0</pub-id> <pub-id pub-id-type="pmid">8202153</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stoltzfus</surname> <given-names>A.</given-names></name> <name><surname>Spencer</surname> <given-names>D. F.</given-names></name> <name><surname>Zuker</surname> <given-names>M.</given-names></name> <name><surname>Logsdon</surname> <given-names>J. M.</given-names></name> <name><surname>Doolittle</surname> <given-names>W. F.</given-names></name></person-group> (<year>1994</year>). <article-title>Testing the exon theory of genes: the evidence from protein structure.</article-title> <source><italic>Science</italic></source> <volume>265</volume> <fpage>202</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1126/science.8023140</pub-id> <pub-id pub-id-type="pmid">8023140</pub-id></citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Straub</surname> <given-names>D.</given-names></name> <name><surname>Wenkel</surname> <given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>Cross-species genome wide identification of evolutionary conserved microproteins.</article-title> <source><italic>Genome Biol. Evol.</italic></source> <volume>9</volume> <fpage>777</fpage>&#x2013;<lpage>789</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evx041</pub-id> <pub-id pub-id-type="pmid">28338802</pub-id></citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sunahara</surname> <given-names>R. K.</given-names></name> <name><surname>Niznik</surname> <given-names>H. B.</given-names></name> <name><surname>Weiner</surname> <given-names>D. M.</given-names></name> <name><surname>Stormann</surname> <given-names>T. M.</given-names></name> <name><surname>Brann</surname> <given-names>M. R.</given-names></name> <name><surname>Kennedy</surname> <given-names>J. L.</given-names></name><etal/></person-group> (<year>1990</year>). <article-title>Human dopamine D1 receptor encoded by an intronless gene on chromosome 5.</article-title> <source><italic>Nature</italic></source> <volume>347</volume> <fpage>80</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1038/347080a0</pub-id> <pub-id pub-id-type="pmid">1975640</pub-id></citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Supek</surname> <given-names>F.</given-names></name> <name><surname>Bo&#x0161;njak</surname> <given-names>M.</given-names></name> <name><surname>&#x0160;kunca</surname> <given-names>N.</given-names></name> <name><surname>&#x0160;muc</surname> <given-names>T.</given-names></name></person-group> (<year>2011</year>). <article-title>REVIGO summarizes and visualizes long lists of gene ontology terms.</article-title> <source><italic>PLoS One</italic></source> <volume>6</volume>:<issue>e21800</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0021800</pub-id> <pub-id pub-id-type="pmid">21789182</pub-id></citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tantin</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>Oct transcription factors in development and stem cells: insights and mechanisms.</article-title> <source><italic>Development</italic></source> <volume>140</volume> <fpage>2857</fpage>&#x2013;<lpage>2866</lpage>. <pub-id pub-id-type="doi">10.1242/dev.095927</pub-id> <pub-id pub-id-type="pmid">23821033</pub-id></citation></ref>
<ref id="B64"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Tedder</surname> <given-names>M.</given-names></name> <name><surname>Corneil</surname> <given-names>D.</given-names></name> <name><surname>Habib</surname> <given-names>M.</given-names></name> <name><surname>Paul</surname> <given-names>C.</given-names></name></person-group> (<year>2008</year>). &#x201C;<article-title>Simpler linear-time modular decomposition via recursive factorizing permutations</article-title>,&#x201D; in <source><italic>International Colloquium on Automata, Languages, and Programming</italic></source>, <role>eds</role> <person-group person-group-type="editor"><name><surname>Aceto</surname> <given-names>L.</given-names></name> <name><surname>Damg&#x00E5;rd</surname> <given-names>I.</given-names></name> <name><surname>Goldberg</surname> <given-names>L. A.</given-names></name> <name><surname>Halld&#x00F3;rsson</surname> <given-names>M. M.</given-names></name> <name><surname>Ing&#x00F3;lfsd&#x00F3;ttir</surname> <given-names>A.</given-names></name> <name><surname>Walukiewicz</surname> <given-names>I.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>634</fpage>&#x2013;<lpage>645</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-540-70575-8_52</pub-id></citation></ref>
<ref id="B65"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tine</surname> <given-names>M.</given-names></name> <name><surname>Kuhl</surname> <given-names>H.</given-names></name> <name><surname>Beck</surname> <given-names>A.</given-names></name> <name><surname>Bargelloni</surname> <given-names>L.</given-names></name> <name><surname>Reinhardt</surname> <given-names>R.</given-names></name></person-group> (<year>2011</year>). <article-title>Comparative analysis of intronless genes in teleost fish genomes: insights into their evolution and molecular function.</article-title> <source><italic>Mar. Genomics</italic></source> <volume>4</volume> <fpage>109</fpage>&#x2013;<lpage>119</lpage>. <pub-id pub-id-type="doi">10.1016/j.margen.2011.03.004</pub-id> <pub-id pub-id-type="pmid">21620332</pub-id></citation></ref>
<ref id="B66"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uversky</surname> <given-names>V. N.</given-names></name></person-group> (<year>2015</year>). <article-title>Functional roles of transiently and intrinsically disordered regions within proteins.</article-title> <source><italic>FEBS J.</italic></source> <volume>282</volume> <fpage>1182</fpage>&#x2013;<lpage>1189</lpage>. <pub-id pub-id-type="doi">10.1111/febs.13202</pub-id> <pub-id pub-id-type="pmid">25631540</pub-id></citation></ref>
<ref id="B67"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Venkatesh</surname> <given-names>B.</given-names></name> <name><surname>Lee</surname> <given-names>A. P.</given-names></name> <name><surname>Ravi</surname> <given-names>V.</given-names></name> <name><surname>Maurya</surname> <given-names>A. K.</given-names></name> <name><surname>Lian</surname> <given-names>M. M.</given-names></name> <name><surname>Swann</surname> <given-names>J. B.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Elephant shark genome provides unique insights into gnathostome evolution.</article-title> <source><italic>Nature</italic></source> <volume>505</volume> <fpage>174</fpage>&#x2013;<lpage>179</lpage>. <pub-id pub-id-type="doi">10.1038/nature12826</pub-id> <pub-id pub-id-type="pmid">24402279</pub-id></citation></ref>
<ref id="B68"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>C. Y.</given-names></name> <name><surname>Shahi</surname> <given-names>P.</given-names></name> <name><surname>Huang</surname> <given-names>J. T.</given-names></name> <name><surname>Phan</surname> <given-names>N. N.</given-names></name> <name><surname>Sun</surname> <given-names>Z.</given-names></name> <name><surname>Lin</surname> <given-names>Y. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Systematic analysis of the achaete-scute complex-like gene signature in clinical cancer patients.</article-title> <source><italic>Mol. Clin. Oncol.</italic></source> <volume>6</volume> <fpage>7</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.3892/mco.2016.1094</pub-id> <pub-id pub-id-type="pmid">28123722</pub-id></citation></ref>
<ref id="B69"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Shu</surname> <given-names>W.</given-names></name> <name><surname>Lu</surname> <given-names>M. M.</given-names></name> <name><surname>Morrisey</surname> <given-names>E. E.</given-names></name></person-group> (<year>2005</year>). <article-title>Wnt7b activates canonical signaling in epithelial and vascular smooth muscle cells through interactions with Fzd1, Fzd10, and LRP5.</article-title> <source><italic>Mol. Cell. Biol.</italic></source> <volume>25</volume> <fpage>5022</fpage>&#x2013;<lpage>5030</lpage>. <pub-id pub-id-type="doi">10.1128/mcb.25.12.5022-5030.2005</pub-id> <pub-id pub-id-type="pmid">15923619</pub-id></citation></ref>
<ref id="B70"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Witze</surname> <given-names>E. S.</given-names></name> <name><surname>Old</surname> <given-names>W. M.</given-names></name> <name><surname>Resing</surname> <given-names>K. A.</given-names></name> <name><surname>Ahn</surname> <given-names>N. G.</given-names></name></person-group> (<year>2007</year>). <article-title>Mapping protein post-translational modifications with mass spectrometry.</article-title> <source><italic>Nat. Methods</italic></source> <volume>4</volume> <fpage>798</fpage>&#x2013;<lpage>806</lpage>.</citation></ref>
<ref id="B71"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Woods</surname> <given-names>S. M.</given-names></name> <name><surname>Mountjoy</surname> <given-names>E.</given-names></name> <name><surname>Muir</surname> <given-names>D.</given-names></name> <name><surname>Ross</surname> <given-names>S. E.</given-names></name> <name><surname>Atan</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>A comparative analysis of rod bipolar cell transcriptomes identifies novel genes implicated in night vision.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>8</volume>:<issue>5506</issue>.</citation></ref>
<ref id="B72"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Dong</surname> <given-names>Q.</given-names></name> <name><surname>Peng</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Different evolutionary patterns among intronless genes in maize genome.</article-title> <source><italic>Biochem. Biophys. Res. Commun.</italic></source> <volume>449</volume> <fpage>146</fpage>&#x2013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2014.05.008</pub-id> <pub-id pub-id-type="pmid">24820954</pub-id></citation></ref>
<ref id="B73"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>G.</given-names></name> <name><surname>Wang</surname> <given-names>L. G.</given-names></name> <name><surname>Han</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>Q. Y.</given-names></name></person-group> (<year>2012</year>). <article-title>ClusterProfiler: an R package for comparing biological themes among gene clusters.</article-title> <source><italic>Omics</italic></source> <volume>16</volume> <fpage>284</fpage>&#x2013;<lpage>287</lpage>. <pub-id pub-id-type="doi">10.1089/omi.2011.0118</pub-id> <pub-id pub-id-type="pmid">22455463</pub-id></citation></ref>
<ref id="B74"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>W. P.</given-names></name> <name><surname>Rajasegaran</surname> <given-names>V.</given-names></name> <name><surname>Yew</surname> <given-names>K.</given-names></name> <name><surname>Loh</surname> <given-names>W. L.</given-names></name> <name><surname>Tay</surname> <given-names>B. H.</given-names></name> <name><surname>Amemiya</surname> <given-names>C. T.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Elephant shark sequence reveals unique insights into the evolutionary history of vertebrate genes: a comparative analysis of the protocadherin cluster.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>105</volume> <fpage>3819</fpage>&#x2013;<lpage>3824</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0800398105</pub-id> <pub-id pub-id-type="pmid">18319338</pub-id></citation></ref>
<ref id="B75"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname> <given-names>M.</given-names></name> <name><surname>Guo</surname> <given-names>B.</given-names></name> <name><surname>He</surname> <given-names>S.</given-names></name></person-group> (<year>2011</year>). <article-title>The roles and evolutionary patterns of intronless genes in deuterostomes.</article-title> <source><italic>Comp. Funct. Genomics</italic></source> <volume>2011</volume>:<issue>680673</issue>.</citation></ref>
</ref-list>
<fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/GEmilioHO/intronless_genes">https://github.com/GEmilioHO/intronless_genes</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="http://rest.ensembl.org">http://rest.ensembl.org</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.nextgenbioinformatics.org/IntronDB/">http://www.nextgenbioinformatics.org/IntronDB/</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="http://supfam.org/">http://supfam.org/</ext-link></p></fn>
<fn id="footnote5">
<label>5</label>
<p><ext-link ext-link-type="uri" xlink:href="https://pfam.xfam.org/">https://pfam.xfam.org/</ext-link></p></fn>
<fn id="footnote6">
<label>6</label>
<p><ext-link ext-link-type="uri" xlink:href="https://prosite.expasy.org/">https://prosite.expasy.org/</ext-link></p></fn>
<fn id="footnote7">
<label>7</label>
<p><ext-link ext-link-type="uri" xlink:href="http://metascape.org/">http://metascape.org/</ext-link></p></fn>
<fn id="footnote8">
<label>8</label>
<p><ext-link ext-link-type="uri" xlink:href="http://amigo.geneontology.org/amigo/landing">http://amigo.geneontology.org/amigo/landing</ext-link></p></fn>
<fn id="footnote9">
<label>9</label>
<p><ext-link ext-link-type="uri" xlink:href="http://dbptm.mbc.nctu.edu.tw">http://dbptm.mbc.nctu.edu.tw</ext-link></p></fn>
<fn id="footnote10">
<label>10</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/DaStraub/miPFinder">https://github.com/DaStraub/miPFinder</ext-link></p></fn>
<fn id="footnote11">
<label>11</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ensembl.org/index.html">https://www.ensembl.org/index.html</ext-link></p></fn>
<fn id="footnote12">
<label>12</label>
<p><ext-link ext-link-type="uri" xlink:href="https://bgee.org/">https://bgee.org/</ext-link></p></fn>
</fn-group>
</back>
</article>