<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1491735</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2025.1491735</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Divergent evolution of low-complexity regions in the vertebrate CPEB protein family</article-title>
<alt-title alt-title-type="left-running-head">Vaglietti et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2025.1491735">10.3389/fbinf.2025.1491735</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Vaglietti</surname>
<given-names>Serena</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/2836461/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Boggio Bozzo</surname>
<given-names>Stefania</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/2835623/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ghirardi</surname>
<given-names>Mirella</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1033736/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fiumara</surname>
<given-names>Ferdinando</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/191580/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff>
<institution>&#x201c;Rita Levi-Montalcini&#x201d; Department of Neuroscience</institution>, <institution>University of Turin</institution>, <addr-line>Turin</addr-line>, <country>Italy</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/861429/overview">Fabia Ursula Battistuzzi</ext-link>, Oakland University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/676616/overview">Soham Sengupta</ext-link>, St. Jude Children&#x2019;s Research Hospital, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2232236/overview">Jack M. Craig</ext-link>, Temple University, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2254760/overview">Marcel Van Tuinen</ext-link>, University of Groningen, Netherlands</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Ferdinando Fiumara, <email>ferdinando.fiumara@unito.it</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>03</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>5</volume>
<elocation-id>1491735</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Vaglietti, Boggio Bozzo, Ghirardi and Fiumara.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Vaglietti, Boggio Bozzo, Ghirardi and Fiumara</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The <italic>cytoplasmic polyadenylation element-binding proteins</italic> (CPEBs) are a family of translational regulators involved in multiple biological processes, including memory-related synaptic plasticity. In vertebrates, four paralogous genes (CPEB1-4) encode proteins with phylogenetically conserved C-terminal RNA-binding domains and variable N-terminal regions (NTRs). The CPEB NTRs are characterized by low-complexity regions (LCRs), including homopolymeric amino acid repeats (AARs), and have been identified as mediators of liquid-liquid phase separation (LLPS) and prion-like aggregation. After their appearance following gene duplication, the four paralogous CPEB proteins functionally diverged in terms of activation mechanisms and modes of mRNA binding. The paralog-specific NTRs may have contributed substantially to such functional diversification but their evolutionary history remains largely unexplored. Here, we traced the evolution of vertebrate CPEBs and their LCRs/AARs focusing on primary sequence composition, complexity, repetitiveness, and their possible functional impact on LLPS propensity and prion-likeness. We initially defined these composition- and function-related quantitative parameters for the four human CPEB paralogs and then systematically analyzed their evolutionary variation across more than 500 species belonging to nine major clades of different stem age, from Chondrichthyes to Euarchontoglires, along the vertebrate lineage. We found that the four CPEB proteins display highly divergent, paralog-specific evolutionary trends in composition- and function-related parameters, primarily driven by variation in their LCRs/AARs and largely related to clade stem ages. These findings shed new light on the molecular and functional evolution of LCRs in the CPEB protein family, in both quantitative and qualitative terms, highlighting the emergence of CPEB2 as a proline-rich prion-like protein in younger vertebrate clades, including Primates.</p>
</abstract>
<kwd-group>
<kwd>cytoplasmic polyadenylation element-binding protein</kwd>
<kwd>CPEB proteins</kwd>
<kwd>liquid-liquid phase separation (LLPS)</kwd>
<kwd>prion-like proteins</kwd>
<kwd>low-complexity regions (LCRs)</kwd>
<kwd>homopolymeric amino acid repeats</kwd>
<kwd>divergent evolution</kwd>
<kwd>paralogous proteins</kwd>
</kwd-group>
<contract-sponsor id="cn001">Universit&#xe0; degli Studi di Torino<named-content content-type="fundref-id">10.13039/501100006692</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Evolutionary Bioinformatics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>The <italic>cytoplasmic polyadenylation element-binding proteins</italic> (CPEBs) are a family of RNA-binding proteins regulating mRNA translation (<xref ref-type="bibr" rid="B80">Richter, 2007</xref>) involved in various cellular processes, ranging from translational activation in oocytes to the control of local protein synthesis in memory-related synaptic plasticity (<xref ref-type="bibr" rid="B80">Richter, 2007</xref>; <xref ref-type="bibr" rid="B41">Kandel, 2012</xref>; <xref ref-type="bibr" rid="B38">Huang et al., 2023</xref>), also through prion-like mechanisms (<xref ref-type="bibr" rid="B87">Si et al., 2003a</xref>; <xref ref-type="bibr" rid="B86">Si et al., 2003b</xref>; <xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>). CPEBs have also been implicated in the pathogenesis of several diseases, ranging from cancer to post-traumatic stress disorder (PTSD) and autism spectrum disorders (ASDs; <xref ref-type="bibr" rid="B43">Kozlov et al., 2021</xref>; <xref ref-type="bibr" rid="B53">Lu et al., 2021</xref>).</p>
<p>In vertebrates, four paralogous genes encode a family of proteins (CPEB1-4) each made of a conserved C-terminal region (CTR), with two RNA-recognition motifs (RRMs) and a zinc finger (ZnF) domain, and an N-terminal region (NTR) characterized by low-complexity regions (LCRs), including homopolymeric amino acid repeats (AARs), that vary quite extensively across CPEB paralogs and orthologs (<xref ref-type="bibr" rid="B104">Wang and Cooper, 2010</xref>; <xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>). At the functional level, CPEBs can act both as repressors and activators of mRNA translation (<xref ref-type="bibr" rid="B80">Richter, 2007</xref>), switching between these two states through paralog-specific mechanisms, like phosphorylation or prion-like structural transitions (<xref ref-type="bibr" rid="B87">Si et al., 2003a</xref>; <xref ref-type="bibr" rid="B54">Majumdar et al., 2012</xref>; <xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>). The prion-like switch relies on a structural transition from a soluble to a fibrillary form enriched in &#x3b2;-sheets and/or coiled-coil structures in different CPEB orthologs (<xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; <xref ref-type="bibr" rid="B41">Kandel, 2012</xref>; <xref ref-type="bibr" rid="B42">Kandel et al., 2013</xref>; <xref ref-type="bibr" rid="B75">Raveendra et al., 2013</xref>; <xref ref-type="bibr" rid="B11">Cervantes et al., 2016</xref>; <xref ref-type="bibr" rid="B36">Hervas et al., 2020</xref>; <xref ref-type="bibr" rid="B35">Hervas et al., 2021</xref>; <xref ref-type="bibr" rid="B78">Reselammal et al., 2021</xref>; <xref ref-type="bibr" rid="B5">Bowler et al., 2022</xref>). 
These self-sustaining prion-like transitions have been attributed to LCRs, or &#x2018;prion-like&#x2019; domains (PrDs), in the NTRs of these proteins (<xref ref-type="bibr" rid="B85">Si, 2015</xref>; <xref ref-type="bibr" rid="B87">Si et al., 2003a</xref>; <xref ref-type="bibr" rid="B34">Heinrich and Lindquist, 2011</xref>; <xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>; <xref ref-type="bibr" rid="B36">Hervas et al., 2020</xref>; <xref ref-type="bibr" rid="B35">Hervas et al., 2021</xref>; <xref ref-type="bibr" rid="B78">Reselammal et al., 2021</xref>). More recently, different CPEB orthologs have been shown to undergo liquid-liquid phase separation (LLPS; <xref ref-type="bibr" rid="B25">Ford et al., 2019</xref>; <xref ref-type="bibr" rid="B24">Ford et al., 2023</xref>; <xref ref-type="bibr" rid="B4">Ashami et al., 2021</xref>; <xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B74">Ram&#xed;rez de Mingo et al., 2022</xref>; <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>), a biophysical process by which proteins assemble into transient &#x2018;condensates&#x2019; (e.g., <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). Notably, the ability of CPEB proteins to undergo LLPS has also been attributed to their N-terminal LCRs (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>).</p>
<p>CPEB genes appeared in Metazoa (<xref ref-type="bibr" rid="B63">Paps and Holland, 2018</xref>). The four vertebrate genes originated from an ancestral one by duplication (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B82">Rouhana et al., 2023</xref>) and are divided into the CPEB1 and CPEB2-4 subfamilies based on sequence similarity (<xref ref-type="bibr" rid="B30">Hake and Richter, 1994</xref>; <xref ref-type="bibr" rid="B45">Kurihara et al., 2003</xref>; <xref ref-type="bibr" rid="B96">Theis et al., 2003</xref>). While sharing fundamental features, the four CPEBs diverged functionally in several respects, including mRNA binding modes, activation mechanisms, and subcellular localization (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B38">Huang et al., 2023</xref>). The CPEB CTRs display a considerable degree of conservation (<xref ref-type="bibr" rid="B80">Richter, 2007</xref>), suggesting that the evolution of the variable NTRs may have substantially contributed to the functional diversification of the four CPEB paralogs. Indeed, the paralog-specific LLPS and prion-like behaviors of CPEBs rely on their variable NTRs (<xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>; <xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>), consistent with the fact that changes in LCRs/AARs composition and length can alter LLPS propensity and prion-like behavior (<xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). 
Therefore, the emergence of paralog-specific CPEB functions (neo-/sub-functionalization) may have derived from at least two mechanisms, i.e., gene duplication and LCR divergence, whose interplay has key roles in genome evolution (<xref ref-type="bibr" rid="B68">Persi et al., 2016</xref>), promoting the functional divergence of proteins, including nucleic-acid binding proteins (<xref ref-type="bibr" rid="B72">Rad&#xf3;-Trilla et al., 2015</xref>; <xref ref-type="bibr" rid="B13">Chiu et al., 2022</xref>).</p>
<p>The discovery of the ability of the CPEB NTRs to drive both LLPS and prion-like aggregation raised several new biological questions. In general terms, the functional and temporal relationships between the transient LLPS-driven condensation and the persistent, prion-like fibrillization of CPEBs are still not well defined. <xref ref-type="bibr" rid="B25">Ford et al. (2019)</xref> proposed that CPEB3 is in the repressive state within LLPS-driven condensates and activates translation upon prion-like fibrillization. However, other groups identified LLPS as a precursor, rather than an alternative state to prion-like fibrillization (<xref ref-type="bibr" rid="B4">Ashami et al., 2021</xref>; <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>). At the molecular level, it is unclear which compositional and structural features of the NTRs are related to their ability to drive LLPS and prion-like conformational and functional changes. The primary sequence composition and complexity of both LLPS-prone LCRs and PrDs in proteins have been related to their functional behaviors. &#x2018;Molecular grammars&#x2019;, which are still not clearly understood, are thought to specify sequence/function relationships in these regions (<xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>; <xref ref-type="bibr" rid="B83">Saar et al., 2021</xref>; <xref ref-type="bibr" rid="B77">Rekhi et al., 2024</xref>). Therefore, a qualitative and quantitative definition of the key features of the primary sequences of LLPS-prone and prion-like CPEB LCRs may help to better understand their functional properties. Furthermore, whether or not the same portions of the CPEB NTRs drive both LLPS and prion-like structural transitions remains to be defined. <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al. (2023)</xref> proposed that the CPEB3 NTR contains one prion-like portion and another one driving LLPS. 
However, in contrast with this model, the latter region had been identified by <xref ref-type="bibr" rid="B91">Stephan et al. (2015)</xref> as a functional prion-like region. Finally, it is unclear how evolutionary changes in the LCR primary sequences may have contributed to the functional divergence of the four CPEB paralogs once they had appeared in vertebrates.</p>
<p>To address these issues, we systematically defined the amino acid composition, sequence complexity, LLPS-propensity, and prion-likeness of the four human CPEBs, in both quantitative and qualitative terms, and traced the evolutionary history of these parameters in the CPEB orthologs across vertebrate clades.</p>
</sec>
<sec sec-type="results" id="s2">
<title>Results</title>
<sec id="s2-1">
<title>Differential amino acid occurrence and distribution between the human CPEB paralogs</title>
<p>We initially performed a systematic compositional analysis of the four human CPEB paralogs (<xref ref-type="fig" rid="F1">Figures 1A, B</xref>), defining for each protein the percent occurrence of the 20 amino acids (<xref ref-type="fig" rid="F1">Figure 1C</xref>) and their distribution along its primary sequence (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>; <xref ref-type="sec" rid="s10">Supplementary Figures S1&#x2013;S4</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Differential occurrence of amino acids between CPEB paralogs. <bold>(A)</bold> Schematic representation of the domain structure of the human CPEB1-4 protein family, highlighting (tree on the <italic>left</italic>) the similarity relationships between the four paralogous proteins (as determined using Multalin), which are grouped into the CPEB1 and CPEB2-4 subfamilies. The RNA recognition motifs 1 and 2 (RRM1 and RRM2) in the C-terminal region (CTR) are highlighted in <italic>red</italic> and <italic>orange</italic>, respectively, and the zinc finger domain (ZnF) in <italic>yellow</italic>. The more variable N-terminal region (NTR) is in <italic>cyan</italic>. <bold>(B)</bold> Atomic-level structural models of the human CPEB1-4 proteins generated by AlphaFold2 based on their primary sequences. Protein domains are colored as in <bold>(A)</bold>. <bold>(C)</bold> Bar graph reporting the percent occurrence of each amino acid in the primary sequence of each CPEB paralog relative to the mean occurrence of each amino acid across all proteins of the human proteome (CPEB1 in <italic>black</italic>, CPEB2 in <italic>dark gray</italic>, CPEB3 in <italic>gray</italic> and CPEB4 in <italic>light gray</italic>). The <italic>red lines</italic> highlight deviations &#x3e;20% in either direction from the proteome values.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g001.tif"/>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Amino acid distributions and per-residue scores related to sequence complexity, LLPS propensity, and prion-likeness along the primary sequences of human CPEB1 and CPEB2. The <italic>upper bars</italic> (&#x201c;domains&#x201d;) display a schematic representation of the domain structure of human CPEB1 and CPEB2, as in <xref ref-type="fig" rid="F1">Figure 1A</xref>. RRM1 (a.a. 336-438 in CPEB1, a.a. 775-871 in CPEB2) is in <italic>red</italic>, RRM2 (a.a. 449-530 in CPEB1, a.a. 882-963 in CPEB2) is in <italic>orange</italic>, and the ZnF (a.a. 532-581 in CPEB1, a.a. 967-1021 in CPEB2) is in <italic>yellow.</italic> The <italic>bars</italic> below (&#x201c;AAs&#x201d;) display the distribution of the indicated amino acids in CPEB1 and CPEB2 as <italic>thin vertical line segments</italic> (see <xref ref-type="sec" rid="s10">Supplementary Figures S1&#x2013;S2</xref> for other amino acids). Note how some of these amino acids tend to concentrate in the NTR (e.g., A, P, S, L in CPEB1, A, G, P, Q, S in CPEB2) and others in the RNA-binding CTR (e.g., V in CPEB1, D and K in CPEB2). The <italic>gray bars</italic> (&#x201c;AARs&#x201d;) are schematic representations of the two proteins with colored vertical bars indicating the position of AARs (&#x2265;4 residues; <xref ref-type="bibr" rid="B64">Pelassa et al., 2019</xref>). Note how CPEB1 is devoid of AARs while CPEB2 contains many of them formed by different amino acids, i.e., polyA (<italic>red</italic>), polyC (<italic>gray</italic>), polyG (<italic>green</italic>), polyP (<italic>turquoise</italic>), polyQ (<italic>orange</italic>), and polyS (<italic>blue</italic>). The <italic>four plots</italic> below report the per-residue scores related to sequence simplicity (&#x2018;SIM&#x2019;), repetitiveness (&#x2018;REP&#x2019;), LLPS propensity (&#x2018;FuzDrop pDP score&#x2019;), and prion-likeness (&#x2018;PLAAC PrD score&#x2019;). The SIM and REP scores are plotted on a logarithmic scale. 
In the SIM and REP plots, <italic>red</italic> and <italic>cyan peaks</italic> highlight protein regions with scores above or below, respectively, the mean value of the two parameters across the proteins of the whole human proteome. In the FuzDrop plot, <italic>red</italic> and <italic>cyan peaks</italic> highlight protein regions with P<sub>DP</sub> scores above or below, respectively, the prediction threshold (P<sub>DP</sub> &#x2265; 0.60) for LLPS-prone regions (<xref ref-type="bibr" rid="B101">Vendruscolo and Fuxreiter, 2022</xref>). In the PLAAC score plot, <italic>red</italic> and <italic>cyan peaks</italic> highlight protein fragments with scores above or below, respectively, the prediction threshold (PrD score &#x2265;0) for prion-like regions (<xref ref-type="bibr" rid="B47">Lancaster et al., 2014</xref>). Note how most of the CPEB2 NTR has a low-complexity (high SIM score) and repetitive (high REP score) primary sequence which is predicted by FuzDrop to be LLPS-prone for the most part. PLAAC identifies three discrete prion-like regions (<italic>asterisks</italic>) in the more central portion of the NTR. Conversely, the CPEB1 NTR contains some LCRs with relatively low SIM and REP scores. FuzDrop identifies limited portions of the NTR as LLPS-prone and no prion-like domain is identified by PLAAC. The <italic>three bars</italic> at the bottom indicate the position of residues identified by ParSe as part of LLPS-prone (P; <italic>red</italic>) or not (D; <italic>gray</italic>) LCRs/IDRs, or as part of folded (F; <italic>turquoise</italic>) regions based on three different algorithms (labelled here as 1, 2, and 3; see <xref ref-type="sec" rid="s4">Methods</xref>; <xref ref-type="bibr" rid="B40">Ibrahim et al., 2023</xref>). Note how, in comparison with the FuzDrop predictions, the LLPS-prone regions identified by ParSe in the NTR are less extended in both proteins. 
It is also remarkable how the three PrDs predicted by PLAAC in CPEB2 fall within regions with no LLPS propensity in two or three of the ParSe predictions (<italic>asterisks</italic>).</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g002.tif"/>
</fig>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Amino acid distributions and per-residue scores related to sequence complexity, LLPS propensity, and prion-likeness along the primary sequences of human CPEB3 and CPEB4. <bold>(A, B)</bold> As in <xref ref-type="fig" rid="F2">Figure 2</xref>, but for CPEB3 (RRM1: a.a. 439-535, RRM2: a.a. 546-627, ZnF: a.a. 631-685), and CPEB4 (RRM1: a.a. 470-566, RRM2: a.a. 577-658, ZnF: a.a. 662-716). Note how some residues concentrate in the NTRs (A, P, and Q in CPEB3; A, G, and H in CPEB4) and others in the CTRs (D and L in both proteins). Plots for all amino acids are in <xref ref-type="sec" rid="s10">Supplementary Figures S3&#x2013;S4</xref>. Multiple AARs are present in CPEB3 (polyA, polyP, polyQ, polyS; color coding as in <xref ref-type="fig" rid="F2">Figure 2</xref>), while CPEB4 contains only a short polyG tract. Both proteins display multiple NTR subregions with high SIM and REP scores and LLPS propensity as predicted by FuzDrop. The ParSe predictions are more conservative and identify multiple LLPS-prone subregions within the two NTRs. PLAAC predicts two prion-like regions in both proteins, but with relatively high PrD scores in CPEB3 and with only borderline scores in CPEB4. Note how the CPEB3 PrDs predicted by PLAAC (<italic>asterisks</italic>) fall within regions with no LLPS propensity in two or three of the ParSe predictions (<italic>asterisks</italic>).</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g003.tif"/>
</fig>
<p>The percent occurrence of each amino acid in each CPEB protein was compared to its mean percent occurrence across all human proteins (<xref ref-type="fig" rid="F1">Figure 1C</xref>). This analysis revealed that, in one or more of the four CPEBs, the occurrence of several amino acids substantially deviates (&#x3e;20% over-/under-representation) from their occurrence in the human proteome. Deviations of this magnitude in the percent occurrence of a given amino acid in a protein can be related to the presence of compositionally biased protein regions (LCRs/AARs) even of modest length (see <xref ref-type="sec" rid="s4">Methods</xref>). All four CPEBs display an underrepresentation of charged residues like glutamate (E) and lysine (K), and an overrepresentation of proline (P) and serine (S). Compositional differences across paralogs were found mostly between CPEB1 and CPEB2-4, but also between members of the CPEB2-4 subfamily. For example, glutamine (Q) residues are underrepresented in CPEB1 and overrepresented in CPEB2-4, and the degree of P overrepresentation is very different in CPEB2 (&#x2b;146% vs. proteome), CPEB3 (&#x2b;100%), and CPEB4 (&#x2b;44%).</p>
<p>In analyzing the distribution of the 20 amino acids (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>; <xref ref-type="sec" rid="s10">Supplementary Figures S1&#x2013;S4</xref>), we found that some of them display a non-homogenous patterning along the CPEB primary sequences. For instance, P and Q residues are concentrated within the NTRs of CPEB2-4. These asymmetric distributions were particularly evident in CPEB2, with many amino acids concentrated in the NTR (e.g., G, S, and A, besides P and Q) and some others more abundant in the CTR (e.g., aspartate, D).</p>
<p>These asymmetries in amino acid distribution across protein regions are also related to the presence of AARs in the NTRs of CPEB2-4 (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>), which are absent in CPEB1. AARs are numerous in CPEB2/3, but almost absent in CPEB4, which bears only a short polyglycine (polyG) tract. CPEB2/3 both contain poly-alanine (polyA), -glutamine (polyQ), -proline (polyP), and -serine (polyS) repeats. CPEB2 also contains multiple polyG repeats.</p>
</sec>
<sec id="s2-2">
<title>Differential sequence simplicity and repetitiveness between the human CPEB paralogs</title>
<p>To define quantitatively the complexity of the primary sequence of CPEBs, we calculated two per-residue scores expressing the local degree of sequence simplicity (SIM) and repetitiveness (REP) in a sliding window of 20 residues around each residue of the four proteins. The lower the number of different amino acids in the 20-residue window, the higher the SIM score. This score would be minimum for a sequence with 20 different amino acids (i.e., &#x2018;ACDEFGHIKLMNPQRSTVWY&#x2019; in any order) and maximum for a homopolymeric AAR (e.g., &#x2018;AAAAAAAAAAAAAAAAAAAA&#x2019;). Given a certain degree of complexity, the REP score quantifies primary sequence repetitiveness. Thus, in a region formed by 10 A and 10 Q residues, the score would be lower for &#x2018;AQAQAQAQAQAQAQAQAQAQ&#x2019;, intermediate for &#x2018;AAAAAQQQQQAAAAAQQQQQ&#x2019;, and higher for &#x2018;AAAAAAAAAAQQQQQQQQQQ&#x2019;.</p>
<p>The two scores are higher for CPEB2-4 in comparison with CPEB1 (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>). CPEB2-4 proteins display a tripartite organization in terms of complexity, with SIM and REP scores that are higher in the proximal two-thirds of the NTR, intermediate in its distal third, and lower in the CTR (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>).</p>
</sec>
<sec id="s2-3">
<title>Differential LLPS propensity and prion-likeness between the human CPEB paralogs</title>
<p>The previous findings prompted us to test whether the observed differences in the composition and complexity of the CPEB NTRs may impact their LLPS propensity and prion-likeness using the FuzDrop, ParSe, and PLAAC algorithms (<xref ref-type="bibr" rid="B101">Vendruscolo and Fuxreiter, 2022</xref>; <xref ref-type="bibr" rid="B40">Ibrahim et al., 2023</xref>; <xref ref-type="bibr" rid="B47">Lancaster et al., 2014</xref>). These well-established prediction tools can provide nuanced per-residue predictions (see <xref ref-type="sec" rid="s4">Methods</xref>) that can help identify LCR subregions specifically involved in driving LLPS-driven condensation and/or prion-like aggregation. Some of these tools were previously used to characterize CPEB3 (<xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>).</p>
<p>Both FuzDrop and ParSe identified the NTRs of all four CPEBs as LLPS-prone regions (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>), consistent with experimental evidence that all CPEB paralogs undergo LLPS or are recruited to LLPS-driven compartments (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>). For CPEB1, which is recruited to LLPS-driven ribonucleoprotein particle (RNP) condensates (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B24">Ford et al., 2023</xref>), the FuzDrop and ParSe predictions were essentially overlapping. For CPEB2-4, both algorithms predicted LLPS-prone regions mostly confined to the two proximal thirds of the NTRs, which also have the highest SIM and REP scores. However, while FuzDrop identified most of these initial LCR portions of CPEB2-4 as LLPS-prone, ParSe was able to identify within them smaller, discrete subregions with LLPS propensity (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>).</p>
<p>PLAAC predicted prion-like domains (PrDs) in the central thirds of CPEB2-4, as well as in the N-terminal portion of CPEB3 (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>). No PrD was predicted in CPEB1 (<xref ref-type="fig" rid="F2">Figure 2</xref>), consistent with previous experimental observations (<xref ref-type="bibr" rid="B108">Si et al., 2010</xref>). The predicted PrDs in CPEB2-4 are comprised within the extended LLPS-prone regions identified by FuzDrop, which may indicate that they mediate both LLPS and fibrillization. However, the discrete LLPS-prone subregions predicted by ParSe for CPEB2/3 appeared to alternate with the PLAAC PrDs, strongly suggesting that neighboring NTR subregions are alternatively implicated in either LLPS-driven condensation or in prion-like fibrillization.</p>
<p>Together with our previous analyses, these findings indicate that the four CPEB NTRs are formed by compositionally different subregions with distinct structural and functional roles, in agreement with initial evidence available for the CPEB3 paralog (<xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>; <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>).</p>
</sec>
<sec id="s2-4">
<title>An evolutionary perspective on the LCRs of the vertebrate CPEB protein family</title>
<p>The previous findings prompted us to test whether the primary sequence features of the four human CPEBs that we highlighted are phylogenetically conserved, or they gradually arose in the evolutionary history of vertebrates, or they represent instead highly variable taxon-/species-specific molecular features. Thus, we explored how the composition, complexity, LLPS propensity, and prion-likeness of the four CPEB paralogs have evolved in the gnathostome vertebrate lineage.</p>
<p>For each CPEB paralog, we selected for this analysis hundreds of orthologs from species belonging to nine major clades of different stem age in the evolutionary tree of the vertebrate lineage (<xref ref-type="fig" rid="F4">Figure 4A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>), from older ones, like Chondrichthyes and Actinopterygii, to younger ones like Glires and Primates (<xref ref-type="fig" rid="F4">Figure 4B</xref>). The stem ages of these clades range from &#x223c;87 to &#x223c;462 million years ago (mya; <xref ref-type="fig" rid="F4">Figure 4B</xref>). Besides Primates (<italic>Pri</italic>, 34 species) and their sister taxon Glires (<italic>Gli</italic>, 45 species, including rodents, rabbits, hares, and pikas) within Euarchontoglires (87 mya), the clades are Laurasiatheria (<italic>Lau</italic>, 119 species (94 mya), including carnivorans and even-/odd-toed ungulates), Atlantogenata (<italic>Atl</italic>, 9 species (94 mya), comprising species from afrotherian (e.g., elephants) and xenarthran (e.g., armadillos) orders), Marsupialia (<italic>Mar</italic>, 8 species, 99 mya), Sauropsida (<italic>Sau</italic>, 159 species (180 mya), including birds and reptiles), Amphibia (<italic>Amp</italic>, 11 species, 319 mya), and bony (Actinopterygii, <italic>Act</italic>, 175 species, 429 mya) or cartilaginous (Chondrichthyes, <italic>Cho</italic>, 11 species, 462 mya) fishes.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Evolutionary relationships of vertebrate species and clades whose CPEB ortholog protein sequences were analyzed <bold>(A)</bold>. Phylogenetic tree of the 571 species, belonging to the indicated major vertebrate clades, highlighted in different color shades, whose CPEB1-4 primary sequences were analyzed. A three-letter abbreviation of the clade name and the number of species with available CPEB sequences are indicated for each clade <italic>in brackets</italic>, with silhouette drawings indicating a representative species for each clade, i.e., <italic>Homo sapiens</italic> for Primates (Pri), <italic>Mus musculus</italic> for Glires (Gli), <italic>Bos taurus</italic> for Laurasiatheria (Lau), <italic>Loxodonta africana</italic> for Atlantogenata (Atl), <italic>Monodelphis domestica</italic> for Marsupialia (Mar), <italic>Gallus gallus</italic> for Sauropsida (Sau), <italic>Xenopus tropicalis</italic> for Amphibia (Amp), <italic>Danio rerio</italic> for Actinopterygii (Act), and <italic>Amblyraja radiata</italic> for Chondrichthyes (Cho). <bold>(B)</bold> The lower cladogram illustrates the phylogenetic relationships between the nine vertebrate clades shown in <bold>(A)</bold>. Clade stem ages are indicated <italic>on the right</italic>. The <italic>colored bars</italic> on top indicate the higher-level clades (listed <italic>on the right</italic>) that variably comprise the nine clades forming the lower cladogram.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g004.tif"/>
</fig>
<p>For each available CPEB primary sequence, we calculated the percent occurrence of the 20 amino acids, the total length of the repeats of each amino acid (AARs), as well as the mean SIM, REP, LLPS propensity (ParSe), and prion-likeness (PLAAC) scores across all residues of each protein (<xref ref-type="fig" rid="F5">Figures 5</xref>&#x2013;<xref ref-type="fig" rid="F8">8</xref>). For each CPEB paralog in each clade, we calculated the mean values of the same parameters across all the available ortholog sequences or, in some analyses, across the orthologs from only five randomly selected species (see <xref ref-type="sec" rid="s4">Methods</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Evolution of amino acid occurrence and AAR lengths in vertebrate CPEB1/2 <bold>(A)</bold> The bar <italic>on top</italic> is a simplified representation of the domain structure of human CPEB1 (RRMs and ZnF are in <italic>black</italic>; see <xref ref-type="fig" rid="F2">Figure 2A</xref>). On the <italic>left</italic>, phylogenetic tree of the 9 vertebrate taxa that were analyzed with silhouettes of representative species (as listed in the legend to <xref ref-type="fig" rid="F4">Figure 4A</xref>). The <italic>white bars</italic> display the distribution of A and H residues, represented as <italic>thin vertical line segments</italic> (in <italic>red</italic> and <italic>cyan</italic>, respectively), along the primary sequence of CPEB1 orthologs in the indicated species. The ortholog bars were graphically aligned to the junction between the NTR and CTR. <bold>(B)</bold> Graph reporting the mean percent occurrence of the indicated amino acids across the ortholog CPEB1 proteins of each clade. Values are normalized to those found in the clade with the oldest stem age (Chondrichthyes). The graph only reports the values relative to those amino acids whose evolutionary variation in occurrence correlates significantly with clade stem ages, as reported in <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>. Glutamate (E), histidine (H), and leucine (L) display significant, clade stem age-related, increases in their percent occurrence along the vertebrate lineage, whereas asparagine (N) and methionine (M) display an opposite, significant trend. Clade-specific oscillations of the analyzed values were not analyzed in detail. <bold>(C)</bold> As in <bold>(B)</bold>, for AARs lengths. As CPEB1 is devoid of AAR, the graph is reported here for comparison purposes with the other paralogs (see <bold>(B)</bold> and <xref ref-type="fig" rid="F6">Figure 6</xref>). <bold>(D)</bold> As in <bold>(A)</bold> for CPEB2. 
Note the considerable increase in the occurrence of proline (P) residues (<italic>red</italic> thin bars) and the elongation of polyG repeats (although the occurrence of G residues did not significantly increase overall). <bold>(E)</bold> As in <bold>(B)</bold> for CPEB2. Note how, along the vertebrate lineage, the occurrence of many amino acids increased or decreased significantly (see <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). <bold>(F)</bold> As in <bold>(C)</bold> for CPEB2. Note how the total length of polyP and polyG stretches significantly increased from older to younger vertebrate clades (see <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>).</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g005.tif"/>
</fig>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Evolution of amino acid occurrence and AAR lengths in vertebrate CPEB3/4 <bold>(A&#x2013;C)</bold> As in <xref ref-type="fig" rid="F5">Figure 5</xref> for CPEB3. Note the significant increase in the occurrence of A and P residues along the vertebrate lineage. The increase in A residues is paralleled by an increase in total polyA repeat length. <bold>(D&#x2013;F)</bold> As in <xref ref-type="fig" rid="F5">Figure 5</xref> for CPEB4. Note the disappearance of the polyQ repeat going from older to younger vertebrate clades.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Divergent evolution of amino acid occurrence, sequence complexity, LLPS, and PrD propensity in CPEB1-4 <bold>(A)</bold>. Scatterplot with regression lines displaying, for each CPEB paralog, the correlation between the mean percent occurrence of P residue and stem ages across the nine clades. Statistically significant <italic>r</italic> correlation coefficients are marked with an <italic>asterisk</italic>. Data points in <italic>dark green</italic> for CPEB1, in <italic>cyan</italic> for CPEB2, in <italic>purple</italic> for CPEB3, and in <italic>light green</italic> for CPEB4. <bold>(B&#x2013;F)</bold> As in <bold>(A)</bold> for polyP length <bold>(B)</bold>, SIM score <bold>(C)</bold>, REP score <bold>(D)</bold>, ParSe LLPS propensity score <bold>(E)</bold>, and PLAAC PrD score <bold>(F)</bold>.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Evolutionary variation in SIM, REP, LLPS, and prion-likeness scores visualized onto structural models of <italic>Danio rerio</italic> and <italic>Homo sapiens</italic> CPEBs. For each CPEB paralog, the four panels display atomic-level structural models of the ortholog proteins of <italic>Danio rerio</italic> (<italic>left column</italic>) and <italic>Homo sapiens</italic> (<italic>right column</italic>) generated by AlphaFold2. Per-residue SIM, REP, LLPS, and prion-likeness scores are reported on protein structures using a pseudocolor scale going from <italic>blue</italic> (lower scores) to <italic>red</italic> (higher scores) through <italic>white</italic> (intermediate scores).</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g008.tif"/>
</fig>
</sec>
<sec id="s2-5">
<title>Divergent evolution of compositional features across CPEB paralogs in vertebrates</title>
<p>For each CPEB paralog, we initially analyzed the evolutionary variation of the amino acid composition and AAR lengths across the nine vertebrate clades, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. We initially calculated, across all available sequences in each clade, the mean percent occurrence of each amino acid and the mean total length of the AARs formed by each amino acid. Then, we studied whether these 40 parameters remained substantially stable across clades during vertebrate evolution or whether they varied, either in a clade-specific manner or with detectable overall trends related to clade stem ages (<xref ref-type="fig" rid="F5">Figures 5</xref>&#x2013;<xref ref-type="fig" rid="F8">8</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>).</p>
<p>This analysis revealed how the amino acid composition of CPEB1 remained overall quite stable across clades, from Chondrichthyes to Euarchontoglires, with relatively modest changes in amino acid occurrences. However, some clade stem age-related trends in the occurrence of certain amino acids were detected. Indeed, the mean percent occurrence of A, E, H and L residues (<italic>r</italic> &#x3d; &#x2212;0.69, <italic>r</italic> &#x3d; &#x2212;0.78, <italic>r</italic> &#x3d; &#x2212;0.75 and <italic>r</italic> &#x3d; &#x2212;0.73 respectively, n &#x3d; 9 taxa, p &#x3c; 0.05 in all instances) increases significantly from older to younger clades, while that of M and N residues decreased (<italic>r</italic> &#x3d; 0.85, p &#x3c; 0.01 and <italic>r</italic> &#x3d; 0.72, p &#x3c; 0.05 respectively, n &#x3d; 9 in both instances; <xref ref-type="fig" rid="F5">Figures 5A, B</xref>). AARs, which are not present in human CPEB1, were also not found in most of its vertebrate orthologs with the exception of a few clades in which short repeats (&#x223c;4-residue-long) can be sporadically observed. No AAR length displays significant evolutionary variation across clades (<xref ref-type="fig" rid="F5">Figure 5C</xref>).</p>
<p>CPEB2 underwent marked compositional changes across clades, and the percent occurrence of several amino acids varied considerably, correlating with clade stem ages (<xref ref-type="fig" rid="F5">Figures 5D, E</xref>). In particular, the occurrence of P and L residues significantly increased by &#x223c;60% and &#x223c;30% respectively (<italic>r</italic> &#x3d; &#x2212;0.96, n &#x3d; 9, p &#x3c; 0.001 and <italic>r</italic> &#x3d; &#x2212;0.85, n &#x3d; 9, p &#x3c; 0.01) going from Chondrichthyes to Euarchontoglires, whereas the occurrence of negatively charged (E,D), several hydrophobic (I,V,M,W,Y) and N residues decreased (<italic>r</italic> from &#x2212;0.78 to &#x2212;0.98, n &#x3d; 9, <italic>p</italic> between 0.035 and 0.001). PolyP and polyG significantly increased their total lengths (<xref ref-type="fig" rid="F5">Figure 5F</xref>; <italic>r</italic> &#x3d; &#x2212;0.91, n &#x3d; 9, p &#x3c; 0.01, and <italic>r</italic> &#x3d; &#x2212;0.87, n &#x3d; 9, p &#x3c; 0.02, respectively). Interestingly, also a short 4-residue-long polyC repeat appeared in Metatheria (<italic>r</italic> &#x3d; &#x2212;0.85, n &#x3d; 9, p &#x3c; 0.01).</p>
<p>CPEB3 also underwent considerable changes in amino acid occurrence and AAR length. Remarkably, some of these changes were parallel to those observed for CPEB2. Indeed, the occurrence of P residues increased (<xref ref-type="fig" rid="F6">Figures 6A, B</xref>, <italic>r</italic> &#x3d; &#x2212;0.93, n &#x3d; 9, p &#x3c; 0.001), while that of D, I, and N residues decreased (<italic>r</italic> between &#x2212;0.86 and &#x2212;0.96, n &#x3d; 9, <italic>p</italic> between 0.002 and 0.001) from Chondrichthyes to Euarchontoglires. In addition, the occurrence of A residues increased in CPEB3 (<italic>r</italic> &#x3d; &#x2212;0.93, n &#x3d; 9, p &#x3c; 0.01), also in relation to polyA length elongation, while that of R, K, W, and C residues decreased (<italic>r</italic> between 0.90 and 0.91, n &#x3d; 9, <italic>p</italic> &#x3c; 0.001; <xref ref-type="fig" rid="F6">Figure 6C</xref>).</p>
<p>Unlike the two other members of the CPEB2-4 subfamily, CPEB4 displayed quite limited changes in amino acid occurrence and AAR length through the vertebrate lineage. The occurrence of I (<italic>r</italic> &#x3d; &#x2212;0.82, n &#x3d; 9, p &#x3c; 0.01) and A residues (<italic>r</italic> &#x3d; &#x2212;0.75, n &#x3d; 9, p &#x3c; 0.02) increased significantly going from older to younger clades, whereas that of V (<italic>r</italic> &#x3d; 0.84, n &#x3d; 9, p &#x3c; 0.01) and M residues (<italic>r</italic> &#x3d; 0.76, n &#x3d; 9, p &#x3c; 0.02) decreased (<xref ref-type="fig" rid="F6">Figures 6D, E</xref>). AARs shortened or disappeared in CPEB4 along the vertebrate lineage (<xref ref-type="fig" rid="F6">Figure 6F</xref>). Indeed, the polyG tract, i.e., the only AAR in human CPEB4, is longer in Chondrichthyes than in Euarchontoglires (<italic>r</italic> &#x3d; &#x2212;0.76, n &#x3d; 9, p &#x3c; 0.03) and a polyQ repeat that is present in Chondrichthyes is not found in the other clades (<italic>r</italic> &#x3d; &#x2212;0.75, n &#x3d; 9, p &#x3c; 0.03).</p>
<p>Taken together, these findings indicate that throughout vertebrate evolution, the four CPEB paralogs have been markedly diverging in terms of primary sequence composition. In quantitative terms, it is evident how CPEB1 varied overall to a considerably lesser degree than the paralogs of the CPEB2-4 subfamily and how, within the latter, CPEB2 and CPEB3 varied more than CPEB4. We also highlighted some parallel changes across some paralogs, especially for CPEB2 and CPEB3. For instance, the occurrence of P residues and the length of polyP repeats increased in both CPEB2 and CPEB3, but not in CPEB1/4, going from older to younger clades (<xref ref-type="fig" rid="F7">Figures 7A, B</xref>), whereas that of negatively charged (D and E) and some polar (N) or aromatic (Y) residues significantly decreased. Alanine residues increased in both CPEB3 and CPEB4. The increase in I residues in CPEB4 paralleled the increase in a related aliphatic amino acid (L) in CPEB2. These findings uncover a remarkable degree of compositional divergence across CPEB paralogs, especially between the CPEB1 and CPEB2-4 subfamilies and, within the latter, between CPEB2/3 and CPEB4. It is noteworthy how CPEB2 and CPEB3, the two known prion-like paralogs, underwent several parallel changes.</p>
</sec>
<sec id="s2-6">
<title>Divergent evolution of sequence complexity, LLPS-propensity, and prion-likeness in vertebrate CPEBs</title>
<p>Based on the previous results, we analyzed how the observed evolutionary changes in amino acid composition of the four CPEB paralogs may have impacted their overall primary sequence complexity, predicted LLPS propensity, and prion-likeness. Towards this aim, we calculated the mean values of complexity-related (SIM and REP) and function-related (ParSe P distance and PLAAC PrD) scores across orthologs in each clade and studied their variation profiles across clades (<xref ref-type="fig" rid="F7">Figures 7C&#x2013;F</xref>). To visually highlight the protein regions impacted by the evolutionary variations of the complexity- and function-related scores, their per-residue values were reported using a pseudocolor scale onto the available AlphaFold models of CPEB paralogs of two species from older, i.e., <italic>Danio rerio</italic> (Actinopterygii), and younger, i.e., <italic>Homo sapiens</italic> (Euarchontoglires), vertebrate clades (<xref ref-type="fig" rid="F8">Figure 8</xref>).</p>
<p>For CPEB1, we found that both SIM and REP do not display marked oscillations across clades and have, overall, lower values in comparison with those of CPEB2-4 (<xref ref-type="fig" rid="F7">Figures 7C, D</xref>). The protein does not have any predicted PrD in vertebrates, as in <italic>Homo</italic> (<xref ref-type="fig" rid="F7">Figure 7F</xref>), and displays the lowest LLPS propensity among the CPEB paralogs across clades (<xref ref-type="fig" rid="F7">Figure 7E</xref>). Overall, the CPEB1 complexity- and function-related scores are generally lower in comparison with those of CPEB2-4 in each clade, displaying modest degrees of variation in relation to clade stem ages (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>). The minimal decline in LLPS propensity of the protein going towards younger clades is statistically significant (r &#x3d; 0.67, n &#x3d; 9; p &#x3c; 0.05). <xref ref-type="fig" rid="F8">Figure 8</xref> (<italic>first panel from the left</italic>) highlights onto structural models the lack of substantial changes in CPEB1 complexity- and function-related parameters between <italic>Danio</italic> and <italic>Homo</italic>.</p>
<p>In contrast, the primary sequence of CPEB2 displayed a considerable reduction in sequence complexity, with a strong increase in both the SIM and REP scores going from Chondrichthyes to Primates, which correlates significantly with clade stem ages (<italic>r</italic> &#x3d; &#x2212;0.81, n &#x3d; 9 taxa, p &#x3d; 0.01 and <italic>r</italic> &#x3d; &#x2212;0.77, n &#x3d; 9 taxa, p &#x3c; 0.02, respectively). Notably, these changes are also paralleled by significant increases in both LLPS propensity and prion-likeness (<italic>r</italic> &#x3d; &#x2212;0.77, n &#x3d; 9 taxa, p &#x3c; 0.02 in both instances). <xref ref-type="fig" rid="F8">Figure 8</xref> highlights onto structural models the marked increases in CPEB2 complexity- and function-related scores, as well as in the length of the NTR (see <xref ref-type="fig" rid="F2">Figure 2</xref>), between species from older (<italic>Danio</italic>) and younger (<italic>Homo</italic>) clades.</p>
<p>As for CPEB2, the primary sequence of CPEB3 also underwent a reduction in sequence complexity with an increase in SIM score going from Chondrichthyes to Euarchontoglires, correlating significantly with clade stem ages (<italic>r</italic> &#x3d; &#x2212;0.81, n &#x3d; 9 taxa, p &#x3c; 0.01; <xref ref-type="fig" rid="F7">Figure 7C</xref>). The REP score also displays a similar statistically significant trend (<italic>r</italic> &#x3d; &#x2212;0.68, n &#x3d; 9 taxa, p &#x3c; 0.05; <xref ref-type="fig" rid="F7">Figure 7D</xref>). However, these two trends are not paralleled by significant increases in LLPS propensity and prion-likeness (<xref ref-type="fig" rid="F7">Figures 7E,F</xref>), as found instead for CPEB2. Indeed, the PrD scores are instead substantially stable at relatively high levels across vertebrate clades, with no evident correlation with clade stem ages (<xref ref-type="fig" rid="F7">Figure 7F</xref>, <italic>r</italic> &#x3d; &#x2212;0.07, n &#x3d; 9 taxa, p &#x3d; 0.86), while LLPS propensity even declined to a certain extent, although this reduction did not significantly correlate with clade stem ages (<xref ref-type="fig" rid="F7">Figure 7E</xref>; <italic>r</italic> &#x3d; &#x2212;0.61, n &#x3d; 9 taxa, p &#x3d; 0.08). Together, these findings indicate that, as for CPEB2, the overall sequence complexity of CPEB3 declines going towards younger vertebrate clades, although this may not directly translate into an increase in LLPS propensity and prion-likeness. Moreover, they also suggest that CPEB3 may have reached certain degrees of LLPS propensity and prion-likeness relatively early in the vertebrate lineage and has maintained them ever since. <xref ref-type="fig" rid="F8">Figure 8</xref> visually illustrates the differences in CPEB3 SIM, REP, LLPS, and prion-likeness scores between <italic>Danio</italic> and <italic>Homo</italic>.</p>
<p>CPEB4 displayed quite different trends in comparison with CPEB2/3. Indeed, this protein underwent only a modest reduction in both the SIM and REP scores going towards younger clades, which did not significantly correlate with clade stem ages (<italic>r</italic> &#x3d; 0.56 and <italic>r</italic> &#x3d; 0.47, respectively, n &#x3d; 9, <italic>p</italic> &#x3e; 0.05 in both cases; <xref ref-type="fig" rid="F7">Figures 7C,D</xref>). It is noteworthy that CPEB2, CPEB3, and CPEB4 had similar SIM and REP scores in the older vertebrate clades and then diverged considerably along the vertebrate lineage. In contrast with what was found for CPEB2, LLPS propensity slightly but significantly declined towards younger clades (r &#x3d; 0.67, n &#x3d; 9, p &#x3c; 0.05), and no significant change was observed for the PrD score, as exemplified in <xref ref-type="fig" rid="F8">Figure 8</xref> for the zebrafish and human orthologs. These findings indicate that the compositional changes observed in the four CPEB paralogs along the vertebrate lineage are associated with significant divergence in their overall sequence complexity and repetitiveness that directly affect the predicted propensity of the proteins to undergo LLPS and prion-like fibrillization. Thus, the CPEB evolutionary dynamics that we have uncovered may have critically contributed to the functional divergence of CPEB paralogs in the vertebrate lineage (see <xref ref-type="sec" rid="s3">Discussion</xref>).</p>
</sec>
<sec id="s2-7">
<title>The observed CPEB evolutionary trends are robust to random species sampling and intraclade variability</title>
<p>In the previous analyses, the number of CPEB orthologs that was analyzed per clade was determined by the availability of primary sequences in databases, which is not proportional to the actual clade size. To rule out that uneven species sampling may have contributed to the evolutionary trends that we observed, we repeated our analyses using for each clade a fixed number of randomly selected species (5). This randomized analysis was repeated 10 times, using CPEB2 as a case study. Each time, we calculated the correlation coefficient between 24 parameters of interest (amino acid percent occurrences, as well as SIM, REP, LLPS propensity, and prion-likeness scores) and clade stem ages. Remarkably, those correlations that were significant when using all the available sequences remained significant when using only 5 of them per clade, except for a single case, i.e., in 239 of 240 instances (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). Thus, the evolutionary trends that we detected are largely independent of the degree of species sampling in clades.</p>
<p>Moreover, our previous analyses were performed using the mean values of the parameters of interest in each clade. As this approach did not consider the degree of intraclade variability of the parameters, we repeated our analysis for CPEB2 using the values of the parameters of interest for each individual species rather than their mean values per clade. This analysis revealed that 13 out of 14 of the significant correlations that were detected for the parameters of interest remained significant even when not averaging values in each clade (<xref ref-type="sec" rid="s10">Supplementary Table S4</xref>). The only exception was the percent occurrence of W residues, which are very rare in the protein, whose correlation coefficient fell slightly below significance. Thus, the observed evolutionary trends in CPEB composition- and function-related parameters remain significant even when considering their intraclade variability.</p>
<p>To directly compare intraclade and interclade variability over a similar evolutionary time span in the vertebrate lineage (<xref ref-type="fig" rid="F4">Figure 4</xref>), we repeated our analysis for the 24 parameters of interest within the Actinopterygii clade, using again CPEB2 as a case study. We performed this analysis in Actinopterygii as we had available CPEB2 sequences from a good number of species (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>) from this clade that evolved over a long time period (396 my), comparable to that considered in our previous analysis across vertebrate clades (429 my). The mean values of the 24 parameters were analyzed in relation to the stem ages of 21 clades nested within Actinopterygii, ranging from Cladistia (396 mya) to Poeciliinae (18.9 mya; <xref ref-type="fig" rid="F9">Figure 9A</xref>; see <xref ref-type="sec" rid="s4">Methods</xref>). Although the number of available sequences for the oldest clades (from Cladistia to Elopocephala) was small, this analysis revealed that the variability of the 24 parameters within Actinopterygii is relatively limited in comparison with that across vertebrate clades over a similarly long evolutionary timespan (&#x3e;400 million years; <xref ref-type="fig" rid="F9">Figure 9B</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>). Within this clade, these relatively modest variation trends, some of which are statistically significant, go either in the same (e.g., % P, <xref ref-type="fig" rid="F9">Figure 9B</xref>) or in the opposite (e.g., % LLPS propensity and prion-likeness, <xref ref-type="fig" rid="F9">Figure 9B</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>) direction of those observed across vertebrate clades.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Intraclade versus interclade evolutionary variation in CPEB2 and evolutionary dynamics of TIA1 <bold>(A)</bold>. On the <italic>left</italic>, phylogenetic tree of the 134 species of the Actinopterygii clade with available CPEB2 sequences. Twenty-one subclades are numbered and labelled by a three-letter abbreviation of the clade name as listed in the <italic>Methods</italic> section. The cladogram on the right illustrates the phylogenetic relationships between the 21 clades shown in the phylogenetic tree. Clade stem ages are indicated. The <italic>colored bars</italic> on top indicate the higher-level clades (listed <italic>on the right</italic>) that variably comprise the 21 clades indicated in the cladogram. <bold>(B)</bold> Scatterplots with regression lines displaying, for CPEB2 orthologs, correlations across vertebrate clades (circles, <italic>red regression line</italic>) and Actinopterygii clades (squares, <italic>blue regression line</italic>) between the mean values of the indicated parameters (i.e., P occurrence in <italic>upper left</italic> panel, SIM score in <italic>upper right</italic> panel, ParSe LLPS propensity score in <italic>lower left</italic> panel) and clade stem ages. Data points are colored to indicate clades as reported in the legend <italic>on the right</italic>. Arrowheads indicate in each graph the datapoint relative to the mean value of the parameter in Actinopterygii. The <italic>r</italic> correlation coefficients are indicated for each regression line. <italic>Asterisks</italic> indicate statistically significant correlations. <bold>(C)</bold> As in <xref ref-type="fig" rid="F7">Figure 7C</xref>, with datapoints and regression related to the TIA1 SIM score (in <italic>red</italic>) in comparison with the CPEB1-4 SIM scores.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g009.tif"/>
</fig>
<p>These findings indicate that the evolutionary dynamics of the CPEB composition- and function-related parameters that we observed across vertebrate clades appear to be lineage-specific. Thus, over a similar evolutionary timespan of &#x223c;400 million years, their variation profiles radically differ, in magnitude and/or direction, along the vertebrate lineage or within its derived lineages, such as Actinopterygii. Therefore, the variation of some of the LCR-related parameters that we analyzed appears to mark evolutionary transitions across vertebrate clades, correlating with their stem ages. In this respect, the evolutionary dynamics of these parameters are comparable to those encountered for some other LCR-related parameters in the evolution of eukaryotic proteomes (<xref ref-type="bibr" rid="B64">Pelassa et al., 2019</xref>; see <xref ref-type="sec" rid="s3">Discussion</xref>).</p>
</sec>
<sec id="s2-8">
<title>The evolutionary changes observed in CPEBs are not generalized across prion-like RNA-binding proteins</title>
<p>The previous findings prompted us to test whether the evolutionary dynamics that we observed for CPEB2/3 are also detectable for other similar proteins, or whether they represent protein-/paralog-specific features.</p>
<p>Towards this aim, we studied the evolutionary history of TIA1, an LLPS-prone, prion-forming vertebrate protein containing multiple RRMs and a C-terminal LCR (<xref ref-type="bibr" rid="B76">Rayman and Kandel, 2017</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S7A</xref>), i.e., a protein structurally and functionally related to CPEB2/3. We found that the TIA1 amino acid composition varied in a more limited manner compared to CPEB2/3 across vertebrate clades (<xref ref-type="fig" rid="F9">Figure 9C</xref>; <xref ref-type="sec" rid="s10">Supplementary Figures S7B&#x2013;G</xref>). Notably, the TIA1 SIM and REP scores did not vary significantly (<xref ref-type="sec" rid="s10">Supplementary Figures S7D&#x2013;E</xref>). As for CPEB1, the protein has a relatively low LLPS propensity in older clades, which minimally, but significantly, increased going towards younger clades (r &#x3d; &#x2212;0.90, n &#x3d; 9, p &#x3c; 0.001; <xref ref-type="sec" rid="s10">Supplementary Figure S7F</xref>). As for CPEB3, the prion-likeness of TIA1 is high and relatively stable throughout vertebrate phylogenesis, with only a taxon-specific drop in Actinopterygii, and a minimal increase from older to younger clades (r &#x3d; &#x2212;0.68, n &#x3d; 9, p &#x3c; 0.05; <xref ref-type="sec" rid="s10">Supplementary Figure S7G</xref>). Thus, this protein has considerable prion-likeness and modest LLPS propensity already in older clades and maintained these features throughout the vertebrate phylogenetic tree, without substantial quantitative changes in sequence composition and complexity.</p>
<p>Thus, the evolutionary changes that we found for CPEB2/3 did not occur in similar proteins over the same evolutionary timespan, representing protein- and paralog-specific phenomena.</p>
</sec>
<sec id="s2-9">
<title>Evolutionary divergence within the CPEB2-4 subfamily and the rise of CPEB2 as an LLPS-prone and prion-like protein in the vertebrate lineage</title>
<p>Overall, the previous findings indicate that the complexity- and function-related parameters that were analyzed diverged significantly not only across members of the CPEB1 and CPEB2-4 subfamilies but also within the latter. Notably, going from older to younger clades, the SIM and REP scores significantly increase only for CPEB2/3, but not for CPEB4. Indeed, the SIM and REP scores of CPEB2 correlate significantly with those of CPEB3 (<italic>r</italic> &#x3d; 0.89 and <italic>r</italic> &#x3d; 0.81, respectively, n &#x3d; 9 taxa, p &#x3c; 0.01 in both instances), but not of CPEB4, across clades (<italic>r</italic> &#x3d; 0.18, n &#x3d; 9, p &#x3d; 0.62, and <italic>r</italic> &#x3d; 0.05, n &#x3d; 9, p &#x3d; 0.96, respectively; <xref ref-type="fig" rid="F10">Figures 10A,B</xref>). However, CPEB2 and CPEB3 also diverged from each other in the impact of their sequence complexity changes on LLPS propensity and prion-likeness. Indeed, the LLPS propensity and prion-likeness scores of CPEB2 do not significantly correlate with those of CPEB3 (<italic>r</italic> &#x3d; 0.39, n &#x3d; 9 taxa, p &#x3d; 0.29 and <italic>r</italic> &#x3d; 0.42, n &#x3d; 9 taxa, p &#x3d; 0.26) and CPEB4 (<italic>r</italic> &#x3d; 0.56, n &#x3d; 9, p &#x3d; 0.11 and <italic>r</italic> &#x3d; 0.63, n &#x3d; 9 taxa, p &#x3d; 0.07) across clades (<xref ref-type="fig" rid="F10">Figures 10C,D</xref>). Thus, two paralogs like CPEB2 and CPEB3 can display both parallel and divergent evolutionary trajectories with respect to different composition- and function-related parameters.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Divergent evolutionary variation of SIM, REP, LLPS, and prion-likeness scores across the CPEB2-4 subfamily orthologs. <bold>(A)</bold> Scatterplots with regression lines displaying correlations between the clade mean SIM scores of CPEB2 and those of CPEB3 (<italic>purple</italic>) or CPEB4 (<italic>green</italic>). The <italic>r</italic> correlation coefficients are indicated for each regression line. <italic>Asterisks</italic> indicate statistically significant correlations. <bold>(B)</bold> As in <bold>(A)</bold>, but for the REP score. Note how CPEB2 and CPEB3, but not CPEB4, underwent parallel evolutionary changes in both SIM and REP scores. <bold>(C)</bold> As in <bold>(A)</bold>, but for the LLPS propensity score (ParSe &#x2211; classifier distance P). <bold>(D)</bold> As in <bold>(A)</bold>, but for the prion-likeness score PLAAC (PrD score). Note how CPEB2 changes in LLPS propensity and prion-likeness scores do not correlate significantly with those of either CPEB3 or CPEB4. <bold>(E)</bold> Scatterplot with regression line displaying the significant correlation between the mean LLPS propensity and SIM scores of CPEB2 across vertebrate clades. The <italic>r</italic> correlation coefficients are indicated for each regression line. <italic>Asterisks</italic> (in this panel as well as in panels F-H) indicate statistically significant correlations. <bold>(F)</bold> As in <bold>(E)</bold>, for the correlation between the mean LLPS propensity and REP scores. <bold>(G)</bold> As in <bold>(E)</bold>, for the correlation between the mean PrD and SIM scores. <bold>(H)</bold> As in <bold>(E)</bold>, for the correlation between the mean PrD and REP scores. Note how the SIM and REP scores correlate indifferently with both LLPS propensity and prion-likeness scores.</p>
</caption>
<graphic xlink:href="fbinf-05-1491735-g010.tif"/>
</fig>
<p>Overall, CPEB2 underwent the most conspicuous changes in amino acid composition and sequence complexity among the four CPEB paralogs along the vertebrate lineage. The considerable changes in SIM and REP scores correlated significantly with both LLPS propensity and prion-likeness scores (<xref ref-type="fig" rid="F10">Figures 10E&#x2013;H</xref>) at similarly high levels (r &#x3d; 0.87-0.94, n &#x3d; 9 and p &#x3c; 0.01 in all instances), suggesting the absence of any obvious preferential link between the SIM and REP scores and either LLPS propensity or prion-likeness.</p>
<p>The results of these analyses reveal how CPEB2 arose as a second prion-like paralog of the CPEB family besides CPEB3 along the vertebrate lineage, reaching remarkable degrees of LLPS propensity and prion-likeness in the youngest vertebrate clades, including Glires and Primates.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s3">
<title>Discussion</title>
<p>We have systematically characterized the composition, complexity, LLPS propensity, and prion-likeness of the primary sequences of human CPEB1-4, studying their evolution in more than 500 species across nine major vertebrate clades. We found that the four CPEB paralogs underwent largely divergent evolutionary changes in composition and sequence complexity that varied their LLPS propensity and prion-likeness, with detectable trends going from older to younger vertebrate clades. These changes were particularly marked for CPEB2, which became a protein with high LLPS propensity and prion-likeness in younger clades, such as Glires and Primates. These findings expand our understanding of the molecular evolution of the CPEB protein family by defining, both qualitatively and quantitatively, how progressive changes in LCRs/AARs may have promoted the functional divergence of the four CPEB paralogs along the vertebrate lineage.</p>
<sec id="s3-1">
<title>Sequence composition and complexity in the human CPEB paralogs: structural and functional implications</title>
<p>We systematically analyzed the primary sequence features of the four human CPEB paralogs, focusing on their composition, sequence complexity, LLPS propensity, and prion-likeness. The combination of these sequence- and function-related quantitative parameters can provide a better understanding of how paralog-specific differences in LCR/AAR composition may determine functional differences across the four CPEB paralogous proteins. Our analyses substantially extend the breadth and scope of previous descriptive reports of the amino acid composition of some CPEB orthologs (e.g., <xref ref-type="bibr" rid="B86">Si et al., 2003b</xref>; <xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; <xref ref-type="bibr" rid="B74">Ram&#xed;rez de Mingo et al., 2022</xref>).</p>
<p>In compositional terms, we found marked paralog-specific enrichments and depletions of several amino acids across the human CPEBs, which were associated with changes in the complexity and repetitiveness of their primary sequences, with CPEB2/3 displaying the lowest complexity and the highest repetitiveness. The biological meaning of these paralog-specific primary sequence differences is largely unknown and can only be interpreted based on our currently limited understanding of the structure and function of the CPEB NTRs.</p>
<p>Recent NMR structural analyses of human CPEB3 NTR fragments revealed a combination of random coil, &#x3b1;-helical, and polyproline-II (PP-II) conformations (<xref ref-type="bibr" rid="B74">Ram&#xed;rez de Mingo et al., 2022</xref>). No NTR atomic-level structure is available for other paralogs. The AlphaFold models of human CPEBs show how the four NTRs are mostly disordered with interspersed structured segments, similar to what is observed in the CPEB3 NMR structures. The enrichment in P/G structure-breaking residues, especially in CPEB2/3, may be key in maintaining the NTRs in a mostly disordered, flexible conformation. The disordered NTR portions can mediate LLPS through multivalent interactions (<xref ref-type="bibr" rid="B28">Gomes and Shorter, 2019</xref>). The interspersed secondary structure elements, which can be stabilized by folding-upon-binding mechanisms (<xref ref-type="bibr" rid="B106">Wright and Dyson, 2009</xref>), may cooperate with disordered regions in driving LLPS and fibrillization (<xref ref-type="bibr" rid="B75">Raveendra et al., 2013</xref>; <xref ref-type="bibr" rid="B69">Peskett et al., 2018</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>).</p>
<p>&#x2018;Molecular grammars&#x2019; are thought to exist by which the occurrence and order of amino acids in LCRs defines their LLPS behavior (<xref ref-type="bibr" rid="B6">Brangwynne et al., 2015</xref>; <xref ref-type="bibr" rid="B57">Martin and Mittag, 2018</xref>; <xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>; <xref ref-type="bibr" rid="B28">Gomes and Shorter, 2019</xref>; <xref ref-type="bibr" rid="B83">Saar et al., 2021</xref>; <xref ref-type="bibr" rid="B77">Rekhi et al., 2024</xref>), although their fine &#x2018;rules&#x2019; are not yet clearly understood. <xref ref-type="bibr" rid="B57">Martin and Mittag (2018)</xref> distinguished three types of LCRs, i.e. those enriched in either polar (and G), charged, or hydrophobic residues, with differential LLPS behaviors. The human CPEB LCRs, generally enriched in Q, S, G and depleted in charged and hydrophobic residues belong to the polar type, which is common in LLPS-prone proteins (<xref ref-type="bibr" rid="B6">Brangwynne et al., 2015</xref>). The LLPS of polar LCRs can be modulated by interspersed aromatic and charged residues (<xref ref-type="bibr" rid="B57">Martin and Mittag, 2018</xref>). Spaced aromatic (Y/F) and positively charged (R/K) residues govern the LLPS of certain proteins (<xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>). While Y and K/R are underrepresented in CPEBs, F residues are scattered along their NTRs, as common in LLPS-prone proteins (<xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>; <xref ref-type="bibr" rid="B57">Martin and Mittag, 2018</xref>). The variable enrichment in G, Q, and S residues may differentially shape the material properties of CPEB2-4 condensates (<xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>). 
The prominent enrichment in P residues in the CPEB2-4 subfamily may impact LLPS, as P-rich regions play LLPS-modulating roles (<xref ref-type="bibr" rid="B79">Riback et al., 2017</xref>; <xref ref-type="bibr" rid="B77">Rekhi et al., 2024</xref>), also through proline cis-trans isomerization (<xref ref-type="bibr" rid="B28">Gomes and Shorter, 2019</xref>). Not all LLPS-driving LCRs are disordered and not all LLPS-driving domains are LCRs (<xref ref-type="bibr" rid="B57">Martin and Mittag, 2018</xref>; <xref ref-type="bibr" rid="B16">Dignon et al., 2020</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>), as also shown by our ParSe predictions. RRMs can contribute to LLPS in a complex interplay with LCRs (<xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>) and future studies will have to finely dissect the interplay of NTRs and CTRs in CPEB LLPS.</p>
<p>Molecular grammars have also been proposed for functional prions (<xref ref-type="bibr" rid="B3">Alberti et al., 2009</xref>; <xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; <xref ref-type="bibr" rid="B31">Halfmann et al., 2011</xref>; <xref ref-type="bibr" rid="B102">Wake et al., 2024</xref>). <xref ref-type="bibr" rid="B21">Fiumara et al. (2010)</xref> classified these prions based on their composition, distinguishing Q/N-rich (type 1), Q/N/P/G-rich (type 3), and P/G-rich (type 5) prions, with intermediate degrees (types 2 and 4). The fibrillization of these prion classes can be triggered by distinct structural elements, i.e., &#x3b1;-helical coiled coils (CCs) for Q/N-rich prions and &#x3b2;-sheets for prions richer in P/G residues, which may coexist (<xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; Hervas et al., 2021). Based on our analyses, human CPEB3 is a type 2 prion, bearing both type-1-like CC-prone regions (poly-Q, -A, and -S AARs; <xref ref-type="bibr" rid="B65">Pelassa et al., 2014</xref>; <xref ref-type="bibr" rid="B52">Lilliu et al., 2018</xref>) and type-3 P/Q-rich regions, consistent with structural analyses of NTR fragments showing &#x3b1;-helical structures overlapping/flanking poly-Q, -A, and -S AARs, and PP-II and disordered conformations in P-rich regions (<xref ref-type="bibr" rid="B74">Ram&#xed;rez de Mingo et al., 2022</xref>). P/G-rich patches may limit the fibrillization propensity of other NTR regions (<xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>). Indeed, <xref ref-type="bibr" rid="B78">Reselammal et al. (2021)</xref> identified a core PrD subregion (a.a. 101-145) forming the rigid part of CPEB3 fibrils flanked by flexible proline-rich regions (a.a. 80-100 and 165-194; 37%&#x2013;40% P). Thus, P residues in CPEB2/3 may profoundly shape their prion-like fibrillization besides their LLPS. CPEB2 is even more enriched in P/G than CPEB3, resembling a type 3 prion.</p>
<p>The prediction tools that we used correctly identified the protein regions known to drive the LLPS and prion-like fibrillization of human CPEBs based on experimental studies (<xref ref-type="bibr" rid="B19">Duran-Arqu&#xe9; et al., 2022</xref>; <xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>; <xref ref-type="bibr" rid="B98">Tsvetkov et al., 2020</xref>). On this basis, we further employed these tools in our evolutionary analyses (see below) to predict the contribution of specific NTR subregions to LLPS and prion-like fibrillization. ParSe identified specific LLPS-prone subregions that displayed an alternating pattern, with peripheral overlap, with the PrDs identified by PLAAC in CPEB2/3 (a.a. 210-250, 337-450, 489-566 in CPEB2; a.a. 1-35, 145-218 in CPEB3). These predictions suggest that different NTR subregions may be functionally specialized in driving either LLPS-condensation or prion-like fibrillization. Whether this is the case for CPEB2 remains to be experimentally determined. A few studies have attempted to initially identify functionally specialized subregions within the CPEB3 NTR, although with partially contradictory results. <xref ref-type="bibr" rid="B91">Stephan et al. (2015)</xref> mapped two PrDs in mouse CPEB3, i.e., PrD1 (a.a. 1-217; a.a. 1-216 in human CPEB3) and PrD2 (a.a. 284-449; a.a. 284-431 in human CPEB3), separated by an actin-binding region. The two PrDs predicted by PLAAC in our analyses overlap with PrD1. <xref ref-type="bibr" rid="B78">Reselammal et al. (2021)</xref> showed the key role of a PrD1 subregion (a.a. 101-194), containing one of the two PLAAC-predicted PrDs, in mouse CPEB3 fibrillization. <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al. (2023)</xref> identified the a.a. 1-200 region of human CPEB3 as a PrD-like &#x2018;amyloid-forming region&#x2019; and the distal NTR (a.a. 250-426), overlapping with PrD2 in <xref ref-type="bibr" rid="B91">Stephan et al. 
(2015)</xref>, as the &#x2018;phase-separation domain&#x2019;. These partially contradictory findings, obtained in heterogeneous systems often only <italic>in vitro</italic>, together with our observations, indicate the need for careful molecular dissection approaches to identify NTR subregions mediating CPEB2/3 LLPS and/or fibrillization in the cellular context. Future studies will also have to better define the functional and temporal relationships between CPEB2/3 LLPS and prion-like fibrillization. For CPEB3, some studies view them as alternative states (<xref ref-type="bibr" rid="B25">Ford et al., 2019</xref>; <xref ref-type="bibr" rid="B24">2023</xref>), while others see LLPS as an intermediate step towards fibrillization (<xref ref-type="bibr" rid="B74">Ram&#xed;rez de Mingo et al., 2022</xref>; <xref ref-type="bibr" rid="B73">Ram&#xed;rez de Mingo et al., 2023</xref>), as observed for other LCR-bearing proteins (<xref ref-type="bibr" rid="B69">Peskett et al., 2018</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). While all these models have some experimental support, they remain largely speculative. Our findings can provide critical guidance in the further experimental dissection of the functional roles of NTR subregions in the LLPS and prion-like behavior of the vertebrate CPEB paralogs.</p>
</sec>
<sec id="s3-2">
<title>Divergent LCR/AAR evolutionary variation as a driver of functional diversification in protein paralogs</title>
<p>The four paralogous CPEB genes appeared early in the vertebrate lineage, when many gene families diversified (<xref ref-type="bibr" rid="B61">Nishizawa and Nishizawa, 1999</xref>; <xref ref-type="bibr" rid="B72">Rad&#xf3;-Trilla et al., 2015</xref>). The appearance of LCRs/AARs and their evolutionary variation in length and composition can contribute to the functional diversification of paralogous proteins with adaptive significance (e.g., <xref ref-type="bibr" rid="B17">Dover, 1989</xref>; <xref ref-type="bibr" rid="B68">Persi et al., 2016</xref>; <xref ref-type="bibr" rid="B67">2023</xref>; <xref ref-type="bibr" rid="B64">Pelassa et al., 2019</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). In our study, we initially characterized the marked differences in LCR-related sequence complexity, LLPS propensity, and prion-likeness across CPEB paralogs in <italic>H. sapiens</italic>, a species belonging to a relatively young terminal clade (Euarchontoglires) along the vertebrate lineage. In the second part of our study, we analyzed whether those differences are phylogenetically conserved&#x2013;and were therefore present even in species from more ancient vertebrate clades&#x2013;or whether they arose gradually, or at specific points, along the vertebrate lineage. We found that composition- and function-related parameters of primary sequences varied in a largely divergent manner across CPEB paralogs through the vertebrate lineage, starting from Chondrichthyes. In general, for each paralog, these parameters were either relatively constant across clades or varied with trends largely related to clade stem ages along the lineage. Some clade-specific oscillations in their mean value, superimposed on clade stem age-related trends, were also found. 
We previously detected similar evolutionary trends for other LCR-related parameters at the level of entire proteomes (<xref ref-type="bibr" rid="B65">Pelassa et al., 2014</xref>; <xref ref-type="bibr" rid="B64">Pelassa et al., 2019</xref>). These trends were still detected when varying the sampling of species within clades and when accounting for the intraclade variability of the parameters, indicating that they are mostly related to inter- rather than intra-clade variation in the vertebrate lineage, at least for the CPEB case. Indeed, some complexity- and function-related parameters that varied markedly across vertebrate clades, correlating with clade stem ages, were instead relatively stable within clades, even with a long evolutionary history, such as Actinopterygii. Thus, changes in these parameters appear to be lineage-specific and to mark major evolutionary transitions across clades along the vertebrate lineage, as found for other LCR-related parameters in the evolution of eukaryotic proteomes (<xref ref-type="bibr" rid="B64">Pelassa et al., 2019</xref>).</p>
<p>In principle, either neutral evolution with genetic drift, selective forces, or their combination, may have shaped the evolutionary dynamics that we identified (e.g., <xref ref-type="bibr" rid="B26">Galtier, 2024</xref>).</p>
<p>The evolution of LCRs of variable complexity, from homopolymers (i.e., AARs), to oligopeptide repeats and regions of cryptic simplicity (<xref ref-type="bibr" rid="B94">Tautz et al., 1986</xref>; <xref ref-type="bibr" rid="B20">Enright et al., 2023</xref>), has often been modelled after that of selectively neutral microsatellites (<xref ref-type="bibr" rid="B8">Buschiazzo and Gemmell, 2006</xref>). AARs/LCRs originate from replication slippage and/or unequal crossing-over (<xref ref-type="bibr" rid="B1">Alb&#xe0; et al., 1999</xref>; <xref ref-type="bibr" rid="B84">Sainudiin et al., 2004</xref>; <xref ref-type="bibr" rid="B62">Owens et al., 2013</xref>; <xref ref-type="bibr" rid="B105">Warren et al., 1997</xref>). Synonymous or non-synonymous substitutions can lead, respectively, to their stabilization or interruption and loss (<xref ref-type="bibr" rid="B8">Buschiazzo and Gemmell, 2006</xref>; <xref ref-type="bibr" rid="B71">Rad&#xf3;-Trilla and Alb&#xe0;, 2012</xref>; <xref ref-type="bibr" rid="B49">Lenz et al., 2014</xref>). LCRs can also arise from tandem duplications of gene segments (<xref ref-type="bibr" rid="B61">Nishizawa and Nishizawa, 1999</xref>) and GC-biased gene conversion (<xref ref-type="bibr" rid="B27">Galtier et al., 2009</xref>). 
These mechanisms, and thus AARs/LCRs occurrence and composition, can arise from clade-specific quantitative differences in slippage rates (<xref ref-type="bibr" rid="B9">Canceill et al., 1999</xref>; <xref ref-type="bibr" rid="B23">Flores and Engels, 1999</xref>; <xref ref-type="bibr" rid="B81">Ross et al., 2003</xref>; <xref ref-type="bibr" rid="B46">Laidlaw et al., 2007</xref>; <xref ref-type="bibr" rid="B10">Castillo-Lizardo et al., 2014</xref>), genome base composition (<xref ref-type="bibr" rid="B15">De Pristo et al., 2006</xref>; <xref ref-type="bibr" rid="B97">Tian et al., 2011</xref>), codon usage (<xref ref-type="bibr" rid="B1">Alb&#xe0; et al., 1999</xref>), unequal crossing-over (<xref ref-type="bibr" rid="B37">Hoffmann et al., 2008</xref>), and DNA repair mechanisms (<xref ref-type="bibr" rid="B88">Sia et al., 2001</xref>). At least some of these mechanisms may have contributed to the observed LCR/AAR variation across clades. Indeed, LCRs/AARs whose amino acids are encoded by GC-rich codons (e.g., A, G, P) are enriched in GC-rich mammalian genomes (<xref ref-type="bibr" rid="B92">Sumiyama et al., 1996</xref>; <xref ref-type="bibr" rid="B60">Nakachi et al., 1997</xref>), although this trend is not universal (<xref ref-type="bibr" rid="B71">Rad&#xf3;-Trilla and Alb&#xe0;, 2012</xref>). The evolutionary trends that we observed in CPEB P-rich/polyP, G-rich/polyG, and A-rich/polyA regions appear to be consistent with these trends.</p>
<p>However, a growing body of evidence is showing that selective forces also play substantial roles in the evolution of LCRs/AARs (<xref ref-type="bibr" rid="B17">Dover, 1989</xref>; <xref ref-type="bibr" rid="B71">Rad&#xf3;-Trilla and Alb&#xe0;, 2012</xref>; <xref ref-type="bibr" rid="B72">Rad&#xf3;-Trilla et al., 2015</xref>; <xref ref-type="bibr" rid="B68">Persi et al., 2016</xref>; <xref ref-type="bibr" rid="B20">Enright et al., 2023</xref>; <xref ref-type="bibr" rid="B95">Teekas et al., 2024</xref>) which are increasingly recognized as functional sequences rather than selectively neutral spacers (e.g., <xref ref-type="bibr" rid="B17">Dover, 1989</xref>; <xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>; <xref ref-type="bibr" rid="B22">2015</xref>; <xref ref-type="bibr" rid="B65">Pelassa et al., 2014</xref>; <xref ref-type="bibr" rid="B64">2019</xref>; <xref ref-type="bibr" rid="B66">Pelassa and Fiumara, 2015</xref>; <xref ref-type="bibr" rid="B12">Chavali et al., 2017</xref>; <xref ref-type="bibr" rid="B109">Chavali et al., 2020</xref>; <xref ref-type="bibr" rid="B56">Marchetti et al., 2021</xref>; <xref ref-type="bibr" rid="B99">Vaglietti and Fiumara, 2021</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). LCRs/AARs may be subject to selective pressure because variations in their length and composition alter protein structure (<xref ref-type="bibr" rid="B21">Fiumara et al., 2010</xref>) and interactions (<xref ref-type="bibr" rid="B66">Pelassa and Fiumara, 2015</xref>), also by convergent evolution with interactors (<xref ref-type="bibr" rid="B99">Vaglietti and Fiumara, 2021</xref>), as well as localization, through LLPS and aggregation, and physiological function (<xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>). 
Several lines of evidence, including analyses of mutation rates and codon usage (<xref ref-type="bibr" rid="B32">Hancock et al., 2001</xref>; <xref ref-type="bibr" rid="B59">Mularoni et al., 2010</xref>; <xref ref-type="bibr" rid="B39">Huntley and Golding, 2000</xref>; <xref ref-type="bibr" rid="B29">Haerty and Golding, 2010</xref>; <xref ref-type="bibr" rid="B51">Li et al., 2012</xref>), convergent evolution (<xref ref-type="bibr" rid="B48">Lavoie et al., 2003</xref>; <xref ref-type="bibr" rid="B100">Vaglietti et al., 2023</xref>), and sequence entropy (<xref ref-type="bibr" rid="B20">Enright et al., 2023</xref>), indicate that LCRs/AARs are subject to selective pressure. LCRs evolve more rapidly than other protein regions (<xref ref-type="bibr" rid="B2">Alb&#xe0; et al., 2007</xref>), with phases of relaxed purifying selection and positive selection followed by phases of intense purifying selection (<xref ref-type="bibr" rid="B68">Persi et al., 2016</xref>).</p>
<p>Two main lines of evidence in our findings suggest that selection played a role in shaping the evolution and divergence of LCR-related parameters in the vertebrate CPEB family. <italic>First</italic>, at least some of the LCR changes that we observed in the evolution of vertebrate CPEBs are predicted to directly impact their LLPS and prion-like behavior, and other aspects of their physiological activity, based not only on our <italic>in silico</italic> analyses but also on experimental evidence (e.g., <xref ref-type="bibr" rid="B103">Wang et al., 2018</xref>; <xref ref-type="bibr" rid="B77">Rekhi et al., 2024</xref>). These findings are consistent with the view that LCR variation is an evolutionary tool for regulating protein LLPS with adaptive effects (<xref ref-type="bibr" rid="B57">Martin and Mittag, 2018</xref>). For instance, changes in P-richness, such as those found in CPEB2/3, can regulate protein LLPS (<xref ref-type="bibr" rid="B79">Riback et al., 2017</xref>) and may have modulated CPEB3 interactions with actin mediated by its P-rich NTR (<xref ref-type="bibr" rid="B71">Rad&#xf3;-Trilla and Alb&#xe0;, 2012</xref>; <xref ref-type="bibr" rid="B91">Stephan et al., 2015</xref>). More in general, the evolutionary compositional changes in CPEBs may have contributed to shaping their interactomes (<xref ref-type="bibr" rid="B66">Pelassa and Fiumara, 2015</xref>; <xref ref-type="bibr" rid="B55">Mallik et al., 2022</xref>). Therefore, selection may have favored compositional changes in certain paralogs because of their direct impact on protein function and interactions. <italic>Second</italic>, both composition- and function-related parameters displayed clearly divergent evolutionary trends across paralogs, with some of them increasing or decreasing in certain paralogs, in a clade stem age-related manner, while remaining essentially stable in the other paralogs. 
These divergent evolutionary trajectories would be difficult to explain if the CPEB LCRs were selectively neutral. In the latter case, one may expect more similar trends across paralogs. Thus, it is plausible to speculate that, during vertebrate evolution, positive selection may have favored compositional changes in CPEB2/3 LCRs, while purifying selection may have maintained the composition of the CPEB1/4 LCRs relatively stable, consistent with observations of both adaptive and purifying selection acting at different stages of LCR evolution (<xref ref-type="bibr" rid="B68">Persi et al., 2016</xref>; <xref ref-type="bibr" rid="B67">2023</xref>). From this perspective, our findings indicate that the CPEB2 LCRs may have undergone a phase of positive selection more recently than those of other paralogs.</p>
<p>In conclusion, we identified extensive patterns of LCR/AAR divergent evolution that may have had a key role in shaping the paralog-specific functions of CPEBs through the modulation of protein LLPS and prion-like behaviors. These findings identify the evolution of CPEBs as a paradigmatic example of the interplay of gene duplication and LCR variation in the functional diversification of protein families (<xref ref-type="bibr" rid="B67">Persi et al., 2023</xref>; <xref ref-type="bibr" rid="B72">Rad&#xf3;-Trilla et al., 2015</xref>; <xref ref-type="bibr" rid="B13">Chiu et al., 2022</xref>). Thus, they may provide key guidance for future experimental studies on the paralog-specific biological roles of the extensive LCRs of CPEBs, and their subregions, in LLPS and prion-like aggregation. Furthermore, they warrant further explorations of the LLPS and prion-like behaviors of CPEB2 in the context of vertebrate organisms, and their nervous systems, as previously done for CPEB3. Given the growing genetic, structural, and functional information on CPEB1-4, and the knowledge of their evolutionary dynamics that we traced here, the CPEB family represents an exquisite case study for investigating the impact of LCR evolution on the functional divergence of paralogous proteins.</p>
</sec>
</sec>
<sec sec-type="materials|methods" id="s4">
<title>Materials and methods</title>
<sec id="s4-1">
<title>Protein primary sequences</title>
<p>The primary sequences of human CPEB1-4 were obtained from the NCBI protein database (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/protein/">https://www.ncbi.nlm.nih.gov/protein/</ext-link>; IDs: NP_001352171.1, NP_001170853.1, NP_055727.3, NP_085130.2, respectively), selecting isoforms reported as canonical in the Ensembl database. The reference human proteome was downloaded from the UniProt database (<ext-link ext-link-type="uri" xlink:href="https://www.uniprot.org/proteomes/UP000005640">https://www.uniprot.org/proteomes/UP000005640</ext-link>; Proteome ID: UP000005640, one sequence per gene, 20,590 proteins). The primary sequences of CPEB1-4 and TIA1 vertebrate orthologs (one per species) were downloaded in batch from the NCBI protein database. We selected sequences of orthologs of 571 species from nine major clades with different stem ages along the vertebrate lineage. The clades were defined based on a phylogenetic tree of the 571 species derived from TimeTree (<xref ref-type="bibr" rid="B44">Kumar et al., 2017</xref>; <ext-link ext-link-type="uri" xlink:href="http://www.timetree.org">www.timetree.org</ext-link>), and on taxonomic information derived from NCBI Taxonomy (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/taxonomy">www.ncbi.nlm.nih.gov/taxonomy</ext-link>), using a branch-based clade definition approach. 
The stem age of each clade was derived from its divergence time from Euarchontoglires, or, for Euarchontoglires itself, from the divergence time between its constituent sister taxa, Glires and Euarchonta (obtained from the TimeTree database, median values; <ext-link ext-link-type="uri" xlink:href="http://timetree.org/">http://timetree.org/</ext-link>), as follows: Chondrichthyes (<italic>Cho</italic>, 12 species, 462 mya), Actinopterygii (<italic>Act</italic>, 175 species, 429 mya), Amphibia/Lissamphibia (<italic>Amp</italic>, 11 species, 352 mya), Sauropsida (<italic>Sau</italic>, 159 species, 319 mya), Marsupialia (<italic>Mar</italic>, 8 species, 160 mya), Atlantogenata (<italic>Atl</italic>, 9 species, 99 mya), Laurasiatheria (<italic>Lau</italic>, 119 species, 94 mya), and two clades within Euarchontoglires, i.e., Glires (<italic>Gli</italic>, 45 species) and Primates (<italic>Pri</italic>, 34 species), which diverged 87 mya. Divergence times between clades were derived from <ext-link ext-link-type="uri" xlink:href="http://TimeTree.org">TimeTree.org</ext-link> (median values). For each clade, the available sequences were aligned using MultAlin (<xref ref-type="bibr" rid="B14">Corpet, 1988</xref>) and the alignment was visually inspected. Sequences that appeared obviously incomplete in comparison with those of the same clade, i.e., lacking the initial methionine and/or with large deletions (&#x3e;50 residues), were discarded and not further analyzed. After this selection process, most species (89%) still had 3-4 paralog sequences available for the analysis. The list of the selected sequences is reported in <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>. In some analyses (see <xref ref-type="fig" rid="F9">Figure 9</xref>), the Actinopterygii species with an available CPEB2 sequence were further divided into 21 clades based on their phylogenetic tree derived from TimeTree and taxonomic information derived from NCBI Taxonomy. 
For some of the clades (1-4, 6-9, 16-17) only a few (1-3) sequences were available. The stem age of each clade was derived from its divergence time from Poeciliinae, or for Poeciliinae itself, from the divergence time between its two sister subclades to which <italic>Poecilia</italic> spp./<italic>Poeciliopsis prolifica</italic> and <italic>Xiphophorus</italic> spp./<italic>Gambusia affinis</italic> belong, as follows: <italic>clade 1</italic>, Cladistia (<italic>Cla</italic>, 2 species, 396 Mya); <italic>clade 2</italic>, Holostei (<italic>Hol</italic>, 1 species, 321 Mya); <italic>clade 3</italic>, Osteoglossocephala (<italic>Ost</italic>, 2 species, 263 Mya); <italic>clade 4</italic>, Elopocephala (<italic>Elo</italic>, 3 species, 250 Mya); <italic>clade 5</italic> (<italic>Oto</italic>, Otomorpha, 31 species, 224 Mya); <italic>clade 6,</italic> Protacanthopterygii (<italic>Pro</italic>, 1 species, 219 Mya); <italic>clade 7,</italic> Lampridacea (<italic>Lam</italic>, 1 species, 134 Mya); <italic>clade 8</italic>, Holocentrinomorphaceae (<italic>Hol</italic>, 1 species, 127 Mya); <italic>clade 9</italic>, Batrachoidaria (<italic>Bat,</italic> 1 species, 120 Mya); <italic>clade 10</italic>, Syngnathiaria/related Percomorphaceae (<italic>Syn</italic>, 9 species, 109 Mya); <italic>clade 11</italic>, Gobiaria (<italic>Gob</italic>, 3 species, 108 Mya); <italic>clade 12</italic>, Eupercaria (<italic>Eup</italic>, 28 species, 113 Mya); <italic>clade 13</italic>, Carangaria/related Percomorphaceae (<italic>Car</italic>, 16 species, 104 Mya); <italic>clade 14</italic>, Pomacentridae/Ovalentaria incertae sedis (<italic>Pom</italic>, 7 species, 87 Mya); <italic>clade 15</italic>, Cichlomorphae (<italic>Cic</italic>, 9 species, 91 Mya); <italic>clade 16</italic>, Atheriniformes (<italic>Ath</italic>, 1 species, 80 Mya); <italic>clade 17</italic>, Beloniformes (<italic>Bel</italic>, 1 species, 89 Mya); <italic>clade 18</italic>, Aplocheiloidei (<italic>Apl</italic>, 4 
species, 74 Mya); <italic>clade 19</italic>, Cyprinodontoidei (<italic>Cyp</italic>, 4 species, 46 Mya); <italic>clade 20</italic>, Poeciliinae (<italic>Poe</italic>, <italic>Poecilia</italic> spp./<italic>P. prolifica</italic>; 4 species, 18.9 Mya); <italic>clade 21</italic>, Poeciliinae (<italic>Poe</italic>, <italic>Xiphophorus</italic> spp./<italic>G. affinis</italic>, 5 species, 18.9 Mya). Divergence times between clades were derived from <ext-link ext-link-type="uri" xlink:href="http://TimeTree.org">TimeTree.org</ext-link> (median values).</p>
</sec>
<sec id="s4-2">
<title>Compositional analyses of protein primary sequences</title>
<p>The percent occurrence of each amino acid, as well as the occurrence and length of AARs (&#x2265;4 residues) of the 20 amino acids, in protein primary sequences (of CPEB1-4, TIA1, or of the whole human proteome) were determined using previously developed Perl scripts (<xref ref-type="bibr" rid="B65">Pelassa et al., 2014</xref>; <xref ref-type="bibr" rid="B56">Marchetti et al., 2021</xref>). In case multiple repeats of the same amino acid were found in a protein, we calculated their total length as the sum of the individual repeat (&#x2265;4 residues) lengths and used this value for the evolutionary analyses. We used 20% and &#x2212;20% over- or under-representation thresholds, respectively, to identify amino acids enriched or depleted in CPEB paralogs possibly in relation to the presence of compositionally biased regions (LCRs/AARs) in their primary sequences. These thresholds were empirically selected considering that in most proteins LCR/AAR regions constitute only a limited portion of the primary sequence. For instance, an average human protein of 500 residues is expected to contain &#x223c;8%, i.e., 40, alanine (A) residues. If the initial 100 residues of the same protein were an alanine-rich region containing 16% A, the whole protein would then contain 48 (16 &#x2b; 32) alanine residues, with a 20% increase in the percent occurrence of the amino acid, from 8% (40/500) to 9.6% (48/500). The same result would be obtained with an even shorter alanine-rich region (e.g., 50 residues) containing a higher percentage of alanine residues (e.g., 32%). Similarly, if the same protein contained a repeat of 10 alanine residues along its primary sequence, it would then contain 50 alanine residues (10%), i.e., &#x223c;20% more than expected. Thus, deviations &#x3e;20% in both directions in the occurrence of a given amino acid in the primary sequence of an average protein can signal the presence of compositionally biased regions of even modest length.</p>
</sec>
<sec id="s4-3">
<title>Analyses of protein primary sequence complexity and repetitiveness</title>
<p>To define quantitatively the overall primary sequence complexity features of the proteins of interest, we calculated two per-residue sequence complexity-related scores, expressing the local degree of sequence simplicity (SIM) and repetitiveness (REP). The two scores were calculated, using <italic>ad hoc</italic> Perl scripts, in a sliding window of 20 residues centered around each residue (9 residues upstream, 10 residues downstream for all residues) along the protein primary sequence. For both scores, the sliding window length increased from 11 to 20 residues for the first 10 residues of the primary sequence and decreased from 20 to 10 residues for the last 10 residues.</p>
<p>In this 20-residue window, the SIM score was calculated as:<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mtext>SIM</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>CV</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where CV is the coefficient of variation, <italic>a</italic> is a set of 20 values corresponding to the absolute number of occurrences of each amino acid in the 20-residue window (going from 0 to 20 for a given amino acid), and <italic>b</italic> is the number of amino acids occurring at least one time in the 20-residue window (going from 1 to 20). The &#x201c;ACDEFGHIKLMNPQRSTVWY&#x201d; sequence (in any order) has the minimum coefficient of variation of <italic>a</italic> (0) as well as the maximum <italic>b</italic> score (20), with the lowest possible SIM score (0). Any pure homopolymeric amino acid sequence, e.g., &#x201c;AAAAAAAAAAAAAAAAAAAA&#x201d;, has the maximum CV(<italic>a</italic>) (4.35) as well as the minimum <italic>b</italic> score (1), with the highest possible SIM score (4.35). The SIM score can thus vary from 0 to 4.35.</p>
<p>The REP score has been calculated as:<disp-formula id="equ2">
<mml:math id="m2">
<mml:mrow>
<mml:mtext>REP</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:msqrt>
<mml:mi>b</mml:mi>
</mml:msqrt>
<mml:mo>&#x2a;</mml:mo>
<mml:msqrt>
<mml:mi>c</mml:mi>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Where <italic>a</italic> is the total length of tandem repeats of at least two units of any amino acid (from 0 to 20), <italic>b</italic> is the number of tandem repeats of at least two units of any amino acid (from 1 for a 20-residue homopolymer to 10 in a sequence like &#x201c;AACCDDEEFFGGHHIIKKLL&#x201d;), and <italic>c</italic> is the number of different amino acids forming tandem repeats of at least two residues (from 0 to 10). The REP score can thus vary from 0 (for a sequence such as &#x201c;ACDEFGHIKLMNPQRSTVWY&#x201d;, in any order) to 20 (for any pure homopolymeric amino acid sequence, e.g., &#x201c;AAAAAAAAAAAAAAAAAAAA&#x201d;).</p>
<p>For each protein of interest, we calculated the mean per-residue SIM and REP scores across all the amino acids in the primary sequence. For the evolutionary analyses, we then calculated the mean values of these scores across all orthologs of a given protein in each clade of interest.</p>
</sec>
<sec id="s4-4">
<title>Liquid-liquid phase separation (LLPS) propensity and prion-likeness predictions</title>
<p>The per-residue propensity to undergo LLPS for the primary sequence of the four human CPEB proteins was calculated using the FuzDrop algorithm, with a pDP score threshold of 0.6 to predict LLPS-prone protein regions (<xref ref-type="bibr" rid="B101">Vendruscolo and Fuxreiter, 2022</xref>) and the ParSe algorithm, version 2, taking into account three related phase-separation propensity metrics of the algorithm (<xref ref-type="bibr" rid="B40">Ibrahim et al., 2023</xref>), i.e., classifier distance, classifier distance with U<sub>&#x3c0;</sub> &#x2b; U<sub>q</sub> extension (&#x394;h&#xb0;-trained), and classifier distance with U<sub>&#x3c0;</sub> &#x2b; U<sub>q</sub> extension (c<sub>sat</sub>-trained), indicated as 1, 2, and 3, respectively, in <xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref> (<ext-link ext-link-type="uri" xlink:href="https://stevewhitten.github.io/Parse_v2_FASTA">https://stevewhitten.github.io/Parse_v2_FASTA</ext-link>; <xref ref-type="bibr" rid="B40">Ibrahim et al., 2023</xref>). ParSe is able to identify residues within LCR/IDR regions with (labelled as &#x2018;P&#x2019;) and without (labelled as &#x2018;D&#x2019;) propensity to undergo LLPS, as well as residues in folded regions (labelled as &#x2018;F&#x2019;) which are not predicted to have LLPS propensity. P-, D-, and F-labelled residues are depicted, respectively, in <italic>red</italic>, <italic>gray</italic>, and <italic>turquoise</italic> in the protein schemes shown in <xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>. To better highlight the local LLPS propensity of each CPEB region, we reported the category of each single residue (P, D, or F) in the plots, even though ParSe predicts as P, D, or F regions only if they are formed by at least 20 consecutive residues with the same label. 
ParSe was also used to obtain batch predictions of the LLPS propensity of the CPEBs vertebrate orthologs, using the &#x2018;&#x3a3; classifier distance P&#x2019; score as a measure of the presence of LLPS-prone intrinsically disordered regions (IDRs) in the proteins of interest (<xref ref-type="bibr" rid="B40">Ibrahim et al., 2023</xref>). The prion-likeness of proteins was calculated using the PLAAC tool (<xref ref-type="bibr" rid="B47">Lancaster et al., 2014</xref>). To identify the position of potential PrDs in the four human paralogs (<xref ref-type="fig" rid="F2">Figures 2</xref>&#x2013;<xref ref-type="fig" rid="F4">4</xref>), we plotted the per-residue PLAAC scores for the entire proteins, considering as PrDs continuous stretches of amino acids with PLAAC scores &#x2265;0. To assess the overall prion-likeness of CPEB orthologs of a given clade in evolutionary analyses (<xref ref-type="fig" rid="F6">Figures 6</xref>&#x2013;<xref ref-type="fig" rid="F10">10</xref>), we calculated their mean &#x2018;PRDscore&#x2019; as provided by the PLAAC software.</p>
</sec>
<sec id="s4-5">
<title>AlphaFold structural models</title>
<p>Atomic level structural models of CPEB1-4 paralogs of <italic>H. sapiens</italic> and <italic>D. rerio,</italic> as shown in <xref ref-type="fig" rid="F1">Figures 1B</xref>, <xref ref-type="fig" rid="F8">8</xref>, were generated using the Colab AlphaFold2 software (<xref ref-type="bibr" rid="B58">Mirdita et al., 2022</xref>; available at <ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb">https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb</ext-link>). We selected the first one of the five structural models that were generated for each paralog. The models were downloaded as files in PDB format. The structures were visualized and pseudocolored (as in <xref ref-type="fig" rid="F8">Figure 8</xref>) based on per-residue SIM, REP, LLPS propensity, or PrD scores using the UCSF Chimera software (<xref ref-type="bibr" rid="B70">Pettersen et al., 2004</xref>).</p>
</sec>
<sec id="s4-6">
<title>Phylogenetic trees</title>
<p>Phylogenetic trees of the vertebrate and Actinopterygii lineages were derived from TimeTree (<ext-link ext-link-type="uri" xlink:href="http://timetree.org">timetree.org</ext-link>; <xref ref-type="bibr" rid="B33">Hedges et al., 2006</xref>) in Newick format and then processed using MEGA11 (<xref ref-type="bibr" rid="B93">Tamura et al., 2021</xref>) and RStudio software (<xref ref-type="bibr" rid="B107">Yu et al., 2017</xref>). Vertebrate species silhouettes (Public Domain Mark 1.0 and CC0 1.0 Universal Public Domain Dedication) were downloaded from PhyloPic (<ext-link ext-link-type="uri" xlink:href="https://www.phylopic.org/">https://www.phylopic.org/</ext-link>). Credits: NASA (<italic>H. sapiens</italic>), Daniel Jaron (<italic>Mus musculus</italic>), Steven Traver (<italic>Bos taurus, Loxodonta africana, Gallus gallus</italic>), Daniel Stadtmauer (<italic>Monodelphis domestica</italic>), Andreas Hejnol (<italic>Xenopus tropicalis</italic>), Jake Warner (<italic>D. rerio</italic>), and Nathan Hermann (<italic>Amblyraja radiata</italic>).</p>
</sec>
<sec id="s4-7">
<title>Evolutionary analyses of complexity- and function-related scores</title>
<p>We calculated 24 parameters of interest (i.e., the percent occurrences of the 20 amino acids and the mean per-residue SIM, REP, LLPS propensity, and prion-likeness scores) for each CPEB1-4 and TIA1 ortholog sequence. Then, we calculated the mean values of these 24 parameters across orthologs in each clade of interest (in vertebrates or Actinopterygii, see above). Finally, we calculated the Pearson&#x2019;s <italic>r</italic> coefficients in correlations of the mean values of these parameters in each clade with clade stem ages. For CPEB2, we also performed the same analysis using the values of the 24 parameters of each individual ortholog protein (rather than their mean values across orthologs in each clade) or by considering only five randomly selected species per clade. The latter analysis was repeated 10 times with different sets of randomly selected orthologs per clade. The random selection of orthologs was performed using an <italic>ad hoc</italic> Perl script.</p>
</sec>
<sec id="s4-8">
<title>Software and statistics</title>
<p>Available (<xref ref-type="bibr" rid="B65">Pelassa et al., 2014</xref>; <xref ref-type="bibr" rid="B64">2019</xref>; <xref ref-type="bibr" rid="B56">Marchetti et al., 2021</xref>) and <italic>ad hoc</italic> software for bioinformatics analyses was written in the Perl language (<ext-link ext-link-type="uri" xlink:href="http://www.perl.org">www.perl.org</ext-link>). Alignments of protein primary sequences used in the selection of ortholog sequences were obtained using Clustal Omega (<xref ref-type="bibr" rid="B89">Sievers et al., 2011</xref>) and MultAlin (<xref ref-type="bibr" rid="B14">Corpet, 1988</xref>). Protein schemes were generated using Prosite MyDomains (<xref ref-type="bibr" rid="B90">Sigrist et al., 2012</xref>) using domain boundaries derived from UniProt, NCBI protein, and SMART (<ext-link ext-link-type="uri" xlink:href="http://smart.embl-heidelberg.de/">http://smart.embl-heidelberg.de/</ext-link>; <xref ref-type="bibr" rid="B50">Letunic et al., 2021</xref>) databases and modified using Photoshop Elements 11 (Adobe), which was also used to generate figures. Plots of amino acid distributions along protein primary sequences were generated using the drawProteins R/Bioconductor package (<xref ref-type="bibr" rid="B7">Brennan, 2018</xref>). Data analysis and statistics were performed using Statistica (TIBCO) and Excel (Microsoft), which was also used to generate graphs. The <italic>r</italic> correlation coefficient was calculated using Excel and its statistical significance assessed using the online Prism (GraphPad) calculator. A value of p &#x2264; 0.05 was considered as statistically significant in all instances.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>SV: Data curation, Formal Analysis, Investigation, Methodology, Software, Visualization, Validation, Writing&#x2013;review and editing. SB: Data curation, Investigation, Visualization, Writing&#x2013;review and editing. MG: Project administration, Supervision, Validation, Writing&#x2013;review and editing. FF: Conceptualization, Funding acquisition, Investigation, Project administration, Resources, Software, Supervision, Writing&#x2013;original draft, Methodology, Visualization, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was supported by RiLo2022/2023 grants from the University of Turin.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fbinf.2025.1491735/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fbinf.2025.1491735/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table4.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.pdf" id="SM2" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Presentation1.pdf" id="SM3" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table2.pdf" id="SM4" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table3.pdf" id="SM5" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alb&#xe0;</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Santib&#xe1;nez-Koref</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Hancock</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Conservation of polyglutamine tract size between mice and humans depends on codon interruption</article-title>. <source>Mol. Biol. Evol.</source> <volume>16</volume> (<issue>11</issue>), <fpage>1641</fpage>&#x2013;<lpage>1644</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a026077</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alb&#xe0;</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Tompa</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Veitia</surname>
<given-names>R. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Amino acid repeats and the structure and evolution of proteins</article-title>. <source>Gene Protein Evol.</source> <volume>3</volume>, <fpage>119</fpage>&#x2013;<lpage>130</lpage>. <pub-id pub-id-type="doi">10.1159/000107607</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alberti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Halfmann</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Kapila</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lindquist</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A systematic survey identifies prions and illuminates sequence features of prionogenic proteins</article-title>. <source>Cell.</source> <volume>137</volume> (<issue>1</issue>), <fpage>146</fpage>&#x2013;<lpage>158</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2009.02.044</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashami</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Falk</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Hurd</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Garg</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cervantes</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Rawat</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Droplet and fibril formation of the functional amyloid Orb2</article-title>. <source>J. Biol. Chem.</source> <volume>297</volume> (<issue>1</issue>), <fpage>100804</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbc.2021.100804</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bowler</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Sawaya</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Boyer</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Cascio</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Bali</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eisenberg</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Micro-electron diffraction structure of the aggregation-driving N terminus of Drosophila neuronal protein Orb2A reveals amyloid-like &#x3b2;-sheets</article-title>. <source>J. Biol. Chem.</source> <volume>298</volume> (<issue>10</issue>), <fpage>102396</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbc.2022.102396</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brangwynne</surname>
<given-names>C. P.</given-names>
</name>
<name>
<surname>Tompa</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Pappu</surname>
<given-names>R. V.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Polymer physics of intracellular phase transitions</article-title>. <source>Nat. Phys.</source> <volume>11</volume> (<issue>11</issue>), <fpage>899</fpage>&#x2013;<lpage>904</lpage>. <pub-id pub-id-type="doi">10.1038/nphys3532</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brennan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>drawProteins: a Bioconductor/R package for reproducible and programmatic generation of protein schematics</article-title>. <source>F1000Research</source> <volume>7</volume>, <fpage>1105</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.14541.1</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buschiazzo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Gemmell</surname>
<given-names>N. J.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>The rise, fall and renaissance of microsatellites in eukaryotic genomes</article-title>. <source>BioEssays</source> <volume>28</volume> (<issue>10</issue>), <fpage>1040</fpage>&#x2013;<lpage>1050</lpage>. <pub-id pub-id-type="doi">10.1002/bies.20470</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Canceill</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Viguera</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ehrlich</surname>
<given-names>S. D.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Replication slippage of different DNA polymerases is inversely related to their strand displacement efficiency</article-title>. <source>J. Biol. Chem.</source> <volume>274</volume> (<issue>39</issue>), <fpage>27481</fpage>&#x2013;<lpage>27490</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.274.39.27481</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castillo-Lizardo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Henneke</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Viguera</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Replication slippage of the thermophilic DNA polymerases B and D from the Euryarchaeota Pyrococcus abyssi</article-title>. <source>Front. Microbiol.</source> <volume>5</volume>, <fpage>403</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2014.00403</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cervantes</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Bajakian</surname>
<given-names>T. H.</given-names>
</name>
<name>
<surname>Soria</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Falk</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Service</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Langen</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Identification and structural characterization of the N-terminal amyloid core of Orb2 isoform A</article-title>. <source>Sci. Rep.</source> <volume>6</volume> (<issue>1</issue>), <fpage>38265</fpage>. <pub-id pub-id-type="doi">10.1038/srep38265</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chavali</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chavali</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Chalancon</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>de Groot</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Gemayel</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Latysheva</surname>
<given-names>N. S.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Constraints and consequences of the emergence of amino acid repeats in eukaryotic proteins</article-title>. <source>Nat. Struct. and Mol. Biol.</source> <volume>24</volume> (<issue>9</issue>), <fpage>765</fpage>&#x2013;<lpage>777</lpage>. <pub-id pub-id-type="doi">10.1038/nsmb.3441</pub-id>
</citation>
</ref>
<ref id="B109">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chavali</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Santhanam</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Babu</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Amino acid homorepeats in proteins</article-title>. <source>Nat. Rev. Chem.</source> <volume>4</volume> (<issue>8</issue>), <fpage>420</fpage>&#x2013;<lpage>434</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiu</surname>
<given-names>S.-H.</given-names>
</name>
<name>
<surname>Ho</surname>
<given-names>W.-L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Kuo</surname>
<given-names>J.-C.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Phase separation driven by interchangeable properties in the intrinsically disordered regions of protein paralogs</article-title>. <source>Commun. Biol.</source> <volume>5</volume> (<issue>1</issue>), <fpage>400</fpage>. <pub-id pub-id-type="doi">10.1038/s42003-022-03354-4</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Corpet</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>Multiple sequence alignment with hierarchical clustering</article-title>. <source>Nucleic acids Res.</source> <volume>16</volume> (<issue>22</issue>), <fpage>10881</fpage>&#x2013;<lpage>10890</lpage>. <pub-id pub-id-type="doi">10.1093/nar/16.22.10881</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Pristo</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Zilversmit</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Hartl</surname>
<given-names>D. L.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>On the abundance, amino acid composition, and evolutionary dynamics of low-complexity regions in proteins</article-title>. <source>Gene</source> <volume>378</volume>, <fpage>19</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/j.gene.2006.03.023</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dignon</surname>
<given-names>G. L.</given-names>
</name>
<name>
<surname>Best</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Mittal</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Biomolecular phase separation: from molecular driving forces to macroscopic properties</article-title>. <source>Annu. Rev. Phys. Chem.</source> <volume>71</volume> (<issue>1</issue>), <fpage>53</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-physchem-071819-113553</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dover</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Slips, strings and species</article-title>. <source>Trends Genet.</source> <volume>5</volume>, <fpage>100</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/0168-9525(89)90038-3</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duran-Arqu&#xe9;</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ca&#xf1;ete</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Castellazzi</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Bartomeu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ferrer-Caelles</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Reina</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Comparative analyses of vertebrate CPEB proteins define two subfamilies with coordinated yet distinct functions in post-transcriptional gene regulation</article-title>. <source>Genome Biol.</source> <volume>23</volume>, <fpage>192</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02759-y</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Enright</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Dickson</surname>
<given-names>Z. W.</given-names>
</name>
<name>
<surname>Golding</surname>
<given-names>G. B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Low complexity regions in proteins and DNA are poorly correlated</article-title>. <source>Mol. Biol. Evol.</source> <volume>40</volume> (<issue>4</issue>), <fpage>msad084</fpage>. <pub-id pub-id-type="doi">10.1093/molbev/msad084</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fioriti</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Hendrickson</surname>
<given-names>W. A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Essential role of coiled coils for aggregation and activity of Q/N-rich prions and PolyQ proteins</article-title>. <source>Cell</source> <volume>143</volume> (<issue>7</issue>), <fpage>1121</fpage>&#x2013;<lpage>1135</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2010.11.042</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rajasethupathy</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Antonov</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Kosmidis</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sossin</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>MicroRNA-22 gates long-term heterosynaptic plasticity in Aplysia through presynaptic regulation of CPEB and downstream targets</article-title>. <source>Cell Rep.</source> <volume>11</volume> (<issue>12</issue>), <fpage>1866</fpage>&#x2013;<lpage>1875</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2015.05.034</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Flores</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Engels</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Microsatellite instability in Drosophila spellchecker1 (MutS homolog) mutants</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>96</volume> (<issue>6</issue>), <fpage>2964</fpage>&#x2013;<lpage>2969</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.96.6.2964</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ford</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Asok</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tripp</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Parro</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>De Solis</surname>
<given-names>C. A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>CPEB3 low-complexity motif regulates local protein synthesis via protein&#x2013;protein interactions in neuronal ribonucleoprotein granules</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>120</volume>, <fpage>e2114747120</fpage>. <pub-id pub-id-type="doi">10.1073/pnas.2114747120</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ford</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Fioriti</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>CPEB3 inhibits translation of mRNA targets by localizing them to P bodies</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>116</volume>, <fpage>18078</fpage>&#x2013;<lpage>18087</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1815275116</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Galtier</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Half a century of controversy: the neutralist/selectionist debate in molecular evolution</article-title>. <source>Genome Biol. Evol.</source> <volume>16</volume> (<issue>2</issue>), <fpage>evae003</fpage>. <pub-id pub-id-type="doi">10.1093/gbe/evae003</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Galtier</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Duret</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gl&#xe9;min</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ranwez</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>GC-biased gene conversion promotes the fixation of deleterious amino acid changes in primates</article-title>. <source>Trends Genet.</source> <volume>25</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2008.10.011</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gomes</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shorter</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>The molecular language of membraneless organelles</article-title>. <source>J. Biol. Chem.</source> <volume>294</volume> (<issue>18</issue>), <fpage>7115</fpage>&#x2013;<lpage>7127</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.tm118.001192</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haerty</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Golding</surname>
<given-names>G. B.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Genome-wide evidence for selection acting on single amino acid repeats</article-title>. <source>Genome Res.</source> <volume>20</volume> (<issue>6</issue>), <fpage>755</fpage>&#x2013;<lpage>760</lpage>. <pub-id pub-id-type="doi">10.1101/gr.101246.109</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hake</surname>
<given-names>L. E.</given-names>
</name>
<name>
<surname>Richter</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>CPEB is a specificity factor that mediates cytoplasmic polyadenylation during Xenopus oocyte maturation</article-title>. <source>Cell.</source> <volume>79</volume> (<issue>4</issue>), <fpage>617</fpage>&#x2013;<lpage>627</lpage>. <pub-id pub-id-type="doi">10.1016/0092-8674(94)90547-9</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Halfmann</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Alberti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lyle</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>O&#x2019;Donnell</surname>
<given-names>C. W.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>O. D.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Opposing effects of glutamine and asparagine govern prion formation by intrinsically disordered proteins</article-title>. <source>Mol. Cell</source> <volume>43</volume> (<issue>1</issue>), <fpage>72</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2011.05.013</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hancock</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Worthey</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Santib&#xe1;&#xf1;ez-Koref</surname>
<given-names>M. F.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>A role for selection in regulating the evolutionary emergence of disease-causing and other coding CAG repeats in humans and mice</article-title>. <source>Mol. Biol. Evol.</source> <volume>18</volume> (<issue>6</issue>), <fpage>1014</fpage>&#x2013;<lpage>1023</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a003873</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hedges</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Dudley</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>TimeTree: a public knowledge-base of divergence times among organisms</article-title>. <source>Bioinformatics</source> <volume>22</volume>, <fpage>2971</fpage>&#x2013;<lpage>2972</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl505</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heinrich</surname>
<given-names>S. U.</given-names>
</name>
<name>
<surname>Lindquist</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Protein-only mechanism induces self-perpetuating changes in the activity of neuronal Aplysia cytoplasmic polyadenylation element binding protein (CPEB)</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>108</volume> (<issue>7</issue>), <fpage>2999</fpage>&#x2013;<lpage>3004</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1019368108</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hervas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Ram&#xed;rez</surname>
<given-names>M. D. C.</given-names>
</name>
<name>
<surname>Galera-Prat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nagai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bruix</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Divergent CPEB prion-like domains reveal different assembly mechanisms for a generic amyloid-like fold</article-title>. <source>BMC Biol.</source> <volume>19</volume>, <fpage>43</fpage>. <pub-id pub-id-type="doi">10.1186/s12915-021-00967-9</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hervas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rau</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Murzin</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname>
<given-names>J. A. J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Cryo-EM structure of a neuronal functional amyloid implicated in memory persistence in Drosophila</article-title>. <source>Science</source> <volume>367</volume>, <fpage>1230</fpage>&#x2013;<lpage>1234</lpage>. <pub-id pub-id-type="doi">10.1126/science.aba3526</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffmann</surname>
<given-names>F. G.</given-names>
</name>
<name>
<surname>Opazo</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Storz</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Rapid rates of lineage-specific gene duplication and deletion in the &#x3b1;-globin gene family</article-title>. <source>Mol. Biol. Evol.</source> <volume>25</volume> (<issue>3</issue>), <fpage>591</fpage>&#x2013;<lpage>602</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msn004</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.-S.</given-names>
</name>
<name>
<surname>Mendez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Richter</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>CPEB and translational control by cytoplasmic polyadenylation: impact on synaptic plasticity, learning, and memory</article-title>. <source>Mol. Psychiatry</source> <volume>28</volume> (<issue>7</issue>), <fpage>2728</fpage>&#x2013;<lpage>2736</lpage>. <pub-id pub-id-type="doi">10.1038/s41380-023-02088-x</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huntley</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Golding</surname>
<given-names>G. B.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Evolution of simple sequence in proteins</article-title>. <source>J. Mol. Evol.</source> <volume>51</volume> (<issue>2</issue>), <fpage>131</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1007/s002390010073</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ibrahim</surname>
<given-names>A. Y.</given-names>
</name>
<name>
<surname>Khaodeuanepheng</surname>
<given-names>N. P.</given-names>
</name>
<name>
<surname>Amarasekara</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Correia</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Lewis</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Fitzkee</surname>
<given-names>N. C.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Intrinsically disordered regions that drive phase separation form a robustly distinct protein class</article-title>. <source>J. Biol. Chem.</source> <volume>299</volume>, <fpage>102801</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbc.2022.102801</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The molecular biology of memory: cAMP, PKA, CRE, CREB-1, CREB-2, and CPEB</article-title>. <source>Mol. Brain</source> <volume>5</volume>, <fpage>14</fpage>. <pub-id pub-id-type="doi">10.1186/1756-6606-5-14</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Derkatch</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Pavlopoulos</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>The role of functional prions in the persistence of memory storage</article-title>,&#x201d; in <source>Proteopathic seeds and neurodegenerative diseases</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Jucker</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Christen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name>), <fpage>131</fpage>&#x2013;<lpage>152</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kozlov</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shidlovskii</surname>
<given-names>Y. V.</given-names>
</name>
<name>
<surname>Gilmutdinov</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Schedl</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhukova</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The role of CPEB family proteins in the nervous system function in the norm and pathology</article-title>. <source>Cell Biosci.</source> <volume>11</volume>, <fpage>64</fpage>. <pub-id pub-id-type="doi">10.1186/s13578-021-00577-6</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Stecher</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Suleski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hedges</surname>
<given-names>S. B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>TimeTree: a resource for timelines, timetrees, and divergence times</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume> (<issue>7</issue>), <fpage>1812</fpage>&#x2013;<lpage>1819</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msx116</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kurihara</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tokuriki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Myojin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hori</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kuroiwa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Matsuda</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>CPEB2, a novel putative translational regulator in mouse haploid germ cells</article-title>. <source>Biol. Reprod.</source> <volume>69</volume> (<issue>1</issue>), <fpage>261</fpage>&#x2013;<lpage>268</lpage>. <pub-id pub-id-type="doi">10.1095/biolreprod.103.015677</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laidlaw</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gelfand</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Garner</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Ranganathan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Benson</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Elevated basal slippage mutation rates among the Canidae</article-title>. <source>J. Hered.</source> <volume>98</volume> (<issue>5</issue>), <fpage>452</fpage>&#x2013;<lpage>460</lpage>. <pub-id pub-id-type="doi">10.1093/jhered/esm017</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lancaster</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Nutter-Upham</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lindquist</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>O. D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>PLAAC: a web and command-line application to identify proteins with prion-like amino acid composition</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2501</fpage>&#x2013;<lpage>2502</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu310</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lavoie</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Debeane</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Trinh</surname>
<given-names>Q. D.</given-names>
</name>
<name>
<surname>Turcotte</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Corbeil-Girard</surname>
<given-names>L. P.</given-names>
</name>
<name>
<surname>Dicaire</surname>
<given-names>M. J.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>Polymorphism, shared functions and convergent evolution of genes with sequences coding for polyalanine domains</article-title>. <source>Hum. Mol. Genet.</source> <volume>12</volume> (<issue>22</issue>), <fpage>2967</fpage>&#x2013;<lpage>2979</lpage>. <pub-id pub-id-type="doi">10.1093/hmg/ddg329</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lenz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Haerty</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Golding</surname>
<given-names>G. B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Increased substitution rates surrounding low-complexity regions within primate proteins</article-title>. <source>Genome Biol. Evol.</source> <volume>6</volume> (<issue>3</issue>), <fpage>655</fpage>&#x2013;<lpage>665</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evu042</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Letunic</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Khedkar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bork</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>SMART: recent updates, new developments and status in 2020</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume> (<issue>D1</issue>), <fpage>D458</fpage>&#x2013;<lpage>D460</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa937</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Insight into role of selection in the evolution of polyglutamine tracts in humans</article-title>. <source>PLoS One</source> <volume>7</volume> (<issue>7</issue>), <fpage>e41167</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0041167</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lilliu</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Villeri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Pelassa</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Cesano</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Scarano</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Polyserine repeats promote coiled coil-mediated fibril formation and length-dependent protein aggregation</article-title>. <source>J. Struct. Biol.</source> <volume>204</volume> (<issue>3</issue>), <fpage>572</fpage>&#x2013;<lpage>584</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2018.09.001</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Chao</surname>
<given-names>H. W.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>P. Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T. H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H. W.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>CPEB3-dowregulated Nr3c1 mRNA translation confers resilience to developing posttraumatic stress disorder-like behavior in fear-conditioned mice</article-title>. <source>Neuropsychopharmacology</source> <volume>46</volume> (<issue>9</issue>), <fpage>1669</fpage>&#x2013;<lpage>1679</lpage>. <pub-id pub-id-type="doi">10.1038/s41386-021-01017-2</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Majumdar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cesario</surname>
<given-names>W. C.</given-names>
</name>
<name>
<surname>White-Grindley</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Critical role of amyloid-like oligomers of Drosophila Orb2 in the persistence of memory</article-title>. <source>Cell</source> <volume>148</volume>, <fpage>515</fpage>&#x2013;<lpage>529</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2012.01.004</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tawfik</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Levy</surname>
<given-names>E. D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>How gene duplication diversifies the landscape of protein oligomeric state and function</article-title>. <source>Curr. Opin. Genet. Dev.</source> <volume>76</volume>, <fpage>101966</fpage>. <pub-id pub-id-type="doi">10.1016/j.gde.2022.101966</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marchetti</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vaglietti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rizzo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Di Nardo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Colnaghi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ghirardi</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Heptad stereotypy, S/Q layering, and remote origin of the SARS-CoV-2 fusion core</article-title>. <source>Virus Evol.</source> <volume>7</volume> (<issue>2</issue>), <fpage>veab097</fpage>. <pub-id pub-id-type="doi">10.1093/ve/veab097</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname>
<given-names>E. W.</given-names>
</name>
<name>
<surname>Mittag</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Relationship of sequence and phase separation in protein low-complexity regions</article-title>. <source>Biochemistry</source> <volume>57</volume> (<issue>17</issue>), <fpage>2478</fpage>&#x2013;<lpage>2487</lpage>. <pub-id pub-id-type="doi">10.1021/acs.biochem.8b00008</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirdita</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xfc;tze</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Moriwaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Heo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ovchinnikov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Steinegger</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>ColabFold: making protein folding accessible to all</article-title>. <source>Nat. Methods</source> <volume>19</volume> (<issue>6</issue>), <fpage>679</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-022-01488-1</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mularoni</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ledda</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Toll-Riera</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Alb&#xe0;</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Natural selection drives the accumulation of amino acid tandem repeats in human proteins</article-title>. <source>Genome Res.</source> <volume>20</volume> (<issue>6</issue>), <fpage>745</fpage>&#x2013;<lpage>754</lpage>. <pub-id pub-id-type="doi">10.1101/gr.101261.109</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nakachi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hayakawa</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Oota</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sumiyama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ueda</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Nucleotide compositional constraints on genomes generate alanine-, glycine-, and proline-rich structures in transcription factors</article-title>. <source>Mol. Biol. Evol.</source> <volume>14</volume> (<issue>10</issue>), <fpage>1042</fpage>&#x2013;<lpage>1049</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a025710</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nishizawa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nishizawa</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Local-scale repetitiveness in amino acid use in eukaryote protein sequences: a genomic factor in protein evolution</article-title>. <source>Proteins Struct. Funct. Genet.</source> <volume>37</volume> (<issue>2</issue>), <fpage>284</fpage>&#x2013;<lpage>292</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1097-0134(19991101)37:2&#x3c;284::aid-prot13&#x3e;3.0.co;2-4</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Owens</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Quinonez</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>P. E.</given-names>
</name>
<name>
<surname>Keegan</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Lefebvre</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Roulston</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Analysis of <italic>de novo HOXA13</italic> polyalanine expansions supports replication slippage without repair in their generation</article-title>. <source>Am. J. Med. Genet. Part A</source> <volume>161</volume> (<issue>5</issue>), <fpage>1019</fpage>&#x2013;<lpage>1027</lpage>. <pub-id pub-id-type="doi">10.1002/ajmg.a.35843</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paps</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Holland</surname>
<given-names>P. W. H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Reconstruction of the ancestral metazoan genome reveals an increase in genomic novelty</article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>1730</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-04136-5</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pelassa</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Cibelli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Villeri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Lilliu</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Vaglietti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Olocco</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Compound dynamics and combinatorial patterns of amino acid repeats encode a system of evolutionary and developmental markers</article-title>. <source>Genome Biol. Evol.</source> <volume>11</volume> (<issue>11</issue>), <fpage>3159</fpage>&#x2013;<lpage>3178</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evz216</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pelassa</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Cora</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cesano</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Monje</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Montarolo</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Association of polyalanine and polyglutamine coiled coils mediates expansion disease-related protein aggregation and dysfunction</article-title>. <source>Hum. Mol. Genet.</source> <volume>23</volume> (<issue>13</issue>), <fpage>3402</fpage>&#x2013;<lpage>3420</lpage>. <pub-id pub-id-type="doi">10.1093/hmg/ddu049</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pelassa</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Differential occurrence of interactions and interaction domains in proteins containing homopolymeric amino acid repeats</article-title>. <source>Front. Genet.</source> <volume>6</volume>, <fpage>345</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2015.00345</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Persi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wolf</surname>
<given-names>Y. I.</given-names>
</name>
<name>
<surname>Karamycheva</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Makarova</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Koonin</surname>
<given-names>E. V.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Compensatory relationship between low-complexity regions and gene paralogy in the evolution of prokaryotes</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>120</volume> (<issue>16</issue>), <fpage>e2300154120</fpage>. <pub-id pub-id-type="doi">10.1073/pnas.2300154120</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Persi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wolf</surname>
<given-names>Y. I.</given-names>
</name>
<name>
<surname>Koonin</surname>
<given-names>E. V.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Positive and strongly relaxed purifying selection drive the evolution of repeats in proteins</article-title>. <source>Nat. Commun.</source> <volume>7</volume> (<issue>1</issue>), <fpage>13570</fpage>. <pub-id pub-id-type="doi">10.1038/ncomms13570</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peskett</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Rau</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>O&#x2019;Driscoll</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Patani</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lowe</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Saibil</surname>
<given-names>H. R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A liquid to solid phase transition underlying pathological huntingtin Exon1 aggregation</article-title>. <source>Mol. Cell.</source> <volume>70</volume>, <fpage>588</fpage>&#x2013;<lpage>601.e6</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2018.04.007</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pettersen</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Couch</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Greenblatt</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>E. C.</given-names>
</name>
<etal/>
</person-group> (<year>2004</year>). <article-title>UCSF Chimera&#x2014;a visualization system for exploratory research and analysis</article-title>. <source>J. Comput. Chem.</source> <volume>25</volume> (<issue>13</issue>), <fpage>1605</fpage>&#x2013;<lpage>1612</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.20084</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rad&#xf3;-Trilla</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alb&#xe0;</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Dissecting the role of low-complexity regions in the evolution of vertebrate proteins</article-title>. <source>BMC Evol. Biol.</source> <volume>12</volume> (<issue>1</issue>), <fpage>155</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2148-12-155</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rad&#xf3;-Trilla</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Arat&#xf3;</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Pegueroles</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Raya</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>De La Luna</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Alb&#xe0;</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Key role of amino acid repeat expansions in the functional diversification of duplicated transcription factors</article-title>. <source>Mol. Biol. Evol.</source> <volume>32</volume>, <fpage>2263</fpage>&#x2013;<lpage>2272</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msv103</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#xed;rez de Mingo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>L&#xf3;pez-Garc&#xed;a</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vaquero</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Herv&#xe1;s</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Laurents</surname>
<given-names>D. V.</given-names>
</name>
<name>
<surname>Carri&#xf3;n-V&#xe1;zquez</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Phase separation modulates the functional amyloid assembly of human CPEB3</article-title>. <source>Prog. Neurobiol.</source> <volume>231</volume>, <fpage>102540</fpage>. <pub-id pub-id-type="doi">10.1016/j.pneurobio.2023.102540</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#xed;rez de Mingo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Pantoja-Uceda</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Herv&#xe1;s</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Carri&#xf3;n-V&#xe1;zquez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Laurents</surname>
<given-names>D. V.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Conformational dynamics in the disordered region of human CPEB3 linked to memory consolidation</article-title>. <source>BMC Biol.</source> <volume>20</volume> (<issue>1</issue>), <fpage>129</fpage>. <pub-id pub-id-type="doi">10.1186/s12915-022-01310-6</pub-id>
</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raveendra</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Siemer</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Puthanveettil</surname>
<given-names>S. V.</given-names>
</name>
<name>
<surname>Hendrickson</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>McDermott</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Characterization of prion-like conformational changes of the neuronal isoform of Aplysia CPEB</article-title>. <source>Nat. Struct. Mol. Biol.</source> <volume>20</volume>, <fpage>495</fpage>&#x2013;<lpage>501</lpage>. <pub-id pub-id-type="doi">10.1038/nsmb.2503</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rayman</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>TIA-1 is a functional prion-like protein</article-title>. <source>Cold Spring Harb. Perspect. Biol.</source> <volume>9</volume> (<issue>5</issue>), <fpage>a030718</fpage>. <pub-id pub-id-type="doi">10.1101/cshperspect.a030718</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rekhi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Garcia</surname>
<given-names>C. G.</given-names>
</name>
<name>
<surname>Barai</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rizuan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schuster</surname>
<given-names>B. S.</given-names>
</name>
<name>
<surname>Kiick</surname>
<given-names>K. L.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Expanding the molecular language of protein liquid&#x2013;liquid phase separation</article-title>. <source>Nat. Chem.</source> <volume>16</volume>, <fpage>1113</fpage>&#x2013;<lpage>1124</lpage>. <pub-id pub-id-type="doi">10.1038/s41557-024-01489-x</pub-id>
</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reselammal</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Pinhero</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Oliyantakath Hassan</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Srinivasula</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Vijayan</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Mapping the fibril core of the prion subdomain of the mammalian CPEB3 that is involved in long term memory retention</article-title>. <source>J. Mol. Biol.</source> <volume>433</volume> (<issue>15</issue>), <fpage>167084</fpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2021.167084</pub-id>
</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riback</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Katanski</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>Kear-Scott</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Pilipenko</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Rojek</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Sosnick</surname>
<given-names>T. R.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Stress-triggered phase separation is an adaptive, evolutionarily tuned response</article-title>. <source>Cell.</source> <volume>168</volume> (<issue>6</issue>), <fpage>1028</fpage>&#x2013;<lpage>1040.e19</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2017.02.027</pub-id>
</citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richter</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>CPEB: a life in translation</article-title>. <source>Trends Biochem. Sci.</source> <volume>32</volume>, <fpage>279</fpage>&#x2013;<lpage>285</lpage>. <pub-id pub-id-type="doi">10.1016/j.tibs.2007.04.004</pub-id>
</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ross</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Dyer</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Erez</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Jaenike</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Markow</surname>
<given-names>T. A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Rapid divergence of microsatellite abundance among species of Drosophila</article-title>. <source>Mol. Biol. Evol.</source> <volume>20</volume> (<issue>7</issue>), <fpage>1143</fpage>&#x2013;<lpage>1157</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msg137</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rouhana</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Edgar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hugosson</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Dountcheva</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Martindale</surname>
<given-names>M. Q.</given-names>
</name>
<name>
<surname>Ryan</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Cytoplasmic polyadenylation is an ancestral hallmark of early development in animals</article-title>. <source>Mol. Biol. Evol.</source> <volume>40</volume> (<issue>6</issue>), <fpage>msad137</fpage>. <pub-id pub-id-type="doi">10.1093/molbev/msad137</pub-id>
</citation>
</ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saar</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Morgunov</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Arter</surname>
<given-names>W. E.</given-names>
</name>
<name>
<surname>Krainer</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>A. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Learning the molecular grammar of protein condensates from sequence determinants and embeddings</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>118</volume> (<issue>15</issue>), <fpage>e2019053118</fpage>. <pub-id pub-id-type="doi">10.1073/pnas.2019053118</pub-id>
</citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sainudiin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Durrett</surname>
<given-names>R. T.</given-names>
</name>
<name>
<surname>Aquadro</surname>
<given-names>C. F.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Microsatellite mutation models: insights from a comparison of humans and chimpanzees</article-title>. <source>Genetics</source> <volume>168</volume> (<issue>1</issue>), <fpage>383</fpage>&#x2013;<lpage>395</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.103.022665</pub-id>
</citation>
</ref>
<ref id="B85">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Prions: what are they good for?</article-title> <source>Annu. Rev. Cell. Dev. Biol.</source> <volume>31</volume> (<issue>1</issue>), <fpage>149</fpage>&#x2013;<lpage>169</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-cellbio-100913-013409</pub-id>
</citation>
</ref>
<ref id="B108">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>White-Grindley</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Majumdar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Aplysia CPEB can form prion-like multimers in sensory neurons that contribute to long-term facilitation</article-title>. <source>Cell</source> <volume>140</volume> (<issue>3</issue>), <fpage>421</fpage>&#x2013;<lpage>435</lpage>.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Giustetto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Etkin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Janisiewicz</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Miniaci</surname>
<given-names>M. C.</given-names>
</name>
<etal/>
</person-group> (<year>2003b</year>). <article-title>A neuronal isoform of CPEB regulates local protein synthesis and stabilizes synapse-specific long-term facilitation in Aplysia</article-title>. <source>Cell.</source> <volume>115</volume>, <fpage>893</fpage>&#x2013;<lpage>904</lpage>. <pub-id pub-id-type="doi">10.1016/s0092-8674(03)01021-3</pub-id>
</citation>
</ref>
<ref id="B87">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lindquist</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2003a</year>). <article-title>A neuronal isoform of the Aplysia CPEB has prion-like properties</article-title>. <source>Cell.</source> <volume>115</volume>, <fpage>879</fpage>&#x2013;<lpage>891</lpage>. <pub-id pub-id-type="doi">10.1016/s0092-8674(03)01020-1</pub-id>
</citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sia</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Dominska</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Stefanovic</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Petes</surname>
<given-names>T. D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Isolation and characterization of point mutations in mismatch repair genes that destabilize microsatellites in yeast</article-title>. <source>Mol. Cell. Biol.</source> <volume>21</volume>, <fpage>8157</fpage>&#x2013;<lpage>8167</lpage>. <pub-id pub-id-type="doi">10.1128/mcb.21.23.8157-8167.2001</pub-id>
</citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sievers</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wilm</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dineen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gibson</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Karplus</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Fast, scalable generation of high&#x2010;quality protein multiple sequence alignments using Clustal Omega</article-title>. <source>Mol. Syst. Biol.</source> <volume>7</volume>, <fpage>539</fpage>. <pub-id pub-id-type="doi">10.1038/msb.2011.75</pub-id>
</citation>
</ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sigrist</surname>
<given-names>C. J. A.</given-names>
</name>
<name>
<surname>De Castro</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Cerutti</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cuche</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Hulo</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bridge</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>New and continuing developments at PROSITE</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume>, <fpage>D344</fpage>&#x2013;<lpage>D347</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1067</pub-id>
</citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stephan</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Fioriti</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lamba</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Colnaghi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Karl</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Derkatch</surname>
<given-names>I. L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>The CPEB3 protein is a functional prion that interacts with the actin cytoskeleton</article-title>. <source>Cell. Rep.</source> <volume>11</volume>, <fpage>1772</fpage>&#x2013;<lpage>1785</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2015.04.060</pub-id>
</citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sumiyama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Washio-Watanabe</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Saitou</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hayakawa</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ueda</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Class III POU genes: generation of homopolymeric amino acid repeats under GC pressure in mammals</article-title>. <source>J. Mol. Evol.</source> <volume>43</volume>, <fpage>170</fpage>&#x2013;<lpage>178</lpage>. <pub-id pub-id-type="doi">10.1007/pl00006075</pub-id>
</citation>
</ref>
<ref id="B93">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tamura</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Stecher</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>MEGA11: molecular evolutionary genetics analysis version 11</article-title>. <source>Mol. Biol. Evol.</source> <volume>38</volume>, <fpage>3022</fpage>&#x2013;<lpage>3027</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msab120</pub-id>
</citation>
</ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tautz</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Trick</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Dover</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Cryptic simplicity in DNA is a major source of genetic variation</article-title>. <source>Nature</source> <volume>322</volume> (<issue>6080</issue>), <fpage>652</fpage>&#x2013;<lpage>656</lpage>. <pub-id pub-id-type="doi">10.1038/322652a0</pub-id>
</citation>
</ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teekas</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vijay</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Terminal regions of a protein are a hotspot for low complexity regions and selection</article-title>. <source>Open Biol.</source> <volume>14</volume> (<issue>6</issue>), <fpage>230439</fpage>. <pub-id pub-id-type="doi">10.1098/rsob.230439</pub-id>
</citation>
</ref>
<ref id="B96">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Theis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kandel</surname>
<given-names>E. R.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Two previously undescribed members of the mouse CPEB family of genes and their inducible expression in the principal cell layers of the hippocampus</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>100</volume> (<issue>16</issue>), <fpage>9602</fpage>&#x2013;<lpage>9607</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1133424100</pub-id>
</citation>
</ref>
<ref id="B97">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Strassmann</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Queller</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Genome nucleotide composition shapes variation in simple sequence repeats</article-title>. <source>Mol. Biol. Evol.</source> <volume>28</volume> (<issue>2</issue>), <fpage>899</fpage>&#x2013;<lpage>909</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msq266</pub-id>
</citation>
</ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsvetkov</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Eisen</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Heinrich</surname>
<given-names>S. U.</given-names>
</name>
<name>
<surname>Brune</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hallacli</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Newby</surname>
<given-names>G. A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Persistent activation of mRNA translation by transient Hsp90 inhibition</article-title>. <source>Cell. Rep.</source> <volume>32</volume>, <fpage>108149</fpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2020.108149</pub-id>
</citation>
</ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vaglietti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fiumara</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>PolyQ length co-evolution in neural proteins</article-title>. <source>NAR Genomics Bioinforma.</source> <volume>3</volume> (<issue>2</issue>), <fpage>lqab032</fpage>. <pub-id pub-id-type="doi">10.1093/nargab/lqab032</pub-id>
</citation>
</ref>
<ref id="B100">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vaglietti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Villeri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Dell&#x2019;Oca</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Marchetti</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cesano</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rizzo</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>PolyQ length-based molecular encoding of vocalization frequency in FOXP2</article-title>. <source>iScience</source> <volume>26</volume>, <fpage>108036</fpage>. <pub-id pub-id-type="doi">10.1016/j.isci.2023.108036</pub-id>
</citation>
</ref>
<ref id="B101">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vendruscolo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fuxreiter</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Sequence determinants of the aggregation of proteins within condensates generated by liquid-liquid phase separation</article-title>. <source>J. Mol. Biol.</source> <volume>434</volume>, <fpage>167201</fpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2021.167201</pub-id>
</citation>
</ref>
<ref id="B102">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wake</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>S.-L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.-H.</given-names>
</name>
<name>
<surname>Kirilenko</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Mittal</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Expanding the molecular grammar of polar residues and arginine in FUS prion-like domain phase separation and aggregation</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2024.02.15.580391</pub-id>
</citation>
</ref>
<ref id="B103">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>J.-M.</given-names>
</name>
<name>
<surname>Holehouse</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>H. O.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jahnel</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A molecular grammar governing the driving forces for phase separation of prion-like RNA binding proteins</article-title>. <source>Cell</source> <volume>174</volume>, <fpage>688</fpage>&#x2013;<lpage>699.e16</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.06.006</pub-id>
</citation>
</ref>
<ref id="B104">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.-P.</given-names>
</name>
<name>
<surname>Cooper</surname>
<given-names>N. G. F.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Comparative <italic>in silico</italic> analyses of cpeb1&#x2013;4 with functional predictions</article-title>. <source>Bioinform. Biol. Insights</source> <volume>4</volume> (<issue>S5087</issue>), <fpage>BBI.S5087</fpage>. <pub-id pub-id-type="doi">10.4137/bbi.s5087</pub-id>
</citation>
</ref>
<ref id="B105">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Warren</surname>
<given-names>S. T.</given-names>
</name>
<name>
<surname>Muragaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Mundlos</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Upton</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Olsen</surname>
<given-names>B. R.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Polyalanine expansion in synpolydactyly might result from unequal crossing-over of HOXD13</article-title>. <source>Science</source> <volume>275</volume> (<issue>5298</issue>), <fpage>408</fpage>&#x2013;<lpage>409</lpage>. <pub-id pub-id-type="doi">10.1126/science.275.5298.408</pub-id>
</citation>
</ref>
<ref id="B106">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wright</surname>
<given-names>P. E.</given-names>
</name>
<name>
<surname>Dyson</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Linking folding and binding</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>19</volume> (<issue>1</issue>), <fpage>31</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2008.12.003</pub-id>
</citation>
</ref>
<ref id="B107">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>D. K.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>T. T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data</article-title>. <source>Methods Ecol. Evol.</source> <volume>8</volume>, <fpage>28</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1111/2041-210x.12628</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>