<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">875406</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2022.875406</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Increased Frequency of Indels in Hypervariable Regions of SARS-CoV-2 Proteins&#x2014;A Possible Signature of Adaptive Selection</article-title>
<alt-title alt-title-type="left-running-head">Alisoltani et al.</alt-title>
<alt-title alt-title-type="right-running-head">Indels in SARS-CoV-2 Adaptive Evolution</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Alisoltani</surname>
<given-names>Arghavan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1107463/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jaroszewski</surname>
<given-names>Lukasz</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/41008/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Iyer</surname>
<given-names>Mallika</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/575681/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Iranzadeh</surname>
<given-names>Arash</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Godzik</surname>
<given-names>Adam</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1660022/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Biosciences Division</institution>, <institution>School of Medicine</institution>, <institution>University of California, Riverside</institution>, <addr-line>Riverside</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Graduate School of Biomedical Sciences</institution>, <institution>Sanford Burnham Prebys Medical Discovery Institute</institution>, <addr-line>La Jolla</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Computational Biology Division</institution>, <institution>Department of Integrative Biomedical Sciences</institution>, <institution>University of Cape Town</institution>, <addr-line>Cape Town</addr-line>, <country>South Africa</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/347568/overview">Dariusz Plewczynski</ext-link>, Warsaw University of Technology, Poland</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1527845/overview">Haizhou Liu</ext-link>, Wuhan Institute of Virology (CAS), China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1240951/overview">Sayaka Miura</ext-link>, Temple University, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Adam Godzik, <email>adam.godzik@medsch.ucr.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>02</day>
<month>06</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>875406</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>02</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>04</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Alisoltani, Jaroszewski, Iyer, Iranzadeh and Godzik.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Alisoltani, Jaroszewski, Iyer, Iranzadeh and Godzik</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Most attention in the surveillance of the evolving SARS-CoV-2 genome has been centered on nucleotide substitutions in the spike glycoprotein. We show that, as the pandemic extends into its second year, the numbers and ratio of genomes with in-frame insertions and deletions (indels) increase significantly, especially among the variants of concern (VOCs). Monitoring of the SARS-CoV-2 genome evolution shows that co-occurrence (i.e., highly correlated presence) of indels, especially deletions in the spike N-terminal domain and non-structural protein 6 (NSP6), is a shared feature in several VOCs such as Alpha, Beta, Delta, and Omicron. Indel distribution is correlated with spike mutations associated with immune escape, and growth in the number of genomes with indels coincides with the increasing population resistance due to vaccination and previous infections. Indels occur most frequently in the spike, but also in other proteins, especially those involved in interactions with the host immune system. We also show that indels concentrate in regions of individual SARS-CoV-2 proteins known as hypervariable regions (HVRs) that are mostly located in specific loop regions. Structural analysis suggests that indels remodel viral proteins&#x2019; surfaces at common epitopes and interaction interfaces, affecting the virus&#x2019; interactions with host proteins. We hypothesize that the increased frequency of indels, their non-random distribution, and their independent co-occurrence in several VOCs represent another mechanism of response to elevated global population immunity.</p>
</abstract>
<kwd-group>
<kwd>indels</kwd>
<kwd>SARS-CoV-2</kwd>
<kwd>protein loop</kwd>
<kwd>hypervariable regions (HVR)</kwd>
<kwd>variants of concern (VOCs)</kwd>
</kwd-group>
<contract-num rid="cn001">HHSN272201700060C GM118187</contract-num>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Insertions/deletions (indels) are the second most common modifications in the evolution of viral genomes after single nucleotide polymorphisms (SNPs), yet receive relatively little attention in genome analyses (<xref ref-type="bibr" rid="B35">Palmer and Poon, 2019</xref>). One reason is that their consequences on protein structure and function are more challenging to determine than those of SNPs. Examples of long, loss-of-function deletions removing entire proteins or functional domains were shown to be deleterious (<xref ref-type="bibr" rid="B60">Zwart et al., 2014</xref>) or attenuating (<xref ref-type="bibr" rid="B34">Oostra et al., 2007</xref>); however, the effects of shorter, function-refining indels are mostly unknown. Such indels tend to happen in the loops between secondary structure elements, but interestingly not in all the loops, so their distribution cannot be explained by the plasticity of protein structure alone. Such indels rarely affect the overall structure of proteins, but may alter the binding specificity or protein-protein interaction surfaces (<xref ref-type="bibr" rid="B44">Studer et al., 2013</xref>), in a few studied examples leading to increased drug resistance and immune escape in viruses (<xref ref-type="bibr" rid="B56">Wood et al., 2009</xref>; <xref ref-type="bibr" rid="B35">Palmer and Poon, 2019</xref>). Their prevalence, evolutionary dynamics, and overall consequences for the fitness of most viruses, including SARS-CoV-2, largely remain unacknowledged and unaddressed.</p>
<p>Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) first emerged in Wuhan, China and subsequently spread worldwide and infected millions of people in several waves of evolving variants. Its high mutability (<xref ref-type="bibr" rid="B50">van Dorp et al., 2020</xref>), typical for RNA viruses (<xref ref-type="bibr" rid="B9">Duffy, 2018</xref>) but exacerbated by the scale of the COVID-19 pandemic, has resulted in the emergence of multiple lineages. Higher infectivity, transmissibility and/or lower efficacy of the current vaccines have been reported for Beta (B.1.351&#x2a;) (<xref ref-type="bibr" rid="B47">Tegally et al., 2021</xref>), Gamma (P.&#x2a;) (<xref ref-type="bibr" rid="B18">Jewell, 2021</xref>; <xref ref-type="bibr" rid="B29">Madhi et al., 2021</xref>), Delta (B.1.617.2, AY.&#x2a;) (<xref ref-type="bibr" rid="B36">Planas et al., 2021</xref>; <xref ref-type="bibr" rid="B3">Cherian et al., 2021</xref>), Lambda (C.37) (<xref ref-type="bibr" rid="B21">Kimura et al., 2022</xref>) and Omicron variants (B.1.1.529 and BA.&#x2a;) (<xref ref-type="bibr" rid="B19">Karim and Karim, 2021</xref>; <xref ref-type="bibr" rid="B51">Viana et al., 2021</xref>). Tracking and analyzing new emerging lineages with modified disease phenotypes, dubbed variants of concern (VOCs) (<xref ref-type="bibr" rid="B37">Plante et al., 2021</xref>), is crucial for determining the strategies of fighting the COVID-19 pandemic. Massive sequencing of SARS-CoV-2, with over 10M genomes available today, gives us a unique opportunity to study its evolution on the timescale of weeks or even days, as compared to much longer timescales available by comparing species. Much attention has been focused on specific mutations, such as E484K in the spike protein, and their effects on host immune response (<xref ref-type="bibr" rid="B43">Starr et al., 2020</xref>; <xref ref-type="bibr" rid="B15">Jangra et al., 2021</xref>).
At the same time, deletions and insertions received less attention, being less frequent, especially in the first phase of the pandemic and more challenging to interpret.</p>
<p>Still, several specific indels in SARS-CoV-2 in the envelope protein (<xref ref-type="bibr" rid="B23">Kumar et al., 2021</xref>), non-structural protein 1 (NSP1) (<xref ref-type="bibr" rid="B28">Lin et al., 2021</xref>), spike glycoprotein (spike or S) (<xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>) and accessory ORFs (<xref ref-type="bibr" rid="B24">Lam et al., 2020</xref>) have been studied in detail. The NSP1 &#x394;79-89 was shown to be associated with lower IFN-&#x3b2; levels and non-severe phenotypes (<xref ref-type="bibr" rid="B28">Lin et al., 2021</xref>). Our analysis presented here expands on these examples and provides an overview of the dynamics of in-frame indels in the evolution of the SARS-CoV-2 genome. Regions with recurrent indels, called recurrent deletion regions (RDRs) and recurrent insertion regions (RIRs), in the N-terminal domain (NTD) of the spike were shown to play a role in immune escape (<xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>). Here we use the term hypervariable regions (HVRs) to refer to indel-prone regions. These concentrations of indels provide an example of a new paradigm of the effects of indels on viral genomes and proteins&#x2014;instead of causing loss of function, they modify protein function by remodeling protein surfaces, affecting major antibody epitopes (<xref ref-type="bibr" rid="B2">Cai et al., 2021</xref>) and, possibly, protein-protein interaction networks.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec id="s2-1">
<title>SARS-CoV-2 Sequencing Data Collection</title>
<p>We retrieved the multiple sequence alignment (MSA) and metadata of complete SARS-CoV-2 genomes (6,143,793) from GISAID (<ext-link ext-link-type="uri" xlink:href="https://www.gisaid.org/">https://www.gisaid.org/</ext-link>) as of January 7<sup>th</sup>, 2022. Briefly, the full alignment (msa_0106.fasta) provided by GISAID was based on 6,716,124 submissions to GISAID EpiCoV. The GISAID pipeline excludes duplicate, low-quality sequences (&#x3e;5% N content) and incomplete sequences (length &#x3c;29,000 bp). Then, the GISAID pipeline used this cleaned data to create the MSA file of 6,143,793 sequences using MAFFT (<xref ref-type="bibr" rid="B20">Katoh and Standley, 2013</xref>) with hCoV-19/Wuhan/WIV04/2019 (EPI_ISL_402124; GenBank: MN996527) used as reference (<xref ref-type="bibr" rid="B59">Zhou et al., 2020</xref>).</p>
</sec>
<sec id="s2-2">
<title>Identification of Indels</title>
<p>We used an in-house Perl script to identify variations in each genome based on the GISAID MSA file as of January 7<sup>th</sup>, 2022. Additionally, on top of GISAID&#x2019;s cutoffs for excluding low-quality genomes with high N content (0.05), we applied additional filtering to avoid spurious indels and indels with shifted positions arising from high N content. Moreover, genomes with more than 200 mutations were excluded, resulting in 4,976,200 SARS-CoV-2 genomes used in the downstream analysis in this study. Additionally, to avoid reporting spurious indels arising from sequencing errors or errors in MSA, we generated another MSA file with no gaps in reference (obtained with <italic>keep reference length</italic> option) (<xref ref-type="bibr" rid="B20">Katoh and Standley, 2013</xref>) to confirm the exact positions of all the deletions discussed in this study. Then, for visualizing and confirming the position of the indels we used the MSA file based on a representative genome for each of the indels with 0&#xa0;N content.</p>
</sec>
<sec id="s2-3">
<title>Assessing Differences in the Rate of Indels Between SARS-CoV-2 Proteins</title>
<p>We adopted the method we recently used to identify significantly under-mutated and over-mutated proteins during SARS-CoV-2 evolution (<xref ref-type="bibr" rid="B16">Jaroszewski et al., 2021</xref>) to identify proteins with a high rate of indels. Briefly, we counted the total number of indels (except single residue deletions which are usually regarded as unreliable) for each protein (except NSP11, ORF3b, ORF9b and ORF14 as these are too short for the significance analysis). We then used a two-sided binomial test to compare the rate of indels in each protein to the rate of indels in the background (all proteins) to identify proteins with high rates of indels. Our previous study (<xref ref-type="bibr" rid="B16">Jaroszewski et al., 2021</xref>) showed that ORF1ab is less frequently mutated and is likely under more stringent purifying selection than the genes coding for structural and accessory proteins (ORFs2-10). Therefore, we applied an additional statistical comparison of indel rates to non-structural proteins to identify NSPs (NSP1- NSP16) with a higher rate of indels than others. We performed a separate two-sided binomial test using only ORF1ab (corresponding to proteins NSP1-NSP16) for this specific comparison as background. Adjusted <italic>p</italic>-values (q-values) were calculated using the false discovery rate (FDR) method. Proteins with odds ratio above one and q-values less than 0.01 were considered as having significantly increased rates of indels.</p>
</sec>
<sec id="s2-4">
<title>Visualization of Indels on Proteins&#x2019; 3-Dimensional (3D) Structures</title>
<p>We used PyMol (<xref ref-type="bibr" rid="B45">PyMOL, 2021</xref>) and Coronavirus3D (<xref ref-type="bibr" rid="B40">Sedova et al., 2020</xref>) for studying and visualization of indels in the context of protein 3-dimensional (3D) structures. The 3D coordinates were downloaded from the Protein Data Bank (PDB) (<xref ref-type="bibr" rid="B1">Berman et al., 2000</xref>). For proteins with no available 3D structures we used, if available, models predicted by Alphafold (<ext-link ext-link-type="uri" xlink:href="https://deepmind.com/research/open-source/computational-predictions-of-protein-structures-associated-with-COVID-19">https://deepmind.com/research/open-source/computational-predictions-of-protein-structures-associated-with-COVID-19</ext-link>), or homology modeling (<ext-link ext-link-type="uri" xlink:href="https://zhanglab.dcmb.med.umich.edu/COVID-19/">https://zhanglab.dcmb.med.umich.edu/COVID-19/</ext-link>), noting in the discussion their hypothetical status. It should be noted that even for some proteins with available 3D structures we used models predicted with homology modeling when the indels were located in the regions of the protein with unresolved structures (unmodeled residues). Information on protein domain boundaries was based on 3D coordinates when available or on UniProt and the literature (<xref ref-type="sec" rid="s10">Supplementary Table S4</xref>).</p>
<p>The positions of transmembrane helices for proteins with no available 3D structures were identified with the TMHMM 2.0 algorithm (<xref ref-type="bibr" rid="B22">Krogh et al., 2001</xref>). IEDB server (Bepipred Linear Epitope Prediction 2.0&#xa0;at <ext-link ext-link-type="uri" xlink:href="http://www.iedb.org/">http://www.iedb.org/</ext-link>) (<xref ref-type="bibr" rid="B17">Jespersen et al., 2017</xref>) was used to predict B-cell epitopes for NSP1, NSP3, NSP6, spike, nucleocapsid, ORF3a, ORF7a, and ORF8 (i.e. proteins with significantly increased rates of indels).</p>
</sec>
<sec id="s2-5">
<title>Visualization of Indels on the Phylogenetic Tree</title>
<p>We mapped the number of indels for each genome (between one and six indels) on the Nextstrain time-resolved tree (<xref ref-type="bibr" rid="B13">Hadfield et al., 2018</xref>), which includes 3475 genomes sampled between December 2019 and Dec 27<sup>th</sup>, 2021. We used the ggtree R package (<xref ref-type="bibr" rid="B57">Yu, 2020</xref>) to visualize the tree.</p>
</sec>
<sec id="s2-6">
<title>Visualization of Indels on the Alignment File</title>
<p>We extracted one representative genome for each of the indels discussed in this study (i.e., the indels most frequently observed in SARS-CoV-2 genomes). These genomes were then used to visualize the indels using R packages ggmsa and Biostrings.</p>
</sec>
<sec id="s2-7">
<title>Analysis of Independent Occurrence of Indels in SARS-CoV-2</title>
<p>The independent acquisition of indels was determined using HomoplasyFinder (<xref ref-type="bibr" rid="B5">Crispell et al., 2019</xref>) with the same filtering criteria as used in the previous studies (<xref ref-type="bibr" rid="B50">van Dorp et al., 2020</xref>). To identify potential recurrent indels (independently acquired in different branches of phylogenetic tree) in SARS-CoV-2 genomes, we used the GISAID global tree that includes 4,701,022 SARS-CoV-2 genomes (GISAID as of January 7<sup>th</sup>, 2022) (<xref ref-type="bibr" rid="B41">Shu and McCauley, 2017</xref>) together with the input variant calling file (VCF). Briefly, HomoplasyFinder calculates the consistency index for each indel by dividing the minimum number of changes on the GISAID tree (MNCT) by the number of different indels observed at that site minus one. The most frequent indels (observed in at least 0.01% of all studied genomes) with a consistency index of &#x3c;1 and MNCT &#x3e;30 were reported as potentially recurrent indels if they were also independently acquired in more than two independent GISAID clades, in at least two PANGO lineages whose immediate ancestor did not carry the indel, at least two time points, and at least two different continents (originating lab). These filtering and stringent cutoffs were applied to address issues arising from mixed quality of assembled genomes, which in some cases are not detectable (e.g., assembly pipelines replace missing nucleotides with data from the reference genome) from the genome analysis alone. The quality issues introduce uncertainty in phylogenies and lineage assignments and cause underestimation of indel frequencies, all of which lead to overestimation of the independent occurrence of indels (<xref ref-type="bibr" rid="B7">De Maio et al., 2020</xref>; <xref ref-type="bibr" rid="B49">Turakhia et al., 2020</xref>; <xref ref-type="bibr" rid="B46">Tang et al., 2021</xref>), which we countered by increasing the cutoff thresholds.
Regions with different recurrent indels which occurred in adjacent residues (up to five residues apart) were called hypervariable regions (HVRs). The HVRs observed in this study contain between 2 and 30 residues.</p>
<p>To calculate the recurrence of each indel as a function of the time of sample collection, geographical location (originating lab), PANGO lineages, and GISAID clades, we grouped genomes into 25 time bins based on the month and year of the data collection, into six geographical locations (continents), 12 clade-based groups (G, GH, GK, GR, GRA, GRY, GV, L, O, S, V, and a non-assigned group), and 1544 different PANGO lineages. We used such relatively large groups to reduce noise arising from the difference between individual labs and from low-quality genomes.</p>
</sec>
<sec id="s2-8">
<title>Statistical Analysis of Co-Occurred Indels in SARS-CoV-2 Genomes</title>
<p>We ran cooccur R package to analyze the co-occurrence of indels in each lineage and all genomes and used ggplot2 R package (<xref ref-type="bibr" rid="B55">Wickham, 2011</xref>) to draw heatmap of correlation matrix. We also calculated Spearman&#x2019;s correlation coefficient and <italic>p</italic>-value of the correlation test for every two indels using hmisc (<xref ref-type="bibr" rid="B14">Harrell and Harrell, 2019</xref>) R package. We further checked the independent acquisition of top correlated/co-occurred indels using HomoplasyFinder (<xref ref-type="bibr" rid="B5">Crispell et al., 2019</xref>) based on the method explained earlier. The input VCF file includes information on the presence/absence of two co-occurred indels. We used ComplexHeatmap R package (<xref ref-type="bibr" rid="B11">Gu et al., 2016</xref>) to draw the heatmap of percentage of top indels in SARS-CoV-2 VOCs.</p>
</sec>
<sec id="s2-9">
<title>Comparing SARS-CoV-2 and SARS-CoV Genomes in Terms of Indels</title>
<p>Spike, NSP1, NSP3, NSP6, N, ORF3a, ORF7a, and ORF8 protein sequences of SARS coronavirus Tor2 (NC_004718.3) and SARS-CoV-2 (MN996527) were aligned using MAFFT (<xref ref-type="bibr" rid="B20">Katoh and Standley, 2013</xref>) (default parameters). We used Jalview (<xref ref-type="bibr" rid="B54">Waterhouse et al., 2009</xref>) to visualize alignment files and obtain the count and positions of indels.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Increased Frequency of In-Frame Indels in Emerging SARS-CoV-2 Lineages</title>
<p>The recent increase in the number of indels (both insertions and deletions) was observed in all branches of the phylogenetic tree (<xref ref-type="fig" rid="F1">Figures 1A,B</xref>). This increase can be seen in the percentage of both SARS-CoV-2 lineages (<xref ref-type="fig" rid="F1">Figures 1C,D</xref>) and genomes (<xref ref-type="sec" rid="s10">Supplementary Figures S1A,B</xref>) with at least one deletion or one insertion event (one or more than one amino acid change) growing in time. Indels were acquired in several VOCs such as Alpha (B.1.1.7 and Q.&#x2a;), Beta (B.1.351&#x2a;), Gamma (P.&#x2a;), Delta (B.1.617.2, AY.&#x2a;), and Omicron (B.1.1.529 and BA.&#x2a;). As an example, the Alpha variant (B.1.1.7) is defined by 17 signature genome modifications, including three deletion events (NSP6 &#x394;106&#x2013;108, S &#x394;69&#x2013;70, and S &#x394;144), while the Omicron variant includes seven indels as shown in <xref ref-type="sec" rid="s10">Supplementary Figure S1C</xref> (NSP3 1265:SL&#x3e;I, NSP6 &#x394;105&#x2013;107, nucleocapsid &#x394;31&#x2013;33, S &#x394;69&#x2013;70, S 142:GVYY&#x3e;D, S 211:NL&#x3e;I, and S 214:R&#x3e;REPE). Additional indels and their combinations are found in other variants (<xref ref-type="sec" rid="s10">Supplementary Figure S1C</xref>). In this study, for simplicity, genome modifications that include both indels and substitutions such as S 142:GVYY&#x3e;D are only referred to as indels.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Distribution of indels in SARS-CoV-2 genomes. <bold>(A)</bold> and <bold>(B)</bold> Increase in the number of deletion (D) and insertion (I) events in newly emerged lineages illustrated on Nextstrain&#x2019;s time-resolved phylogenetic tree, respectively. <bold>(C)</bold> and <bold>(D)</bold> Percentage of PANGO lineages with and without deletion and insertion events over time, respectively. <bold>(E)</bold> Distribution of the most common deletions along the SARS-CoV-2 genome (red) compared to insertions (blue) and missense substitutions (green).</p>
</caption>
<graphic xlink:href="fgene-13-875406-g001.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>Indels Are Concentrated on Protein Surfaces Near Epitope Regions</title>
<p>Most indels are significantly (q-value &#x3c; 0.01 and odds ratio &#x3e;1) concentrated in NSP1, NSP3, NSP6, ORF3a, ORF6, ORF7a, ORF7b, ORF8, nucleocapsid, and spike glycoprotein (<xref ref-type="fig" rid="F1">Figure 1E</xref> and <xref ref-type="table" rid="T1">Table 1</xref>), all of which are involved in interactions with the host immune system (<xref ref-type="bibr" rid="B26">Lei et al., 2020</xref>; <xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>). At the same time, proteins involved in the replication&#x2013;transcription complex show very few or no indels (<xref ref-type="fig" rid="F1">Figure 1E</xref> and <xref ref-type="table" rid="T1">Table 1</xref>). This is in agreement with our earlier report showing that the segment of the genome coding for the non-structural proteins (ORF1ab, corresponding to proteins NSP1-NSP16) is significantly under-mutated for both missense and synonymous mutations (<xref ref-type="bibr" rid="B16">Jaroszewski et al., 2021</xref>). It should be noted that the terms recurrent deletion regions (RDRs) and recurrent insertion regions (RIRs) are used in recent literature, indicating regions of SARS-CoV-2 proteins with frequent recurrent deletions and insertions, respectively. In this paper, we use the term &#x201c;hypervariable regions (HVRs)&#x201d; referring to regions of proteins with frequent recurrent indels.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Comparison of frequencies of in-frame indels (indels) in SARS-CoV-2 proteins using the two-sided binomial test (only indels observed in at least two genomes were included to eliminate spurious mutations). Bold font indicates proteins with a significantly increased rate of indels (q-value&#x3c;0.01 and Odds ratio&#x3e;1).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Protein</th>
<th rowspan="2" align="center">Protein length</th>
<th rowspan="2" align="center">Number of indels</th>
<th colspan="2" align="center">All Proteins as Background</th>
<th colspan="2" align="center">ORF1ab as Background</th>
</tr>
<tr>
<th align="center">Odds Ratio</th>
<th align="center">q-value (FDR adjusted <italic>p</italic>-value)</th>
<th align="center">Odds Ratio</th>
<th align="center">q-value (FDR adjusted <italic>p</italic>-value)</th>
</tr>
</thead>
<tbody valign="top">
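<tr>
<td align="left">
<bold>NSP1</bold>
</td>
<td align="char" char=".">
<bold>540</bold>
</td>
<td align="char" char=".">
<bold>109</bold>
</td>
<td align="char" char=".">
<bold>2.14</bold>
</td>
<td align="center">
<bold>1.85E-12</bold>
</td>
<td align="char" char=".">
<bold>4.40</bold>
</td>
<td align="center">
<bold>1.44E-36</bold>
</td>
</tr>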
<tr>
<td align="left">NSP2</td>
<td align="char" char=".">1914</td>
<td align="char" char=".">81</td>
<td align="char" char=".">0.45</td>
<td align="center">5.89E-17</td>
<td align="char" char=".">0.92</td>
<td align="center">5.02E-01</td>
</tr>
<tr>
<td align="left">
<bold>NSP3</bold>
</td>
<td align="char" char=".">
<bold>5835</bold>
</td>
<td align="char" char=".">
<bold>442</bold>
</td>
<td align="char" char=".">
<bold>0.80</bold>
</td>
<td align="center">
<bold>1.78E-07</bold>
</td>
<td align="char" char=".">
<bold>1.65</bold>
</td>
<td align="center">
<bold>3.96E-32</bold>
</td>
</tr>
<tr>
<td align="left">NSP4</td>
<td align="char" char=".">1500</td>
<td align="char" char=".">40</td>
<td align="char" char=".">0.28</td>
<td align="center">5.58E-24</td>
<td align="char" char=".">0.58</td>
<td align="center">2.93E-04</td>
</tr>
<tr>
<td align="left">NSP5</td>
<td align="char" char=".">918</td>
<td align="char" char=".">6</td>
<td align="char" char=".">0.07</td>
<td align="center">3.74E-29</td>
<td align="char" char=".">0.14</td>
<td align="center">9.58E-12</td>
</tr>
<tr>
<td align="left">
<bold>NSP6</bold>
</td>
<td align="char" char=".">
<bold>870</bold>
</td>
<td align="char" char=".">
<bold>58</bold>
</td>
<td align="char" char=".">
<bold>0.71</bold>
</td>
<td align="center">
<bold>6.47E-03</bold>
</td>
<td align="char" char=".">
<bold>1.45</bold>
</td>
<td align="center">
<bold>8.99E-03</bold>
</td>
</tr>
<tr>
<td align="left">NSP7</td>
<td align="char" char=".">249</td>
<td align="char" char=".">5</td>
<td align="char" char=".">0.21</td>
<td align="center">1.18E-05</td>
<td align="char" char=".">0.44</td>
<td align="center">6.23E-02</td>
</tr>
<tr>
<td align="left">NSP8</td>
<td align="char" char=".">594</td>
<td align="char" char=".">9</td>
<td align="char" char=".">0.16</td>
<td align="center">1.86E-14</td>
<td align="char" char=".">0.33</td>
<td align="center">1.65E-04</td>
</tr>
<tr>
<td align="left">NSP9</td>
<td align="char" char=".">339</td>
<td align="char" char=".">7</td>
<td align="char" char=".">0.22</td>
<td align="center">2.31E-07</td>
<td align="char" char=".">0.45</td>
<td align="center">3.54E-02</td>
</tr>
<tr>
<td align="left">NSP10</td>
<td align="char" char=".">417</td>
<td align="char" char=".">8</td>
<td align="char" char=".">0.20</td>
<td align="center">4.37E-09</td>
<td align="char" char=".">0.42</td>
<td align="center">1.08E-02</td>
</tr>
<tr>
<td align="left">NSP12</td>
<td align="char" char=".">2795</td>
<td align="char" char=".">46</td>
<td align="char" char=".">0.17</td>
<td align="center">2.91E-64</td>
<td align="char" char=".">0.36</td>
<td align="center">5.63E-18</td>
</tr>
<tr>
<td align="left">NSP13</td>
<td align="char" char=".">1803</td>
<td align="char" char=".">15</td>
<td align="char" char=".">0.09</td>
<td align="center">3.32E-54</td>
<td align="char" char=".">0.18</td>
<td align="center">2.65E-20</td>
</tr>
<tr>
<td align="left">NSP14</td>
<td align="char" char=".">1581</td>
<td align="char" char=".">91</td>
<td align="char" char=".">0.61</td>
<td align="center">2.58E-07</td>
<td align="char" char=".">1.26</td>
<td align="center">3.54E-02</td>
</tr>
<tr>
<td align="left">NSP15</td>
<td align="char" char=".">1038</td>
<td align="char" char=".">36</td>
<td align="char" char=".">0.37</td>
<td align="center">1.05E-12</td>
<td align="char" char=".">0.76</td>
<td align="center">9.92E-02</td>
</tr>
<tr>
<td align="left">NSP16</td>
<td align="char" char=".">894</td>
<td align="char" char=".">23</td>
<td align="char" char=".">0.27</td>
<td align="center">6.51E-15</td>
<td align="char" char=".">0.56</td>
<td align="center">5.01E-03</td>
</tr>
<tr>
<td align="left">
<bold>Spike</bold>
</td>
<td align="char" char=".">
<bold>3822</bold>
</td>
<td align="char" char=".">
<bold>459</bold>
</td>
<td align="char" char=".">
<bold>1.27</bold>
</td>
<td align="center">
<bold>1.22E-07</bold>
</td>
<td align="center">
<bold>-</bold>
</td>
<td align="center">
<bold>-</bold>
</td>
</tr>
<tr>
<td align="left">E</td>
<td align="char" char=".">228</td>
<td align="char" char=".">18</td>
<td align="char" char=".">0.84</td>
<td align="center">5.16E-01</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">M</td>
<td align="char" char=".">669</td>
<td align="char" char=".">26</td>
<td align="char" char=".">0.41</td>
<td align="center">2.06E-07</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>N</bold>
</td>
<td align="char" char=".">
<bold>1260</bold>
</td>
<td align="char" char=".">
<bold>159</bold>
</td>
<td align="char" char=".">
<bold>1.34</bold>
</td>
<td align="center">
<bold>3.43E-04</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>ORF10</bold>
</td>
<td align="char" char=".">
<bold>117</bold>
</td>
<td align="char" char=".">
<bold>7</bold>
</td>
<td align="char" char=".">
<bold>0.63</bold>
</td>
<td align="center">
<bold>3.01E-01</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>ORF3a</bold>
</td>
<td align="char" char=".">
<bold>828</bold>
</td>
<td align="char" char=".">
<bold>254</bold>
</td>
<td align="char" char=".">
<bold>3.25</bold>
</td>
<td align="center">
<bold>1.91E-57</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>ORF6</bold>
</td>
<td align="char" char=".">
<bold>186</bold>
</td>
<td align="char" char=".">
<bold>61</bold>
</td>
<td align="char" char=".">
<bold>3.47</bold>
</td>
<td align="center">
<bold>9.43E-16</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>ORF7a</bold>
</td>
<td align="char" char=".">
<bold>366</bold>
</td>
<td align="char" char=".">
<bold>595</bold>
</td>
<td align="char" char=".">
<bold>17.22</bold>
</td>
<td align="center">
<bold>0.00E&#x2b;00</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">
<bold>ORF7b</bold>
</td>
<td align="char" char=".">
<bold>132</bold>
</td>
<td align="char" char=".">
<bold>58</bold>
</td>
<td align="char" char=".">
<bold>4.65</bold>
</td>
<td align="center">
<bold>1.56E-20</bold>
</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Aggregation and recurrence of indels in hypervariable regions of SARS-CoV-2 proteins are determined by an interplay of the protein structural constraints and functional role of specific regions. Most of the HVRs of SARS-CoV-2 proteins (except ORF7a-HVR) are found on or adjacent to loops forming either experimentally identified (<xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>) or predicted antibody epitopes (<xref ref-type="fig" rid="F2">Figures 2</xref>,<xref ref-type="fig" rid="F3">3</xref>), suggesting SARS-CoV-2 is optimizing its interactions with the host immune system, possibly in response to the increased immunity of the population. For instance, NSP6-HVR falls on a predicted T-cell (<xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>) and B-cell epitope (per the IEDB server), forming a short loop between two transmembrane helices (<xref ref-type="fig" rid="F2">Figure 2C</xref>). Similarly, NSP1-HVR1 and spike-HVRs (<xref ref-type="fig" rid="F2">Figure 2</xref>), as well as HVRs in other proteins, are in or near the loop-forming epitope regions (<xref ref-type="fig" rid="F3">Figure 3</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Top SARS-CoV-2 HVRs in the context of protein 3D structures. <bold>(A)</bold> Distribution of indels in NSP1 <bold>(B)</bold> NSP1-HVRs on protein 3D structure <bold>(C)</bold> Distribution of indels in NSP6 <bold>(D)</bold> NSP6-HVR on protein 3D structure <bold>(E)</bold> Distribution of indels in spike glycoprotein <bold>(F)</bold> HVRs on the protein 3D structure of the spike glycoprotein N-terminal domain bound to human Fab CM25. Insertions, deletions, and predicted B-cell epitopes (result from the IEDB server at <ext-link ext-link-type="uri" xlink:href="http://www.iedb.org">www.iedb.org</ext-link>) are represented as blue dots, red dots, and green lines, respectively. <xref ref-type="sec" rid="s10">Supplementary Table S3</xref> provides details of structures/models used in the Figure.</p>
</caption>
<graphic xlink:href="fgene-13-875406-g002.tif"/>
</fig>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Top SARS-CoV-2 HVRs in the context of protein 3D structures. <bold>(A)</bold> Distribution of indels in SARS-CoV-2 non-structural protein 3 (NSP3) <bold>(B)</bold> NSP3 recurrent deletion region (HVR) on protein 3D structure <bold>(C)</bold> Distribution of indels in SARS-CoV-2 nucleocapsid (N) protein <bold>(D)</bold> N-HVRs on protein 3D structure <bold>(E)</bold> Distribution of indels in SARS-CoV-2 ORF3a <bold>(F)</bold> ORF3a-HVRs on protein 3D structure <bold>(G)</bold> Distribution of indels in SARS-CoV-2 ORF7a <bold>(H)</bold> ORF7a-HVR on protein 3D structure <bold>(I)</bold> Distribution of indels in SARS-CoV-2 ORF8 <bold>(J)</bold> ORF8-HVRs on protein 3D structure. Deletions, insertions, and epitopes are represented as red dots, blue dots, and green lines, respectively. Pink highlighted regions represent HVRs or potential hotspots for recurrent indels in each protein. The regions of 3D structure corresponding to HVRs are colored in red. The coordinates of proteins were obtained from different sources (see <xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). Predicted 3D structural models (<ext-link ext-link-type="uri" xlink:href="https://zhanglab.ccmb.med.umich.edu/COVID-19/">https://zhanglab.ccmb.med.umich.edu/COVID-19/</ext-link>) were used for visualization of recurrent deletion regions in NSP3, ORF3a, and nucleocapsid protein. SP: signal peptide. Indels independently occur in several SARS-CoV-2 lineages in hypervariable regions.</p>
</caption>
<graphic xlink:href="fgene-13-875406-g003.tif"/>
</fig>
<p>In the most studied SARS-CoV-2 protein, surface glycoprotein S (spike), NTD is one of the most genetically modified regions of spike protein and of the entire SARS-CoV-2 proteome (see <xref ref-type="fig" rid="F1">Figure 1</xref>). Deletions in the NTD could be classified as belonging to recurrent deletion regions: RDR1 (residues 60&#x2013;75), RDR2 (residues 139&#x2013;146), RDR3 (residues 210&#x2013;213), and RDR4 (residues 242&#x2013;248) (<xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>). Recurrent insertions were also reported in the same regions (<xref ref-type="bibr" rid="B10">Gerdol, 2021</xref>). We observed that indels in NTD-HVR1 and HVR2 are more frequent as compared to HVR3 and HVR4 (<xref ref-type="sec" rid="s10">Supplementary Figure S2A,B</xref>). Several lineages with new spike indels (expanding spike-HVR2 and HVR4) are now emerging (<xref ref-type="sec" rid="s10">Supplementary Figure S2A,B</xref>). Comparison of spike proteins from the SARS-CoV (Tor2) and SARS-CoV-2 (one of the early Wuhan references) viruses indicates 22 amino acid (AA) insertions and four AA deletions in SARS-CoV-2 spike protein compared to SARS-CoV that mainly occurred in NTD (<xref ref-type="sec" rid="s10">Supplementary Figure S2C</xref>), confirming that NTD is generally the most indel-prone region of spike in SARS coronaviruses.</p>
<p>NSP3 HVR corresponds to group 2 specific marker domain (G2M), a structurally uncharacterized region of the protein (<xref ref-type="fig" rid="F3">Figures 3A,B</xref>). Based on the NSP3 predicted model (built using D-I-TASSER/ C-I-TASSER pipeline from the Zhang lab, <ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/">https://zhanggroup.org/</ext-link>), NSP3-HVR is in the loop and indels in this region occur near B-cell epitopes predicted using IEDB server (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Similar observations were also made for nucleocapsid protein (<xref ref-type="fig" rid="F3">Figure 3 C,D</xref>), ORF3a (<xref ref-type="fig" rid="F3">Figure 3 E,F</xref>), and ORF8&#x2019;s HVR (<xref ref-type="fig" rid="F3">Figures 3I,J</xref>). The indels in different protein HVRs occurred independently in several lineages (<xref ref-type="fig" rid="F4">Figure 4</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>) as seen on the SARS-CoV-2 phylogenetic tree (<xref ref-type="bibr" rid="B25">Elbe and Buckland-Merrett, 2017</xref>). In the following, we will discuss in detail the independent acquisition of indels in NSP1, NSP6 and NTD of spike protein HVRs. Independently acquired indels in NSP3, ORF3a, ORF7a, and ORF8 as well as in nucleocapsid protein HVRs will be discussed in separate sections.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Recurrent Indels in NSP1 and NSP6. <bold>(A)</bold> Nextstrain time-resolved tree (which includes 3475 genomes sampled between December 2019 and December 27<sup>th</sup>, 2021) displays the presence and distribution of the most frequent deletions positioned on NSP1-HVRs and NSP6-HVR as red dots <bold>(B)</bold> Top SARS-CoV-2 variants harbor the most frequent and potentially recurrent deletions of NSP1 and NSP6. Minimum Number of Changes on Tree (MNCT) and Consistency Index (CI) calculated using HomoplasyFinder based on GISAID global tree (4,701,022 SARS-CoV-2 genomes as of January 7<sup>th</sup>, 2022).</p>
</caption>
<graphic xlink:href="fgene-13-875406-g004.tif"/>
</fig>
<p>The independent acquisition of indels was determined using HomoplasyFinder (<xref ref-type="bibr" rid="B5">Crispell et al., 2019</xref>) with filtering criteria as applied in the previous study (<xref ref-type="bibr" rid="B50">van Dorp et al., 2020</xref>). Indels with minimum number of changes on tree (MNCT) above 30 were considered as potential recurrent deletions. We then applied additional filters (see above) and only included those that fulfilled all the criteria (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). These stringent cutoffs were applied to avoid overestimation of homoplasies due to sequencing errors (<xref ref-type="bibr" rid="B7">De Maio et al., 2020</xref>).</p>
<p>Two mutually exclusive NSP1 HVRs (e.g., NSP1 &#x394;84 and NSP1 &#x394;85 in NSP1-HVR1 and &#x394;141-143 in NSP1-HVR2) emerged independently in several lineages such as Alpha, Beta, Delta, Gamma and Omicron (<xref ref-type="fig" rid="F4">Figures 4A,B</xref>). A long version of the indel in NSP1-HVR1 (&#x394;79-89) was studied before (<xref ref-type="bibr" rid="B28">Lin et al., 2021</xref>), but our analysis indicates that shorter indels in this region are recurring more frequently (<xref ref-type="fig" rid="F5">Figure 5A</xref>). The results from HomoplasyFinder (consistency index or CI) indicate that NSP1 deletions are among the potential recurrent events in SARS-CoV-2 evolution (<xref ref-type="fig" rid="F4">Figure 4B</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). NSP1 (&#x394;79-89) was reported to induce lower IFN-I response in the infected Calu-3 cells (<xref ref-type="bibr" rid="B28">Lin et al., 2021</xref>), highlighting the biological importance of indels in NSP1 and other non-spike proteins. It should be noted that NSP1 deletions are not among signature genomic modifications of any SARS-CoV-2 lineage and no indel event differences were identified between NSP1 proteins of SARS-CoV (Tor2) and SARS-CoV-2 (<xref ref-type="sec" rid="s10">Supplementary Figure S3A</xref>). This might imply that intact NSP1 is key for the full functionality of the virus and its pathogenicity but at the same time recurrent indels could suggest the presence of intra-host variations and quasispecies (<xref ref-type="bibr" rid="B39">Santacroce et al., 2021</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Hypervariable regions (HVRs) of NSP1 and NSP6 <bold>(A)</bold> and <bold>(B)</bold> represent coordinates of HVRs of NSP1 and NSP6, respectively. The number of genomes containing a specific indel is provided on the left side of each plot. Indels independently co-occur in several SARS-CoV-2 lineages.</p>
</caption>
<graphic xlink:href="fgene-13-875406-g005.tif"/>
</fig>
<p>After the spike-HVRs, the NSP6-HVR (residues 99&#x2013;108) is the second most frequently modified HVR in SARS-CoV-2, with the &#x394;106-108 observed in more than 1M genomes as of January 2022 (<xref ref-type="fig" rid="F5">Figure 5B</xref>). NSP6 deletions independently occurred as a signature modification for several VOCs&#x2014;Alpha, Beta, Gamma, and Omicron but also some other lineages such as B.1.525 in Nigeria and Europe and B.1.526 in New York and Europe (<xref ref-type="fig" rid="F4">Figure 4</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). Signatures of positive selection for NSP6 &#x394;106-108 were recently reported (<xref ref-type="bibr" rid="B30">Martin et al., 2021</xref>) in line with our results showing high recurrence of NSP6 deletions (<xref ref-type="fig" rid="F4">Figure 4B</xref>). In addition to recurrent indels, overlapping indel events identified in NSP1 (<xref ref-type="fig" rid="F5">Figure 5A</xref>), NSP6 (<xref ref-type="fig" rid="F5">Figure 5B</xref>), and Spike NTD (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>) could provide additional evidence of convergent and/or parallel adaptive evolution in SARS-CoV-2 genomes. This may also offer more potential genetic routes for the rapid adaptation, immune escape and drug resistance of SARS-CoV-2. Similar evolutionary routes in HIV-1 and other RNA viruses were found to play a pivotal role in drug and neutralizing antibody resistance (<xref ref-type="bibr" rid="B33">Men&#xe9;ndez-Arias et al., 2006</xref>; <xref ref-type="bibr" rid="B12">Gutierrez et al., 2019</xref>).</p>
<p>We observe an increasing number of genomes with two or more different indels in spike or other proteins. We use the term co-occurred indels for indels that appear simultaneously in at least one SARS-CoV-2 genome, and independent acquisition of top co-occurred indels was determined using HomoplasyFinder (see method section for details). Multiple spike-indels independently co-occurred with each other and with indels in other proteins, especially NSP6-indels (<xref ref-type="fig" rid="F6">Figure 6</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). NSP6-indels independently co-occurred with spike indels located in HVR1 and HVR2 in Alpha (B.1.1.7, Q.&#x2a;) and B.1.525, with indels located in HVR2 in B.1.526.1 and B.1.1.318, with indels in HVR4 in Beta (B.1.351&#x2a;) and with several indels in HVR2 and HVR3 in Omicron (B.1.1.529 and BA.&#x2a;) as shown in <xref ref-type="fig" rid="F5">Figure 5</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>. Based on HomoplasyFinder results, indels in the spike NTD and ORF8 are also among the top co-occurred indels. Spike &#x394;157-158 and ORF8 &#x394;119-120 were found in more than 90% of the genomes assigned to Delta variant and their co-occurrences were also recorded in genomes assigned to other lineages such as Omicron and B.1.485 (<xref ref-type="fig" rid="F6">Figure 6B</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Indels and their co-occurrence in SARS-CoV-2. <bold>(A)</bold> Co-occurrence of top frequent indels <bold>(B)</bold> Co-occurrence of top indels in VOCs. Data for these heatmaps is provided in <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>, which includes additional combinations of indels in lineages harboring them <bold>(C)</bold> Independent co-occurrence of indels determined based on minimum number of changes on tree (MNCT) and consistency index (CI) calculated using HomoplasyFinder based on GISAID global tree (4,701,022 SARS-CoV-2 genomes as of January 7th, 2022).</p>
</caption>
<graphic xlink:href="fgene-13-875406-g006.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>Hypervariable Region in SARS-CoV-2 Non-Structural Protein Three NSP3</title>
<p>NSP3, along with NSP1 and NSP6, has a significantly higher number of indels when compared to the rest of the NSPs (<xref ref-type="table" rid="T1">Table 1</xref>). As shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, indels in NSP3 are largely occurring in the loop region (1235&#x2013;1270) and near epitopes (<xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>). NSP3 deletion 1265:SL&#x3e;I is a signature mutation of the Omicron variant, NSP3 &#x394;1237-1251 was observed in the L.1 PANGO lineage in Canada, and NSP3 &#x394;1263 in the B.1.1.298 variant from Denmark, where the latter co-occurs with NSP1 85:VM&#x3e;V and spike &#x394;69-70. NSP3-indels are often mutually exclusive with indels in other proteins - they only co-occurred with spike and NSP6-indels in Omicron and very few genomes assigned to B.1.1.7 lineage (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). When compared to NSP3 of SARS-CoV, SARS-CoV-2 NSP3 had a total of 30 AA insertions and seven AA deletions, which occurred mostly between residues 100&#x2013;400 (<xref ref-type="sec" rid="s10">Supplementary Figure S4</xref>) and correspond to predicted epitopes (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>).
</p>
<p>Although NSP2 was not identified as a significantly indel-prone protein, some indels in the NSP2 appeared independently in several lineages (<xref ref-type="sec" rid="s10">Supplementary Tables S1, S2</xref>). The NSP2 &#x394;265-266 is the signature modification of the B.1.573, B.1.1.191, and AN.1 PANGO lineages (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>), primarily seen in Canada and Denmark samples. The NSP2 &#x394;268 is mainly occurring in viral genomes collected from England, Scotland, Northern Ireland, and the Netherlands, and it is also the signature mutation of several lineages (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). The NSP2 &#x394;267-268 frequently appeared during the early phase of the pandemic and only a small portion of the recently collected genomes harbored other NSP2-indels positioned on NSP2-HVR (residues 260&#x2013;270). NSP2 was shown to disrupt host signaling, and it might play a role in SARS-CoV-2 pathogenicity. However, more investigation is required to elucidate the role of NSP2 protein and the impact of its indels on immune evasion.</p>
</sec>
<sec id="s3-4">
<title>Recurrent Deletion Regions in SARS-CoV-2 Nucleocapsid and Accessory Proteins ORF3a, ORF7a, and ORF8</title>
<p>Indels of the nucleocapsid protein occur in two potential HVRs (HVR1: clusters around residues 28&#x2013;35 and HVR2: clusters around residues 202&#x2013;214) as shown in <xref ref-type="fig" rid="F3">Figures 3C,D</xref>. Both nucleocapsid HVRs, especially HVR2, are close to experimentally identified epitopes such as 36-RSKQR-40 and 206-SPARM-210 (<xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>). After the Omicron signature deletion (&#x394;31-33 at HVR1), the second most frequent deletion in nucleocapsid protein, 208AR&#x3e;G (HVR2), is a signature of B.1.1.318 and is found in some B.1.1.7 genomes (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). It co-occurred with three other indels in B.1.1.318, including NSP6 &#x394;106-108, spike &#x394;144, and ORF7b 44:TNMKF&#x3e;Y. According to the Coronavirus3D (<xref ref-type="bibr" rid="B40">Sedova et al., 2020</xref>) variant tracker, this lineage was among the top growing lineages in several countries such as the United States, United Kingdom, and France in June 2021.</p>
<p>The most recurrent indels of ORF3a cluster around amino acid positions 103 (ORF3a-HVR1) and 255 (ORF3a-HVR2) as shown in <xref ref-type="fig" rid="F3">Figures 3E,F</xref>. ORF3a-HVRs are located in the structurally unresolved region of the protein. Based on the predicted structures, they correspond to loops which also contain predicted B-cell epitopes. Interestingly, ORF3a-HVRs identified in our study are also near experimentally identified epitopes of ORF3a antibodies such as 100-GLEAPFLYLYALVYF-114 (<xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>), 246-IHTID-250, and 266-EPTTTTSVPL-275 (<xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>).</p>
<p>The only insertion (240P&#x3e;PE) in SARS-CoV-2 ORF3a, when compared to SARS-CoV, is located near ORF3a-HVR2 (<xref ref-type="sec" rid="s10">Supplementary Figure S3D</xref> and <xref ref-type="fig" rid="F3">Figures 3E,F</xref>). Despite recurring in several lineages, ORF3a indels are not signature mutations for any lineages or sub-lineages. ORF3a &#x394;255 co-occurred with NSP6 and spike indels in Alpha variant (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>).</p>
<p>Unlike the rest of SARS-CoV-2 proteins, accessory proteins (ORF7a and ORF8) have longer indels. The indels of ORF7a often happen in ORF7a-HVR encompassing residues 60&#x2013;100 (<xref ref-type="fig" rid="F3">Figure 3 G,H</xref>), near previously identified ORF7a epitopes such as 86-LFIRQEEVQELYSPI-100 (<xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>). The most frequent indel in this region is 7A_62:QF&#x3e;H, which co-occurred with NSP6 and spike indels in the Delta variant (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). ORF7a indels are not signature mutations of any SARS-CoV-2 lineage and the protein is mostly conserved between SARS-CoV and SARS-CoV-2 when compared to ORF8 (8b) as shown in <xref ref-type="sec" rid="s10">Supplementary Figures S3E,F</xref>.</p>
<p>The most recurrent and frequent indels of ORF8 encompass residues 63&#x2013;66 (ORF8-HVR1) and 118&#x2013;120 (ORF8-HVR2), as illustrated in <xref ref-type="fig" rid="F3">Figure 3 I,J</xref>; the latter is the signature mutation for the Delta variant and co-occurred with spike S_156:EFR&#x3e;G (<xref ref-type="fig" rid="F6">Figure 6</xref>). Interestingly, both ORF8 HVRs are near experimentally identified epitopes, including 66-GSKSP-70 and 106-EDFLE-110. The highest number of changes in terms of indels between SARS-CoV and SARS-CoV-2 proteins was recorded for ORF8 (8b) and spike proteins (<xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>), indicating they are rapidly evolving among SARS coronaviruses. Deletions of an entire ORF8 were identified during both early and late phases of SARS-CoV pandemic (2003) in China (<xref ref-type="bibr" rid="B4">Consortium, 2004</xref>).</p>
<p>Interestingly, most SARS-CoV-2 proteins have a high tendency for recurrent deletions (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>), likely facilitating the virus adaptation to the human host. The increasing number of deletions also results in SARS-CoV-2 genome shrinkage over time, especially in the recent VOCs like Omicron (<xref ref-type="sec" rid="s10">Supplementary Figure S4</xref>). Although the direct association of genome size with viral fitness is difficult to prove, there is evidence of replicative advantage associated with smaller genome size in RNA viruses (<xref ref-type="bibr" rid="B48">Tromas et al., 2014</xref>; <xref ref-type="bibr" rid="B60">Zwart et al., 2014</xref>; <xref ref-type="bibr" rid="B53">Walker et al., 2015</xref>). The results of this study should be interpreted within the context of limitations in the quality of SARS-CoV-2 genomes. Mixed quality of genomes and high numbers of Ns increase instability in lineage assignments and might underestimate indels and overestimate homoplasies. We accounted for this problem by using very stringent criteria and we hypothesize that the real extent of homoplasy in the SARS-CoV-2 evolution is likely to be even higher.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>Viruses, and in particular RNA viruses, are known to undergo rapid genome modifications, but are rarely studied with frequency that would allow us to monitor their detailed dynamics. Comparison of genomes of separate species gives us only a summary of modifications that occurred over significant periods of time. The COVID-19 pandemic led to an unprecedented mobilization of the research community, which in turn provided a unique opportunity for real-time monitoring of a pathogenic virus during a pandemic. In this study, we used sequencing data provided by thousands of research groups and available in the GISAID database (<xref ref-type="bibr" rid="B41">Shu and McCauley, 2017</xref>) to study the dynamics of protein indels during the course of pandemic. This analysis revealed the increase in the rate of indels that started in late 2020, driven by the emergence of lineages containing deletions as signature genome modifications, such as Alpha and Beta variants which replaced most of the previous lineages without indels. These were in turn replaced by the Delta variant with even more deletions in its genome. The Omicron variant that appeared in November 2021 is the first VOC containing both insertions and deletions and it has currently replaced almost all previous variants. Some of the indels in these variants were already shown to increase immune evasion, lead to higher transmissibility and higher viral binding affinity (<xref ref-type="bibr" rid="B19">Karim and Karim, 2021</xref>; <xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>; <xref ref-type="bibr" rid="B51">Viana et al., 2021</xref>); the functions of others are still unknown, but we can speculate about them based on the co-occurrence and overlap with mutations at the same sites.</p>
<p>Different processes may contribute to the emergence of indels in viral genomes, such as replication slippage, recombination, and retrotransposition. Compared to recombination and retrotransposition, replication slippage generates short indels (<xref ref-type="bibr" rid="B52">Viguera et al., 2001</xref>; <xref ref-type="bibr" rid="B8">Domingo, 2020</xref>). Since our analysis revealed mainly short indels, we believe these indels are primarily the result of replication slippage. Another possible explanation for this hypothesis is that insertions emerged later in the pandemic consistent with a higher evolutionary cost for insertions than deletions due to higher probability of incidence of the slippage-induced deletions.</p>
<p>Regardless of the cause of their emergence, SARS-CoV-2 indels that were selected by evolution and contributed to the emerging lineages are predominantly found in specific regions of proteins known as hypervariable regions that typically correspond to loops in protein structures. Interestingly, not all loops in SARS-CoV-2 proteins were found to contain indels; those that do were close to either experimentally identified or predicted epitopes (<xref ref-type="bibr" rid="B58">Zhang et al., 2008</xref>; <xref ref-type="bibr" rid="B27">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B42">Smith et al., 2021</xref>) or were involved in protein-protein interactions, and in the case of the specific SARS-CoV-2 proteins with overabundant indels, in interactions with the host&#x2019;s immune system. Modeling and emerging experimental evidence (<xref ref-type="bibr" rid="B2">Cai et al., 2021</xref>) show that deletions in such regions can remodel epitope surfaces, leading to immune escape. This parallels findings in HIV-1 where deletions in the spike glycoprotein regions encoding surface-exposed disordered loops were found to mediate escape from the neutralizing antibodies elicited by earlier variants of the virus (<xref ref-type="bibr" rid="B56">Wood et al., 2009</xref>; <xref ref-type="bibr" rid="B35">Palmer and Poon, 2019</xref>).</p>
<p>Many indel-prone regions such as the loops in the spike NTD overlap with mutation hotspots that are thought to be driven by host immune system pressure (<xref ref-type="bibr" rid="B10">Gerdol, 2021</xref>; <xref ref-type="bibr" rid="B31">McCallum et al., 2021</xref>; <xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>). Therefore, we hypothesize that the emergence of indels in the same hotspots is a response to the same adaptive pressure. This is supported by the recent studies where both spike-NTD substitutions and indels were demonstrated to accelerate virus adaptation to the host and immune escape (<xref ref-type="bibr" rid="B10">Gerdol, 2021</xref>; <xref ref-type="bibr" rid="B31">McCallum et al., 2021</xref>; <xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>).</p>
<p>Independent co-occurrence of indels in several VOCs might reflect signatures of adaptive evolution by recurrence or recombination. Several VOCs such as Alpha, Beta and Omicron which have simultaneous spike and NSP6-indels were found to have higher transmissibility, infectivity, or immune escape properties than the previously dominant lineages such as B.1.177 (<xref ref-type="bibr" rid="B6">Davies et al., 2021</xref>) with no indels. Such independent expansion of indels in multiple lineages and geographic locations suggests a common adaptation mechanism of SARS-CoV-2 genomes, probably to overcome host immune response, as also suggested in the recent literature (<xref ref-type="bibr" rid="B32">McCarthy et al., 2021</xref>; <xref ref-type="bibr" rid="B38">Ribes et al., 2021</xref>).</p>
<p>In conclusion, we conducted an in-depth analysis of indels in 4,976,200 SARS-CoV-2 genomes. We show that genomic modifications happen in a specific order, with deletions following point mutations, but growing quickly during the progress of the pandemic. In recent months we started seeing the emergence of insertions, including founder genomic modifications of the Omicron variant. Like mutations, indels are largely found in SARS-CoV-2 proteins involved in interactions with the host immune system but are preferentially located in specific regions of proteins &#x201c;hypervariable regions&#x201d; which overlap with structural features such as loops located close to epitopes. Indels in such regions might facilitate immune escape by remodeling the epitope surfaces and may prolong infection by these lineages. Such HVRs should be the subject of surveillance as much as common escape mutations. The increase in the number of indels and HVRs in recent lineages is likely a sign of the virus adapting to the increasing pool of resistant hosts, but other explanations, such as their role in regulating host antiviral response are also possible.</p>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>All sequences used in this study are accessible via the GISAID database (<ext-link ext-link-type="uri" xlink:href="https://www.gisaid.org">www.gisaid.org</ext-link>). All protein structures are accessible via Protein Data Bank (<ext-link ext-link-type="uri" xlink:href="https://www.rcsb.org/">https://www.rcsb.org/</ext-link>) and models from the Zhang lab (<ext-link ext-link-type="uri" xlink:href="https://zhanglab.dcmb.med.umich.edu/COVID-19/">https://zhanglab.dcmb.med.umich.edu/COVID-19/</ext-link>) and AlphaFold database of COVID-19 structures (<ext-link ext-link-type="uri" xlink:href="https://deepmind.com/research/open-source/computational-predictions-of-protein-structures-associated-with-COVID-19">https://deepmind.com/research/open-source/computational-predictions-of-protein-structures-associated-with-COVID-19</ext-link>). All scripts are publicly available on GitHub repository (<ext-link ext-link-type="uri" xlink:href="https://github.com/ArghavanAlisoltani/SARS-CoV-2-Indels.git">https://github.com/ArghavanAlisoltani/SARS-CoV-2-Indels.git</ext-link>).</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>AA, LJ, MI, AI, and AG designed the calculations; AA, LJ, and MI performed them; AA, LJ, MI, AI, and AG analyzed data; and AA, LJ, MI, and AG wrote the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>National Institute of Allergy and Infectious Diseases contract HHSN272201700060C (CSGID) and National Institute of General Medical Sciences Award GM118187 (to AG).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>We gratefully acknowledge the authors from the originating laboratories and the submitting laboratories, who generated and shared via GISAID genetic sequence data on which this research is based, as well as structural biology groups contributing their structures to the PDB.</p>
</ack>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2022.875406/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2022.875406/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Presentation1.zip" id="SM1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM2" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berman</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Westbrook</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gilliland</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bhat</surname>
<given-names>T. N.</given-names>
</name>
<name>
<surname>Weissig</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2000</year>). <article-title>The Protein Data Bank</article-title>. <source>Nucleic Acids Res.</source> <volume>28</volume> (<issue>1</issue>), <fpage>235</fpage>&#x2013;<lpage>242</lpage>. <pub-id pub-id-type="doi">10.1093/nar/28.1.235</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lavine</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Rawson</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Structural Basis for Enhanced Infectivity and Immune Evasion of SARS-CoV-2 Variants</article-title>. <source>Science</source> <volume>373</volume> (<issue>6555</issue>), <fpage>642</fpage>&#x2013;<lpage>648</lpage>. <pub-id pub-id-type="doi">10.1126/science.abi9745</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cherian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Potdar</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Jadhav</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yadav</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Convergent Evolution of SARS-CoV-2 Spike Mutations, L452R, E484Q and P681R, in the Second Wave of COVID-19 in Maharashtra, India</article-title>. <source>bioRxiv</source>. </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Consortium</surname>
<given-names>C. S. M. E.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Molecular Evolution of the SARS Coronavirus during the Course of the SARS Epidemic in China</article-title>. <source>Science</source> <volume>303</volume> (<issue>5664</issue>), <fpage>1666</fpage>&#x2013;<lpage>1669</lpage>. <pub-id pub-id-type="doi">10.1126/science.1092002</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crispell</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Balaz</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gordon</surname>
<given-names>S. V.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>HomoplasyFinder: a Simple Tool to Identify Homoplasies on a Phylogeny</article-title>. <source>Microb. Genom.</source> <volume>5</volume> (<issue>1</issue>). <pub-id pub-id-type="doi">10.1099/mgen.0.000245</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davies</surname>
<given-names>N. G.</given-names>
</name>
<name>
<surname>Abbott</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Barnard</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Jarvis</surname>
<given-names>C. I.</given-names>
</name>
<name>
<surname>Kucharski</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Munday</surname>
<given-names>J. D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Estimated Transmissibility and Impact of SARS-CoV-2 Lineage B.1.1.7 in England</article-title>. <source>Science</source> <volume>372</volume> (<issue>6538</issue>), <fpage>eabg3055</fpage>. <pub-id pub-id-type="doi">10.1126/science.abg3055</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>De Maio</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Walker</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Borges</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Weilguny</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Slodkowicz</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Goldman</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Issues with SARS-CoV-2 Sequencing Data</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473">https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473</ext-link>
</comment>. </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Domingo</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Molecular Basis of Genetic Variation of Viruses: Error-Prone Replication</article-title>. <source>Virus as Populations</source>, <fpage>35</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1016/B978-0-12-816331-3.00002-7</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duffy</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Why Are RNA Virus Mutation Rates So Damn High?</article-title> <source>Plos Biol.</source> <volume>16</volume> (<issue>8</issue>), <fpage>e3000003</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.3000003</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gerdol</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Emergence of a Recurrent Insertion in the N-Terminal Domain of the SARS-CoV-2 Spike Glycoprotein</article-title>. <source>bioRxiv</source>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Eils</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Schlesner</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Complex Heatmaps Reveal Patterns and Correlations in Multidimensional Genomic Data</article-title>. <source>Bioinformatics</source> <volume>32</volume> (<issue>18</issue>), <fpage>2847</fpage>&#x2013;<lpage>2849</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw313</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gutierrez</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Escalera-Zamudio</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pybus</surname>
<given-names>O. G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Parallel Molecular Evolution and Adaptation in Viruses</article-title>. <source>Curr. Opin. Virol.</source> <volume>34</volume>, <fpage>90</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.1016/j.coviro.2018.12.006</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hadfield</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Megill</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bell</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Huddleston</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Potter</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Callender</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Nextstrain: Real-Time Tracking of Pathogen Evolution</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>23</issue>), <fpage>4121</fpage>&#x2013;<lpage>4123</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty407</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Harrell</surname>
<given-names>F. E.</given-names>
<suffix>Jr</suffix>
</name>
<name>
<surname>Harrell</surname>
<given-names>M. F. E.</given-names>
<suffix>Jr</suffix>
</name>
</person-group> (<year>2019</year>). <source>Hmisc is R Package CRAN</source>, <fpage>235</fpage> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jangra</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rathnasinghe</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Stadlbauer</surname>
<given-names>D.</given-names>
</name>
<collab>Personalized Virology Initiative study group</collab>
<name>
<surname>Krammer</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>SARS-CoV-2 Spike E484K Mutation Reduces Antibody Neutralisation</article-title>. <source>Lancet Microbe</source> <volume>2</volume> (<issue>7</issue>), <fpage>e283</fpage>&#x2013;<lpage>e284</lpage>. <pub-id pub-id-type="doi">10.1016/S2666-5247(21)00068-9</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jaroszewski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Iyer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Alisoltani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sedova</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Godzik</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The Interplay of SARS-CoV-2 Evolution and Constraints Imposed by the Structure and Functionality of its Proteins</article-title>. <source>Plos Comput. Biol.</source> <volume>17</volume> (<issue>7</issue>), <fpage>e1009147</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1009147</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jespersen</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Peters</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Marcatili</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>BepiPred-2.0: Improving Sequence-Based B-Cell Epitope Prediction Using Conformational Epitopes</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume> (<issue>W1</issue>), <fpage>W24</fpage>&#x2013;<lpage>W29</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx346</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jewell</surname>
<given-names>B. L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Monitoring Differences between the SARS-CoV-2 B.1.1.7 Variant and Other Lineages</article-title>. <source>The Lancet Public Health</source> <volume>6</volume> (<issue>5</issue>), <fpage>e267</fpage>&#x2013;<lpage>e268</lpage>. <pub-id pub-id-type="doi">10.1016/S2468-2667(21)00073-6</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karim</surname>
<given-names>S. S. A.</given-names>
</name>
<name>
<surname>Karim</surname>
<given-names>Q. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Omicron SARS-CoV-2 Variant: a New Chapter in the COVID-19 Pandemic</article-title>. <source>The Lancet</source> <volume>398</volume> (<issue>10317</issue>), <fpage>2126</fpage>&#x2013;<lpage>2128</lpage>. <pub-id pub-id-type="doi">10.1016/s0140-6736(21)02758-6</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katoh</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Standley</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>MAFFT Multiple Sequence Alignment Software Version 7: Improvements in Performance and Usability</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume> (<issue>4</issue>), <fpage>772</fpage>&#x2013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst010</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kimura</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Kosugi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yamasoba</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Butlertanaka</surname>
<given-names>E. P.</given-names>
</name>
<name>
<surname>Tanaka</surname>
<given-names>Y. L.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>SARS-CoV-2 Lambda Variant Exhibits Higher Infectivity and Immune Resistance</article-title>. <source>Cell Rep.</source> <volume>38</volume> (<issue>2</issue>), <fpage>110218</fpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2021.110218</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krogh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Larsson</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>von Heijne</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sonnhammer</surname>
<given-names>E. L. L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Predicting Transmembrane Protein Topology with a Hidden Markov Model: Application to Complete Genomes</article-title>. <source>J. Mol. Biol.</source> <volume>305</volume> (<issue>3</issue>), <fpage>567</fpage>&#x2013;<lpage>580</lpage>. <pub-id pub-id-type="doi">10.1006/jmbi.2000.4315</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Rohit</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Prithvisagar</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Rai</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Karunasagar</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Karunasagar</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deletion in the C-Terminal Region of the Envelope Glycoprotein in Some of the Indian SARS-CoV-2 Genome</article-title>. <source>Virus. Res.</source> <volume>291</volume>, <fpage>198222</fpage>. <pub-id pub-id-type="doi">10.1016/j.virusres.2020.198222</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lam</surname>
<given-names>J.-Y.</given-names>
</name>
<name>
<surname>Yuen</surname>
<given-names>C.-K.</given-names>
</name>
<name>
<surname>Ip</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>W.-M.</given-names>
</name>
<name>
<surname>To</surname>
<given-names>K. K.-W.</given-names>
</name>
<name>
<surname>Yuen</surname>
<given-names>K.-Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Loss of Orf3b in the Circulating SARS-CoV-2 Strains</article-title>. <source>Emerging Microbes &#x26; Infections</source> <volume>9</volume> (<issue>1</issue>), <fpage>2685</fpage>&#x2013;<lpage>2696</lpage>. <pub-id pub-id-type="doi">10.1080/22221751.2020.1852892</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elbe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Buckland-Merrett</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Data, disease and diplomacy: GISAID&#x2019;s innovative contribution to global health</article-title>. <source>Global Challenges</source> <volume>1</volume>, <fpage>33</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1002/gch2.1018</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lei</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Activation and Evasion of Type I Interferon Responses by SARS-CoV-2</article-title>. <source>Nat. Commun.</source> <volume>11</volume> (<issue>1</issue>), <fpage>3810</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-17665-9</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Proteome-wide Epitope Mapping Identifies a Resource of Antibodies for SARS-CoV-2 Detection and Neutralization</article-title>. <source>Signal. Transduct. Target. Ther.</source> <volume>6</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1038/s41392-021-00573-9</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>J.-w.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>H.-c.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Genomic Monitoring of SARS-CoV-2 Uncovers an Nsp1 Deletion Variant that Modulates Type I Interferon Response</article-title>. <source>Cell. Host. Microbe</source> <volume>29</volume> (<issue>3</issue>), <fpage>489</fpage>&#x2013;<lpage>502</lpage>. <pub-id pub-id-type="doi">10.1016/j.chom.2021.01.015</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Madhi</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Baillie</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Cutland</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Voysey</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Koen</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Fairlie</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Efficacy of the ChAdOx1 nCoV-19 Covid-19 Vaccine against the B.1.351 Variant</article-title>. <source>N. Engl. J. Med.</source> <volume>384</volume>, <fpage>1885</fpage>&#x2013;<lpage>1898</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa2102214</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Weaver</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tegally</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>San</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Shank</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Wilkinson</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The Emergence and Ongoing Convergent Evolution of the N501Y Lineages Coincides with a Major Global Shift in the SARS-CoV-2 Selective Landscape</article-title>. <source>Cell</source> <volume>184</volume> (<issue>20</issue>), <fpage>5189</fpage>&#x2013;<lpage>5200.e7</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2021.09.003</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McCallum</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>De Marco</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lempp</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Tortorici</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Pinto</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Walls</surname>
<given-names>A. C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>N-terminal Domain Antigenic Mapping Reveals a Site of Vulnerability for SARS-CoV-2</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2021.01.14.426475</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McCarthy</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Rennick</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Nambulli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Robinson-McCarthy</surname>
<given-names>L. R.</given-names>
</name>
<name>
<surname>Bain</surname>
<given-names>W. G.</given-names>
</name>
<name>
<surname>Haidar</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Recurrent Deletions in the SARS-CoV-2 Spike Glycoprotein Drive Antibody Escape</article-title>. <source>Science</source> <volume>371</volume> (<issue>6534</issue>), <fpage>1139</fpage>&#x2013;<lpage>1142</lpage>. <pub-id pub-id-type="doi">10.1126/science.abf6950</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Men&#xe9;ndez-Arias</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Matamoros</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cases-Gonz&#xe1;lez</surname>
<given-names>C. E.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Insertions and Deletions in HIV-1 Reverse Transcriptase: Consequences for Drug Resistance and Viral Fitness</article-title>. <source>Curr. Pharm. Des.</source> <volume>12</volume> (<issue>15</issue>), <fpage>1811</fpage>. <pub-id pub-id-type="doi">10.2174/138161206776873608</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oostra</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>de Haan</surname>
<given-names>C. A. M.</given-names>
</name>
<name>
<surname>Rottier</surname>
<given-names>P. J. M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The 29-nucleotide Deletion Present in Human but Not in Animal Severe Acute Respiratory Syndrome Coronaviruses Disrupts the Functional Expression of Open Reading Frame 8</article-title>. <source>J. Virol.</source> <volume>81</volume> (<issue>24</issue>), <fpage>13876</fpage>&#x2013;<lpage>13888</lpage>. <pub-id pub-id-type="doi">10.1128/JVI.01631-07</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Palmer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Poon</surname>
<given-names>A. F. Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Phylogenetic Measures of Indel Rate Variation Among the HIV-1 Group M Subtypes</article-title>. <source>Virus. Evol.</source> <volume>5</volume> (<issue>2</issue>), <fpage>vez022</fpage>. <pub-id pub-id-type="doi">10.1093/ve/vez022</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Planas</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Veyer</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Baidaliuk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Staropoli</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Guivel-Benhassine</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rajah</surname>
<given-names>M. M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Reduced Sensitivity of SARS-CoV-2 Variant Delta to Antibody Neutralization</article-title>. <source>Nature</source> <volume>596</volume>, <fpage>276</fpage>&#x2013;<lpage>280</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03777-9</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plante</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Mitchell</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Plante</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Debbink</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Weaver</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Menachery</surname>
<given-names>V. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The Variant Gambit: COVID-19&#x27;s Next Move</article-title>. <source>Cell. Host. Microbe</source> <volume>29</volume> (<issue>4</issue>), <fpage>508</fpage>&#x2013;<lpage>515</lpage>. <pub-id pub-id-type="doi">10.1016/j.chom.2021.02.020</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="other">
<collab>PyMOL</collab> (<year>2021</year>). <source>The PyMOL Molecular Graphics System</source> (<comment>Version 2.0 Schr&#x00F6;dinger, LLC</comment>).</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ribes</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chaccour</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Moncunill</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Adapt or Perish: SARS-CoV-2 Antibody Escape Variants Defined by Deletions in the Spike N-Terminal Domain</article-title>. <source>Signal. Transduct. Target. Ther.</source> <volume>6</volume> (<issue>1</issue>), <fpage>164</fpage>. <pub-id pub-id-type="doi">10.1038/s41392-021-00601-8</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santacroce</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Charitos</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Carretta</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>De Nitto</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Lovero</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The Human Coronaviruses (HCoVs) and the Molecular Mechanisms of SARS-CoV-2 Infection</article-title>. <source>J. Mol. Med.</source> <volume>99</volume> (<issue>1</issue>), <fpage>93</fpage>&#x2013;<lpage>106</lpage>. <pub-id pub-id-type="doi">10.1007/s00109-020-02012-8</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sedova</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jaroszewski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Alisoltani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Godzik</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Coronavirus3D: 3D Structural Visualization of COVID-19 Genomic Divergence</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>15</issue>), <fpage>4360</fpage>&#x2013;<lpage>4362</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa550</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>McCauley</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>GISAID: Global Initiative on Sharing All Influenza Data - from Vision to Reality</article-title>. <source>Euro Surveill.</source> <volume>22</volume> (<issue>13</issue>), <fpage>30494</fpage>. <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2017.22.13.30494</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smith</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Olsen</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Gentry</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Sambade</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Beck</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Garness</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Landscape and Selection of Vaccine Epitopes in SARS-CoV-2</article-title>. <source>Genome Med.</source> <volume>13</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1186/s13073-021-00910-1</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Starr</surname>
<given-names>T. N.</given-names>
</name>
<name>
<surname>Greaney</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Hilton</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Ellis</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Crawford</surname>
<given-names>K. H. D.</given-names>
</name>
<name>
<surname>Dingens</surname>
<given-names>A. S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Deep Mutational Scanning of SARS-CoV-2 Receptor Binding Domain Reveals Constraints on Folding and ACE2 Binding</article-title>. <source>Cell</source> <volume>182</volume> (<issue>5</issue>), <fpage>1295</fpage>&#x2013;<lpage>1310</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2020.08.012</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Studer</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Dessailly</surname>
<given-names>B. H.</given-names>
</name>
<name>
<surname>Orengo</surname>
<given-names>C. A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Residue Mutations and Their Impact on Protein Structure and Function: Detecting Beneficial and Pathogenic Changes</article-title>. <source>Biochem. J.</source> <volume>449</volume> (<issue>3</issue>), <fpage>581</fpage>&#x2013;<lpage>594</lpage>. <pub-id pub-id-type="doi">10.1042/BJ20121221</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ying</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Evolutionary Analysis and Lineage Designation of SARS-CoV-2 Genomes</article-title>. <source>Sci. Bull.</source> <volume>66</volume> (<issue>22</issue>), <fpage>2297</fpage>&#x2013;<lpage>2311</lpage>. <pub-id pub-id-type="doi">10.1016/j.scib.2021.02.012</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tegally</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wilkinson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Giovanetti</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Iranzadeh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fonseca</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Giandhari</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Detection of a SARS-CoV-2 Variant of Concern in South Africa</article-title>. <source>Nature</source> <volume>592</volume> (<issue>7854</issue>), <fpage>438</fpage>&#x2013;<lpage>443</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03402-9</pub-id> </citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tromas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zwart</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Forment</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Elena</surname>
<given-names>S. F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Shrinkage of Genome Size in a Plant RNA Virus upon Transfer of an Essential Viral Gene into the Host Genome</article-title>. <source>Genome Biol. Evol.</source> <volume>6</volume> (<issue>3</issue>), <fpage>538</fpage>&#x2013;<lpage>550</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evu036</pub-id> </citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Turakhia</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>De Maio</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Thornlow</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gozashti</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lanfear</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Walker</surname>
<given-names>C. R.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Stability of SARS-CoV-2 Phylogenies</article-title>. <source>PLoS Genet.</source> <volume>16</volume> (<issue>11</issue>), <fpage>e1009175</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1009175</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Dorp</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Acman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Richard</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shaw</surname>
<given-names>L. P.</given-names>
</name>
<name>
<surname>Ford</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Ormond</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Emergence of Genomic Diversity and Recurrent Mutations in SARS-CoV-2</article-title>. <source>Infect. Genet. Evol.</source> <volume>83</volume>, <fpage>104351</fpage>. <pub-id pub-id-type="doi">10.1016/j.meegid.2020.104351</pub-id> </citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Viana</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Moyo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Amoako</surname>
<given-names>D. G.</given-names>
</name>
<name>
<surname>Tegally</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Scheepers</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lessells</surname>
<given-names>R. J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Rapid Epidemic Expansion of the SARS-CoV-2 Omicron Variant in Southern Africa</article-title>. <source>Nature</source> <volume>603</volume> (<issue>7902</issue>), <fpage>679</fpage>&#x2013;<lpage>686</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-022-04411-y</pub-id> </citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Viguera</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Canceill</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ehrlich</surname>
<given-names>S. D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Replication Slippage Involves DNA Polymerase Pausing and Dissociation</article-title>. <source>EMBO J.</source> <volume>20</volume> (<issue>10</issue>), <fpage>2587</fpage>&#x2013;<lpage>2595</lpage>. <pub-id pub-id-type="doi">10.1093/emboj/20.10.2587</pub-id> </citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walker</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Firth</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Widen</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Blasdell</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Guzman</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wood</surname>
<given-names>T. G.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Evolution of Genome Size and Complexity in the Rhabdoviridae</article-title>. <source>PLoS Pathog.</source> <volume>11</volume> (<issue>2</issue>), <fpage>e1004664</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1004664</pub-id> </citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waterhouse</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Procter</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>D. M. A.</given-names>
</name>
<name>
<surname>Clamp</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Barton</surname>
<given-names>G. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Jalview Version 2&#x2014;a Multiple Sequence Alignment Editor and Analysis Workbench</article-title>. <source>Bioinformatics</source> <volume>25</volume> (<issue>9</issue>), <fpage>1189</fpage>&#x2013;<lpage>1191</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp033</pub-id> </citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wickham</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>ggplot2</article-title>. <source>WIREs Comp. Stat.</source> <volume>3</volume> (<issue>2</issue>), <fpage>180</fpage>&#x2013;<lpage>185</lpage>. <pub-id pub-id-type="doi">10.1002/wics.147</pub-id> </citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wood</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bhattacharya</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Keele</surname>
<given-names>B. F.</given-names>
</name>
<name>
<surname>Giorgi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gaschen</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>HIV Evolution in Early Infection: Selection Pressures, Patterns of Insertion and Deletion, and the Impact of APOBEC</article-title>. <source>PLoS Pathog.</source> <volume>5</volume> (<issue>5</issue>), <fpage>e1000414</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1000414</pub-id> </citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Using ggtree to Visualize Data on Tree&#x2010;Like Structures</article-title>. <source>Curr. Protoc. Bioinformatics</source> <volume>69</volume> (<issue>1</issue>), <fpage>e96</fpage>. <pub-id pub-id-type="doi">10.1002/cpbi.96</pub-id> </citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Haste-Andersen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Beaver</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bourne</surname>
<given-names>P. E.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Immune Epitope Database Analysis Resource (IEDB-AR)</article-title>. <source>Nucleic Acids Res.</source> <volume>36</volume> (<issue>Suppl. 2</issue>), <fpage>W513</fpage>&#x2013;<lpage>W518</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn254</pub-id> </citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.-L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.-G.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A Pneumonia Outbreak Associated with a New Coronavirus of Probable Bat Origin</article-title>. <source>Nature</source> <volume>579</volume> (<issue>7798</issue>), <fpage>270</fpage>&#x2013;<lpage>273</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2012-7</pub-id> </citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zwart</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Willemsen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dar&#xf2;s</surname>
<given-names>J.-A.</given-names>
</name>
<name>
<surname>Elena</surname>
<given-names>S. F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Experimental Evolution of Pseudogenization and Gene Loss in a Plant RNA Virus</article-title>. <source>Mol. Biol. Evol.</source> <volume>31</volume> (<issue>1</issue>), <fpage>121</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst175</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>