<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2021.665041</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Technology and Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Novel SARS-CoV-2 Viral Sequence Bioinformatic Pipeline Has Found Genetic Evidence That the Viral 3&#x2032; Untranslated Region (UTR) Is Evolving and Generating Increased Viral Diversity</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Farkas</surname> <given-names>Carlos</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1215714/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mella</surname> <given-names>Andy</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1226092/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Turgeon</surname> <given-names>Maxime</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1226189/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Haigh</surname> <given-names>Jody J.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/140667/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Research Institute in Oncology and Hematology (RIOH), CancerCare Manitoba</institution>, <addr-line>Winnipeg, MB</addr-line>, <country>Canada</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Pharmacology and Therapeutics, Rady Faculty of Health Sciences, University of Manitoba</institution>, <addr-line>Winnipeg, MB</addr-line>, <country>Canada</country></aff>
<aff id="aff3"><sup>3</sup><institution>Departamento de F&#x00ED;sica, Facultad de Ciencias F&#x00ED;sicas y Matem&#x00E1;ticas, Universidad de Chile</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff4"><sup>4</sup><institution>Instituto de Ciencias Naturales, Universidad de las Am&#x00E9;ricas</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Statistics, University of Manitoba</institution>, <addr-line>Winnipeg, MB</addr-line>, <country>Canada</country></aff>
<aff id="aff6"><sup>6</sup><institution>Department of Computer Science, University of Manitoba</institution>, <addr-line>Winnipeg, MB</addr-line>, <country>Canada</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Matteo Negroni, Center for the National Scientific Research (CNRS), UPR 9002, Institut de Biologie Mol&#x00E9;culaire et Cellulaire, France</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Rachele Cagliani, Eugenio Medea Scientific Institute for Research, Hospitalization and Healthcare (IRCCS), Italy; El Hafidi Naima, Mohammed V University, Morocco</p></fn>
<corresp id="c001">&#x002A;Correspondence: Carlos Farkas, <email>Carlos.FarkasPool@umanitoba.ca</email></corresp>
<corresp id="c002">Jody J. Haigh, <email>Jody.Haigh@umanitoba.ca</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Virology, a section of the journal Frontiers in Microbiology</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>06</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>665041</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>02</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>05</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2021 Farkas, Mella, Turgeon and Haigh.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Farkas, Mella, Turgeon and Haigh</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>An unprecedented amount of SARS-CoV-2 sequencing has been performed; however, novel bioinformatic tools to cope with and process these large datasets are needed. Here, we have devised a bioinformatic pipeline that inputs SARS-CoV-2 genome sequencing in FASTA/FASTQ format and outputs a single Variant Calling Format file that can be processed to obtain variant annotations and perform downstream population genetic testing. As proof of concept, we have analyzed over 229,000 SARS-CoV-2 viral sequences up until November 30, 2020. We have identified over 39,000 variants worldwide with increased polymorphisms, spanning the ORF3a gene as well as the 3&#x2032; untranslated (UTR) regions, specifically in the conserved stem loop region of SARS-CoV-2 which is accumulating greater observed viral diversity relative to chance variation. Our analysis pipeline has also discovered the existence of SARS-CoV-2 hypermutation with low frequency (in less than 2% of genomes) likely arising through host immune responses and not due to sequencing errors. Among annotated non-sense variants with a population frequency over 1%, recurrent inactivation of the ORF8 gene was found. This was found to be present in the newly identified B.1.1.7 SARS-CoV-2 lineage that originated in the United Kingdom. Almost all VOC-containing genomes possess one stop codon in the ORF8 gene (Q27<sup>&#x2217;</sup>); however, 13% of these genomes also contain another stop codon (K68<sup>&#x2217;</sup>), suggesting that ORF8 loss does not interfere with SARS-CoV-2 spread and may play a role in its increased virulence. We have developed this computational pipeline to assist researchers in the rapid analysis and characterization of SARS-CoV-2 variation.</p>
</abstract>
<kwd-group>
<kwd>3&#x2032;UTR</kwd>
<kwd>SARS-CoV-2 variants</kwd>
<kwd>nucleotide diversity (&#x03C0;)</kwd>
<kwd>Tajima&#x2019;s D-statistic</kwd>
<kwd>viral evolution</kwd>
<kwd>VCF</kwd>
</kwd-group>
<contract-sponsor id="cn001">Canadian Institutes of Health Research<named-content content-type="fundref-id">10.13039/501100000024</named-content></contract-sponsor>
<counts>
<fig-count count="3"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="99"/>
<page-count count="14"/>
<word-count count="0"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1">
<title>Introduction</title>
<p>The novel SARS-CoV-2 coronavirus that causes COVID-19 has surpassed 95 million infections worldwide within 1 year of pandemic, resulting in more than two million deaths until January 2021<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> (<xref ref-type="bibr" rid="B15">Dong et al., 2020</xref>). In-depth characterization of this virus is urgently needed to improve outbreak surveillance, vaccine development and for effective treatments now and in the immediate future. SARS-CoV-2 is a positive single-stranded RNA virus (+ssRNA) with a crown-like appearance observed by electron microscopy that is due to the presence of spike glycoproteins on the lipid bilayer envelope (<xref ref-type="bibr" rid="B11">Cui et al., 2019</xref>; <xref ref-type="bibr" rid="B32">Ke et al., 2020</xref>). Another two transmembrane proteins are incorporated into the envelope: small envelope protein (E) and membrane protein (M) (<xref ref-type="bibr" rid="B94">Wu et al., 2020</xref>). As seen with SARS-CoV-1, SARS-CoV-2 binds through its Spike glycoprotein to cell membrane-bound angiotensin-converting enzyme 2 (ACE2) for entry into host cells (<xref ref-type="bibr" rid="B9">Crackower et al., 2002</xref>; <xref ref-type="bibr" rid="B44">Li et al., 2003</xref>; <xref ref-type="bibr" rid="B23">Ge et al., 2013</xref>; <xref ref-type="bibr" rid="B29">Hoffmann et al., 2020</xref>). Due to the importance of this Spike protein in SARS-CoV-2 infection, variants occurring in this protein are critical, since some can confer improved fitness to SARS-CoV-2 (<xref ref-type="bibr" rid="B77">Starr et al., 2020</xref>) and others affect antigenicity and maybe affect vaccine efficiency (<xref ref-type="bibr" rid="B42">Li J. et al., 2020</xref>). 
SARS-CoV-2 sequencing has been standardized through initiatives such as the Advancing Real-Time Infection Control Network (ARTIC) international initiative (<xref ref-type="bibr" rid="B85">Tyson et al., 2020</xref>) in which Illumina (<xref ref-type="bibr" rid="B30">Hourdel et al., 2020</xref>) or Oxford Nanopore (<xref ref-type="bibr" rid="B20">Freed et al., 2020</xref>; <xref ref-type="bibr" rid="B43">Li Q. et al., 2020</xref>) sequencing is carried out prior to whole viral genome amplification by tiling PCR or metagenomic approaches. The Centers for Disease Control and Prevention (CDC) maintains a GitHub page<sup><xref ref-type="fn" rid="footnote2">2</xref></sup> detailing recommended protocols, tools and resources for SARS-CoV-2 whole genome sequencing on the mentioned two sequencing platforms, including, PacBio and Ion Torrent technologies. After sequencing, initiatives such as GISAID<sup><xref ref-type="fn" rid="footnote3">3</xref></sup> (<xref ref-type="bibr" rid="B16">Elbe and Buckland-Merrett, 2017</xref>; <xref ref-type="bibr" rid="B75">Shu and McCauley, 2017</xref>) and the Sequence Read Archive (SRA<sup><xref ref-type="fn" rid="footnote4">4</xref></sup>) have been storing SARS-CoV-2 sequencing datasets worldwide from the beginning of the pandemic starting in January 2020, allowing researchers to track fixed variants and follow viral evolution by geographical region. The unprecedented amount of SARS-CoV-2 whole genome sequencing data can help to (1) characterize viral variants that occur within a given host, (2) understand variant fixation in a given population, and (3) understand how the virus changes over time. In fact, the Spike protein mutation D614G global transmission was discovered in this way and is associated with higher viral titers and increased fitness (<xref ref-type="bibr" rid="B36">Korber et al., 2020</xref>; <xref ref-type="bibr" rid="B65">Plante et al., 2020</xref>).</p>
<p>The SARS-CoV-2 genome possesses coding capacity for structural proteins and a variety of accessory Open Reading Frames (ORFs), assessed by both computational predictions and ribosomal profiling techniques. The transcription of SARS-CoV-2 is constant from 5&#x2032;UTR toward ORF1a and ORF1b structural proteins, and steadily increases toward the 3&#x2032; end due to the nested transcription of sub-genomic viral RNAs (<xref ref-type="bibr" rid="B18">Finkel et al., 2021</xref>). As a consequence of increased transcription, novel overlapping ORFs can be readily found in SARS-CoV-2 (<xref ref-type="bibr" rid="B55">Nelson et al., 2020</xref>; <xref ref-type="bibr" rid="B18">Finkel et al., 2021</xref>) including in-frame fusions (<xref ref-type="bibr" rid="B60">Nomburg et al., 2020</xref>). These accessory ORFs have been demonstrated to play a role in modulating the immune response from the host (<xref ref-type="bibr" rid="B39">Lei et al., 2020</xref>) and can disrupt host cell signaling capacity suppressing STAT1/2 phosphorylation, inhibiting interferon gamma mediated response, and causing immune evasion (<xref ref-type="bibr" rid="B95">Xia et al., 2020</xref>). Population-fixed variants can disrupt these ORFs by creating new stop codons, a phenomenon already demonstrated for ORF3a (<xref ref-type="bibr" rid="B38">Lam et al., 2020</xref>), ORF6 (<xref ref-type="bibr" rid="B67">Queromes et al., 2021</xref>) and ORF8 (<xref ref-type="bibr" rid="B27">Gong et al., 2020</xref>; <xref ref-type="bibr" rid="B58">Ngernmuen et al., 2020</xref>; <xref ref-type="bibr" rid="B19">Flower et al., 2021</xref>) amongst other ORFs. Also, variants toward the 3&#x2032;UTR of the virus can confer resistance to host miRNA viral targeting since several human miRNAs are predicted to prevent virus replication by binding to this untranslated region (<xref ref-type="bibr" rid="B4">Chen and Zhong, 2020</xref>; <xref ref-type="bibr" rid="B53">Mukherjee and Goswami, 2020</xref>). 
Thus, it is useful to track these newly fixed viral variants over time across populations using effective bioinformatic tools that are appropriate for these tasks.</p>
<p>Several bioinformatic pipelines have been developed to assist in the genomic epidemiology of SARS-CoV-2 that output sequence alignment analysis and/or variants in various formats. Tools such as VIRULIGN (<xref ref-type="bibr" rid="B46">Libin et al., 2019</xref>) and ViralMSA (<xref ref-type="bibr" rid="B50">Moshiri, 2020</xref>) rely on multiple sequence alignment algorithms to assess identity and further annotation of sequences by outputting viral sequence alignments. Similarly, the pangolin pipeline efficiently assigns input viral sequences to SARS-CoV-2 lineages by using sequence alignment and phylogenetic identification and has the potential to infer variants specifically associated to a specific lineage (<xref ref-type="bibr" rid="B68">Rambaut et al., 2020</xref>). Also tools such as CorGAT, can assist in the functional annotation of SARS-CoV-2 genomes by sequence alignment and outputting a pseudo-VCF file containing detected variants (<xref ref-type="bibr" rid="B6">Chiara et al., 2020</xref>). Clearly, multiple sequence alignment tools are useful in terms of phylogenetic reconstruction and identification, but the process to convert FASTA alignments to variant calls could be ambiguous depending on the variant report format, thus a uniform variant output format such as the Variant Calling Format (VCF) is convenient and suitable for downstream genetic analyses (<xref ref-type="bibr" rid="B12">Danecek et al., 2011</xref>). For these reasons, we devised a pipeline that can input viral Next Generation Sequencing (NGS) datasets or FASTA SARS-CoV-2 genome sequences and process them to obtain aggregated variants in standard population-aware VCF format, an output format that is suitable for variant filtering, annotation and calculation of nucleotide diversity and/or Tajima&#x2019;s D parameters, among other applications.</p>
</sec>
<sec id="S2">
<title>Methods</title>
<sec id="S2.SS1">
<title>Data and Code Availability</title>
<p>17,560 sequencing datasets were downloaded from Sequence Read Archive Repository (SRA<sup><xref ref-type="fn" rid="footnote5">5</xref></sup>) from December 1, 2019 until July 28, 2020. Associated sequencing run accessions, sequencing metadata and related BioProjects are listed in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>. 229,124 FASTA genomes and associated sequencing metadata were downloaded from GISAID database from January 1, 2019 until November 30, 2020, specifying &#x201C;human&#x201D; as source host<sup><xref ref-type="fn" rid="footnote6">6</xref></sup>. Associated metadata and acknowledgments to laboratories/consortia involved in the corresponding genome sequencing is listed in <xref ref-type="supplementary-material" rid="TS2">Supplementary Tables 2</xref>, <xref ref-type="supplementary-material" rid="TS3">3</xref>, respectively. Aggregated variants in VCF format for the latter genomes including the associated predictions by SnpEff program (<xref ref-type="bibr" rid="B7">Cingolani et al., 2012</xref>) are available here: <ext-link ext-link-type="uri" xlink:href="https://usegalaxy.org/u/carlosfarkas/h/snpeffsars-cov-2">https://usegalaxy.org/u/carlosfarkas/h/snpeffsars-cov-2</ext-link>. 36,308 GISAID FASTA sequences from lineage B.1.1.7 were downloaded from GISAID database from January 1, 2019 until January 27, 2021, specifying &#x201C;human&#x201D; as source host and &#x201C;B.1.1.7&#x201D; as lineage in GISAID database. Aggregated variants in VCF format for the latter genomes including the associated sequencing metadata and acknowledgments are available here: <ext-link ext-link-type="uri" xlink:href="https://usegalaxy.org/u/carlosfarkas/h/b117">https://usegalaxy.org/u/carlosfarkas/h/b117</ext-link>. 
The code generated during this study to replicate most of the computational calculations performed in this manuscript is available at the following GitHub repository: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes">https://github.com/cfarkas/SARS-CoV-2-freebayes</ext-link>.</p>
</sec>
<sec id="S2.SS2">
<title>Next-Generation Sequencing and FASTA Dataset Processing</title>
<p>To process next generation sequencing datasets, we employed our pipeline (SARS-CoV-2_freebayes) consisting of a bash/UNIX script that runs several programs in sequential order. We processed the input list of SRA accessions with sra-tools<sup><xref ref-type="fn" rid="footnote7">7</xref></sup>, generating compressed FASTQ files per sequencing, automatically trimmed with fastp tool (<xref ref-type="bibr" rid="B5">Chen et al., 2018</xref>). Then, we aligned each trimmed fastq file against a provided reference genome (Wuhan-Hu-1, GenBank Accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="MN908947.3">MN908947.3</ext-link>) using Minimap2 splice-aware aligner in preset mode -ax sr (<xref ref-type="bibr" rid="B40">Li, 2018</xref>). We sorted and indexed the resulting BAM files by using Samtools (<xref ref-type="bibr" rid="B41">Li et al., 2009</xref>) and performed variant calling on every sorted BAM file, obtaining major frequency viral variants per genome in VCF format using the Freebayes variant calling program, as a frequency-based pooled caller (-F 0.49)<sup><xref ref-type="fn" rid="footnote8">8</xref></sup> (<xref ref-type="bibr" rid="B22">Garrison and Marth, 2012</xref>). Then, we used Jacquard program<sup><xref ref-type="fn" rid="footnote9">9</xref></sup> in the python environment (<xref ref-type="bibr" rid="B73">Sanner, 1999</xref>) to merge every VCF file containing variants associated to each bam file into a single VCF file, containing aggregated variants from all genomes. In the resulting merged VCF file, we recalculated viral frequencies using several UNIX tools (<xref ref-type="bibr" rid="B33">Kernighan and Morgan, 1982</xref>), in combination with vcflib<sup><xref ref-type="fn" rid="footnote10">10</xref></sup>. 
We used the variants per genome logfile &#x201C;logfile_variants_SRA_freebayes&#x201D; to construct <xref ref-type="fig" rid="F1">Figure 1B</xref> using GraphPad Prism 8 software<sup><xref ref-type="fn" rid="footnote11">11</xref></sup>. We processed GISAID FASTA genomes in a similar manner. We preprocessed a single GISAID genome collection with SeqKit (<xref ref-type="bibr" rid="B74">Shen et al., 2016</xref>) to decompose a single FASTA file into individual FASTA files, each file containing a single genome. Then, we aligned every FASTA genome against SARS-CoV-2 reference genome (NC_045512.2) using Minimap2 aligner with preset -ax asm5 (<xref ref-type="bibr" rid="B40">Li, 2018</xref>) and performed variant calling on each BAM file using Freebayes variant caller with the --min-alternate-count 1 (-C 1) option (see text footnote 8), outputting variants in VCF format. With these operations, we obtained major frequency viral variants in VCF format from each FASTA genome. Then, we aggregated variants into a single VCF file, as described with Jacquard. We constructed the <xref ref-type="fig" rid="F1">Figure 1C</xref> graph by using the variants per genome logfile, reported in the output file &#x201C;logfile_variants_GISAID_freebayes&#x201D; and input into the GraphPad Prism 8 software. We filtered out highly homoplasic sites from merged variant calls, as already reported to be frequent in SARS-CoV-2 sequencing; see: <ext-link ext-link-type="uri" xlink:href="https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473">https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473</ext-link>. All these computational analyses are described here: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes">https://github.com/cfarkas/SARS-CoV-2-freebayes</ext-link> (case examples I and II, respectively).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>SARS-CoV-2 genome analysis reveals inter-host diversity that in extreme cases leads to hypermutation. <bold>(A)</bold> Pipeline overview to process GISAID FASTA genome sequences and Sequence Read Archive datasets. Both inputs are aligned to SARS-CoV-2 reference genome (wuhCor1) and variant calling is performed to obtain a single population-aware VCF file suitable for downstream genetic analysis. Streamlined bioinformatic tools are depicted with blue letters. <bold>(B)</bold> Major viral frequency variants (via a consensus calling approach) for 17,560 next generation sequencing (NGS) datasets downloaded from SRA, separated by non-outliers (<italic>n</italic> = 17,200) and outliers (<italic>n</italic> = 360, <italic>Q</italic> = 1%, Grubbs&#x2019;s test). Outlier number and mean of variants are depicted at left. <bold>(C)</bold> Same as B for 229,124 SARS-CoV-2 GISAID genomes, separated by non-outliers (<italic>n</italic> = 228,093) and outliers (<italic>n</italic> = 143, <italic>Q</italic> = 10%, Grubbs&#x2019;s test). Outlier number and mean of variants are depicted at left. <bold>(D)</bold> IGV snapshots of outliers and non-outlier NGS samples from C. Outlier samples are depicted with black arrows, exceeding number of variants from non-outliers. Single nucleotide polymorphisms are depicted in red if nucleotide differs from the reference sequence by greater or equal to 50% of quality weighted reads. <bold>(E)</bold> Q20 statistics obtained with SeqKit program for mapped reads of 374 NGS datasets from Spain, 215 NGS datasets from United States, 397 NGS datasets from Australia and 360 outlier NGS datasets from SRA. Percentages are depicted in the <italic>y</italic>-axis. <bold>(F)</bold> Nucleotide change frequencies from 17,200 SRA NGS aggregated variants (non-outliers, left) and from 360 aggregated outlier variants (right), both annotated with SnpEff program. Frequency boxes are colored from white to dark red as number of changes increases. 
<bold>(G)</bold> (Upper) Pie charts depicting missense/silent ratios registered in the non-outliers and outlier NGS samples. Values are denoted as percentages and the total number of variants are denoted in the bottom of the graphs. (Lower) Same as upper, for transitions/transversions ratios (Ts/Tv). <bold>(H)</bold> Correlation between Average nucleotide diversity (&#x03C0;) provided by inStrain program and SNV counts (VF &#x003E; 10%) for Spain (<italic>n</italic> = 374, left), United States (<italic>n</italic> = 215, middle) and Australian NGS samples (<italic>n</italic> = 397, right). In the three countries, the two variables tend to increase together (see <italic>r</italic>-values of Spearman correlation analyses). <bold>(I)</bold> Proposed model of how APOBEC3G and ADAR complex (with minor contributions) can lead to hypermutation of SARS-CoV-2 (C &#x003E; U and A &#x003E; G editing) accompanied by intra-host diversity, homoplasies and increased transversions. In the majority of infections, it is probable that micro diversity is maintained at low frequencies due the action of the virus error correction machinery.</p></caption>
<graphic xlink:href="fmicb-12-665041-g001.tif"/>
</fig>
</sec>
<sec id="S2.SS3">
<title>Variant Visualization</title>
<p>We used the Integrative Genomics Viewer (IGV) software<sup><xref ref-type="fn" rid="footnote12">12</xref></sup> to visualize next generation sequencing alignments in bam format (<xref ref-type="bibr" rid="B72">Robinson et al., 2011</xref>, <xref ref-type="bibr" rid="B71">2017</xref>; <xref ref-type="bibr" rid="B84">Thorvaldsdottir et al., 2013</xref>). To visualize major viral frequency variants, the variant frequency threshold was set at 0.49.</p>
</sec>
<sec id="S2.SS4">
<title>SnpEff Annotation</title>
<p>We annotated merged variants from GISAID genomes (<italic>n</italic> = 229,124) using a repurposed version of SnpEff program, available in the Galaxy server (<xref ref-type="bibr" rid="B24">Giardine et al., 2005</xref>; <xref ref-type="bibr" rid="B7">Cingolani et al., 2012</xref>; <xref ref-type="bibr" rid="B1">Afgan et al., 2018</xref>). We parsed the resulting annotated VCF file using the SnpEff_processing.sh script, available here: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes/blob/master/SnpEff_processing.sh">github.com/cfarkas/SARS-CoV-2-freebayes/blob/master/SnpEff_processing.sh</ext-link>. The amino acid change chart related to <xref ref-type="fig" rid="F2">Figure 2D</xref> is available as SnpEff HTML output here: <ext-link ext-link-type="uri" xlink:href="https://usegalaxy.org/u/carlosfarkas/h/snpeffsars-cov-2">https://usegalaxy.org/u/carlosfarkas/h/snpeffsars-cov-2</ext-link>. All these computational analyses are described here: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes">https://github.com/cfarkas/SARS-CoV-2-freebayes</ext-link> (case example III).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Non-neutral codon changes actively shape evolution of several SARS-CoV-2 proteins. <bold>(A)</bold> Nucleotide change frequencies from 229,124 aggregated GISAID genome variants annotated with SnpEff program. Frequency boxes are colored from white to dark red as number of changes increases. <bold>(B)</bold> Same as A for 133 aggregated GISAID genomes, corresponding to GISAID outlier samples. Frequency boxes are colored from white to dark red, as the number of changes increases. <bold>(C)</bold> Missense, non-sense, frameshift, and synonymous number of occurrences in 229,124 GISAID genomes. Significance of comparisons was assessed with Mann-Whitney test (&#x002A;&#x002A;&#x002A;&#x002A;<italic>P</italic> &#x003C; 0.0001, <sup><italic>ns</italic></sup><italic>P</italic> &#x003E; 0.05). <bold>(D)</bold> Plot of change frequencies across 20 amino acid changes in SARS-CoV-2. Changes are grouped in six categories and colored from light to dark red, according to the number of changes. <bold>(E)</bold> Spike protein mutant molecular dynamics. (Left) RMSD values (in nanometers, nm) of 20 ns of simulation of the wild-type N-terminal domain (A222) or mutant domain (V222). Residue positions of the domain with respect to the full Spike protein are depicted in light blue. (Middle) Same as left for the wild-type RBD domain of the Spike protein (S477) or mutant RBD domain (N477). Residue positions of the domain with respect to the full Spike protein are depicted in light blue. (Right) Same as left for the wild-type stalk domain trimer (V1176) or the mutant domain containing Phenylalanine in position 1176 (F1176).</p></caption>
<graphic xlink:href="fmicb-12-665041-g002.tif"/>
</fig>
</sec>
<sec id="S2.SS5">
<title>Tajima&#x2019;s D and Nucleotide Diversity (&#x03C0;) Calculation</title>
<p>We estimated Tajima&#x2019;s D and nucleotide diversity (&#x03C0;) metrics by using vcftools program (<xref ref-type="bibr" rid="B12">Danecek et al., 2011</xref>) on every geographical region as follows: joint variant calls from 4,301, 12,000, 145,888, 47,683, 2,325 and 17,211 GISAID FASTA genomes from Africa, Asia, Europe, North America, South America, and Oceania, respectively, were processed from the alignment to the variant calling step as described in &#x201C;GISAID FASTA dataset processing section.&#x201D; Then, we input merged variants from every geographical region into vcftools, specifying the --haploid flag and setting a genome wide scan of 50 bp in length. We merged bins containing non missing values of Tajima&#x2019;s D and &#x03C0; into a single file and we further processed this file with our pi-tajima.sh script (available in our repository) to obtain bins with Tajima&#x2019;s D values outside 95% CI. All these computational analyses are described here: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes">https://github.com/cfarkas/SARS-CoV-2-freebayes</ext-link> (case example IV).</p>
</sec>
<sec id="S2.SS6">
<title>Intra-Host Diversity and Low Frequency Viral Variants</title>
<p>We estimated nucleotide diversity in 397, 448 and 308 next generation sequencing (NGS) samples from Australia, Spain, and United States populations, respectively, by using aligned reads per sample in BAM format against SARS-CoV-2 reference genome. These BAM files were input in a loop to the inStrain program<sup><xref ref-type="fn" rid="footnote13">13</xref></sup> (<xref ref-type="bibr" rid="B61">Olm et al., 2020</xref>, <xref ref-type="bibr" rid="B62">2021</xref>), obtaining several outputs such as analysis of coverage, intra-host diversity, SNV linkage, and sensitive SNP detection. As recommended by inStrain, we analyzed only sequencing samples with sufficient breadth of coverage (&#x003E;0.9), resulting in 397, 374 and 216 NGS samples from Australia, Spain and from United States, respectively. The list of the NGS samples in the three populations, including the referred calculations, are detailed in the spreadsheet inStrain_results.xlsx, available here: <ext-link ext-link-type="uri" xlink:href="https://github.com/cfarkas/SARS-CoV-2-freebayes">https://github.com/cfarkas/SARS-CoV-2-freebayes</ext-link>. We correlated in each country the number of variants with viral frequency &#x003E;5% against the nucleotide diversity (&#x03C0;) by using Spearman correlation. Spearman&#x2019;s correlation coefficients (r) and confident <italic>p</italic>-values (P, to discard random sampling) were calculated in GraphPad Prism 8. The significance thresholds were as follows: <italic>P</italic> &#x003C; 0.05<sup>&#x2217;</sup>, <italic>P</italic> &#x003C; 0.01<sup>&#x2217;&#x2217;</sup>, <italic>P</italic> &#x003C; 0.001<sup>&#x2217;&#x2217;&#x2217;</sup>, <italic>P</italic> &#x003C; 0.0001<sup>&#x2217;&#x2217;&#x2217;&#x2217;</sup>, <italic>P</italic> &#x003E; 0.05 ns.</p>
</sec>
<sec id="S2.SS7">
<title>Molecular Dynamics Simulations</title>
<p>We conducted molecular dynamics simulations of variants A222V (N-terminal of SARS-CoV-2 residues 1&#x2013;316), S477N (RBM domain, residues 331&#x2013;530) and V1176F (stalk domain trimer, residues 1,130&#x2013;1,273). The full Spike protein trimer was obtained from I-TASSER and variants were modeled by using Foldx5, as previously described in the Free energy estimation calculations section (--command=BuildModel, first outputted model). We subjected wild-type and mutant structures to molecular dynamics simulations by using GROMACS/2020.3 version, in gpu mode<sup><xref ref-type="fn" rid="footnote14">14</xref></sup> (<xref ref-type="bibr" rid="B86">Van Der Spoel et al., 2005</xref>; <xref ref-type="bibr" rid="B37">Kutzner et al., 2015</xref>).</p>
<p>The xvg file records per picosecond were used to plot graphs from <xref ref-type="fig" rid="F3">Figure 3C</xref>, on GraphPad Prism 8 software. PDB, solvated molecules (.gro) and correspondent compressed gromacs trajectories (with or without periodic boundary conditions) are available here: <ext-link ext-link-type="uri" xlink:href="https://usegalaxy.org/u/carlosfarkas/h/sars-cov-2-proteins-and-trayectories">https://usegalaxy.org/u/carlosfarkas/h/sars-cov-2-proteins-and-trayectories</ext-link>. Detailed commands to obtain these trajectories are available in <xref ref-type="supplementary-material" rid="DS1">Supplementary File 1</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p><bold>(A)</bold> Worldwide distribution of Tajima&#x2019;s D values versus nucleotide diversity (&#x03C0;) for 595 bins of 50 bp in length derived from 229,124 GISAID genomes. The dashed lines correspond to the upper (97.5%) and lower (2.5%) percentiles of the empirical distribution of Tajima&#x2019;s D for each bin. Genes containing the top three most extreme outlier bins are depicted in blue (in the upper 97.5th percentile) and red (in the lower 2.5th percentile). Also, genes containing the top three most diverse bins are depicted in purple. <bold>(B)</bold> Number of variants per gene derived from bins in the 97.5th and 2.5th percentile of the empirical distribution of Tajima&#x2019;s D values from A, respectively. <bold>(C)</bold> Parts-of-whole plot of non-sense (Stop Codon) frequencies with worldwide viral frequencies over or equal to 1% from 229,124 GISAID genomes until November 30, 2020. ORF8 stop codons are depicted in a range of red colors, ORF3a stop codons are depicted with a range of purple colors, and ORF7a stop codons are depicted with a range of green colors. <bold>(D)</bold> Variants per gene derived from bins in the 2.5th and 97.5th percentile of the empirical distribution of Tajima&#x2019;s D values, derived from African (<italic>n</italic> = 4301), Asian (<italic>n</italic> = 11,986), Oceanic (<italic>n</italic> = 17,211), North American (<italic>n</italic> = 47,658), South American (<italic>n</italic> = 2325), and European (<italic>n</italic> = 145,884) GISAID genomes, respectively. (Left) Plot of genes containing bins in the 97.5th percentile of the empirical distribution of Tajima&#x2019;s D from GISAID genomes in every geographical region, until November 30, 2020. (Right) Plot of genes containing bins in the 2.5th percentile of the empirical distribution of Tajima&#x2019;s D from GISAID genomes in every geographical region, until November 30, 2020. 
<bold>(E)</bold> Viral frequencies (as percentages) arranged per gene from variants contained in the VOC genomes submitted in GISAID until January 17th, 2021. <bold>(F)</bold> Same plot as <bold>(A)</bold> for GISAID genomes from the B.1.1.7 lineage submitted in GISAID until January 07, 2021 (left, 9194 genomes), January 17, 2021 (middle, 18,101 genomes), and January 27, 2021 (right, 36,308 genomes).</p></caption>
<graphic xlink:href="fmicb-12-665041-g003.tif"/>
</fig>
</sec>
<sec id="S2.SS8">
<title>Statistical Analysis</title>
<p>All statistical analyses were carried out by using GraphPad Prism 8 software (see text footnote 11). A Mann-Whitney test was used to account for the non-normality of the data. The significance thresholds were the following: <italic>P</italic> &#x003C; 0.05<sup>&#x2217;</sup>, <italic>P</italic> &#x003C; 0.01<sup>&#x2217;&#x2217;</sup>, <italic>P</italic> &#x003C; 0.001<sup>&#x2217;&#x2217;&#x2217;</sup>, <italic>P</italic> &#x003E; 0.05 ns. We interpreted the Spearman non-parametric correlation analyses as follows: perfect correlation (1), the two variables tend to increase or decrease together (0&#x2013;1), the two variables do not vary together at all (0), one variable increases as the other decreases (&#x2212;1&#x2013;0), and perfect inverse correlations (&#x2212;1). Since all correlations were calculated using more than 17 observations, <italic>p</italic>-values were computed using a normal approximation. We employed robust regression and outlier removal (ROUT) method (<xref ref-type="bibr" rid="B51">Motulsky and Brown, 2006</xref>) to remove outliers from data, with a strict false discovery rate (Q = 10 and 1%).</p>
</sec>
</sec>
<sec id="S3">
<title>Results</title>
<sec id="S3.SS1">
<title>SARS-CoV-2 Genome Analysis Reveals Inter-Host Diversity That in Extreme Cases Leads to Hypermutation</title>
<p>To determine the degree of inter-host viral variation worldwide, we downloaded and analyzed 17,560 next-generation sequencing datasets from Sequence Read Archive (SRA) submitted since the beginning of the pandemic until July 28, 2020 (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>) and 229,124 SARS-CoV-2 genome sequences available in the GISAID database up until November 30, 2020 (<xref ref-type="supplementary-material" rid="TS2">Supplementary Tables 2</xref>, <xref ref-type="supplementary-material" rid="TS3">3</xref>, respectively). We inputted both datasets into our pipeline to obtain a single population-aware VCF file for subsequent genetic analyses. Variant calls were performed on each individual dataset and all resulting individual VCF files were merged, obtaining a single population-aware VCF file with calculated viral frequencies (see pipeline scheme in <xref ref-type="fig" rid="F1">Figure 1A</xref>). Also, we accounted for sequencing artifacts: known homoplasies occurring in SARS-CoV-2 genomes due to errors in sequencing and/or adaptor contamination were subtracted from these calls, as described here: <ext-link ext-link-type="uri" xlink:href="https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473">https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473</ext-link>. We benchmarked our pipeline with the pangolin pipeline for lineage reconstruction and Single Nucleotide Polymorphisms (SNP) detection, revealing good agreement on SNP detection between both (&#x003E;95%), but our pipeline also accounted for the detection of Multi-Nucleotide polymorphisms (MNPs), Insertions/deletions and complex variants (<xref ref-type="supplementary-material" rid="FS1">Supplementary Figure 1</xref>). 
SARS-CoV-2 genomes from GISAID accounted for the presence of major viral frequency variants (via a consensus calling approach) compared to the Wuhan-Hu-1 genome assembly (wuhCor1) and the next-generation sequencing datasets (NGS) also allowed us to analyze intra-host diversity given the depth of sequencing. Until July 28, 2020, NGS datasets contained on average 7&#x2013;8 viral variants with major alleles per genome (viral frequency &#x003E; 0.5) (see &#x201C;mean&#x201D; in <xref ref-type="fig" rid="F1">Figure 1B</xref>). As expected, GISAID datasets until November 30, 2020 contained more variants per genome on average because these sequences span more time and more variants were fixed in SARS-CoV-2 over time [(<xref ref-type="bibr" rid="B30">Hourdel et al., 2020</xref>; <xref ref-type="bibr" rid="B85">Tyson et al., 2020</xref>) variants per genome, see &#x201C;mean&#x201D; in <xref ref-type="fig" rid="F1">Figure 1C</xref>]. The distribution from both sources also identified outliers with more than 18 viral variants per genome in NGS samples and more than 37 variants per genome in GISAID FASTA genomes (2 and 0.05% in SRA and GISAID sequencing datasets, see &#x201C;outliers&#x201D; in <xref ref-type="fig" rid="F1">Figures 1B,C</xref>, respectively, Q = 1 and 10%, Grubbs&#x2019;s test). Integrative genomics viewer (IGV) snapshots of outlier samples from Spain, United States and Australian sequencing datasets clearly show hypermutability to varying degrees (viral frequency &#x003E; 0.49, see samples with black arrows, <xref ref-type="fig" rid="F1">Figure 1D</xref>). Australian outlier samples represent an extreme case of hypermutability (see <xref ref-type="fig" rid="F1">Figure 1D</xref>, bottom). 
Over 90% of the mapped reads against SARS-CoV-2 genome from the latter NGS datasets, including the outliers, contained phred quality scores of Q20 (99% base call accuracy), ensuring that the variant calling was reliable on these datasets and the variations registered are not due to sequencing errors (see <xref ref-type="fig" rid="F1">Figure 1E</xref>). 16,307 aggregated variants from SRA datasets reflect that the most recurrent single nucleotide substitutions occurring in all genomes from SRA repository are enriched in C &#x003E; U (C &#x003E; T) transitions and G &#x003E; U (G &#x003E; T) transversions, changes already reported for SARS-CoV-2 and MERS-CoV genomes (<xref ref-type="bibr" rid="B76">Simmonds, 2020</xref>). As previously described, the C &#x003E; U (C &#x003E; T) changes are likely elicited by APOBEC deaminases (<xref ref-type="bibr" rid="B13">Di Giorgio et al., 2020</xref>; <xref ref-type="fig" rid="F1">Figure 1F</xref>, left). This observation also applies for genomes containing an outlier number of variants from SRA, with the exception of A &#x003E; G transitions, which are caused by the ADAR editing enzyme (<xref ref-type="bibr" rid="B52">Mourier et al., 2020</xref>; <xref ref-type="fig" rid="F1">Figure 1F</xref>, right). Strikingly, most of the nucleotide substitutions harboring outlier samples from <xref ref-type="fig" rid="F1">Figure 1C</xref> correspond to missense/non-sense variants rather than silent variants that are enriched in transversion changes, since in outlier samples the raw Transition/Transversion (Ts/Tv) ratio is near one and in non-outlier samples is 4.6 (<xref ref-type="fig" rid="F1">Figure 1G</xref>). The amount of observed transversions in outlier samples correlates with the missense/non-sense vs. silent ratio observed in outliers, since transversions in viruses cause more detrimental changes than transitions (<xref ref-type="bibr" rid="B48">Lyons and Lauring, 2017</xref>). 
We chose three SARS-CoV-2 next generation sequencing datasets submitted by one single submitter with <italic>n</italic> &#x003E; 200 samples to estimate intra-host nucleotide diversity occurring in 397, 374, and 215 next generation sequencing samples from Australia, Spain, and United States populations, respectively, using aligned reads per sample against the SARS-CoV-2 reference genome. This calculation has been already validated to capture intra-host viral diversity, overcoming sequencing errors (<xref ref-type="bibr" rid="B56">Nelson and Hughes, 2015</xref>). In the three populations, average nucleotide diversity positively correlates with the number of Single Nucleotide Variants (SNVs) with viral frequencies over 10% (Spearman correlation, <italic>r</italic>-values from 0.24 to 0.44, <italic>P</italic> &#x003C; 0.0001). The latter supports the existence of intra-host minor variants and therefore SARS-CoV-2 quasi-species, coexisting within the same host (<xref ref-type="bibr" rid="B49">Miralles et al., 1999</xref>; <xref ref-type="bibr" rid="B93">Wright et al., 2011</xref>; <xref ref-type="bibr" rid="B14">Domingo et al., 2012</xref>; <xref ref-type="bibr" rid="B59">Ni et al., 2016</xref>; <xref ref-type="fig" rid="F1">Figure 1H</xref>). As previously described, we hypothesized SARS-CoV-2 normally evolves by the action of APOBEC3G-mediated RNA editing (C &#x003E; U) (<xref ref-type="bibr" rid="B13">Di Giorgio et al., 2020</xref>) including G &#x003E; U and A &#x003E; G changes to a lesser extent, exerted by guanine-to-oxoguanine ROS-mediated generation and ADAR editing, respectively (<xref ref-type="bibr" rid="B52">Mourier et al., 2020</xref>). 
Conversely, hypermutants are mainly fueled by a higher intra-host diversity and homoplasies (different viral lineages emerged after the infection), reflected in more transversion changes that probably are maintained at low frequency in most SARS-CoV-2 infections, due to the virus error correction machinery (<xref ref-type="fig" rid="F1">Figure 1I</xref>). Taken together, and in agreement with others (<xref ref-type="bibr" rid="B88">van Dorp et al., 2020</xref>), we propose that the human host&#x2019;s immune system substantially contributes to shaping SARS-CoV-2 genetic diversity, as evidenced in three distant population cohorts. Although intra-host diversity is probably one of the main sources of SARS-CoV-2 evolution, this is accompanied by RNA-editing at different levels, with SARS-CoV-2 RNA hypermutation as an extreme case of the latter, occurring in less than 2% of COVID-19 patients. This mechanism is predicted to inactivate the virus and is likely caused by host defense mechanisms involving higher RNA-editing as C &#x003E; T (C &#x003E; U) transitions and increased transversions, as frequent signatures observed in hypermutant genomes.</p>
</sec>
<sec id="S3.SS2">
<title>Non-neutral Amino Acid Changes Actively Shape Evolution of Several SARS-CoV-2 Proteins</title>
<p>We next analyzed all inter-host major viral alleles occurring in SARS-CoV-2 genomes worldwide, by using the GISAID consensus called variants. 39,036 aggregated variants from GISAID genomes submitted until November 30, 2020 demonstrate that overall, A &#x003E; G and T &#x003E; C changes are the two most predominant nucleotide changes, over the referred C &#x003E; U and G &#x003E; U changes seen in NGS merged variants (see <xref ref-type="fig" rid="F2">Figure 2A</xref> vs. <xref ref-type="fig" rid="F1">Figure 1F</xref>). These changes were also present in GISAID samples with an outlier number of variants per genome, but A &#x003E; G changes are not predominant in outlier samples, as seen with NGS outliers (<xref ref-type="fig" rid="F2">Figure 2B</xref>). To deduce amino acid changes as consequences of these nucleotide changes, we analyzed nucleotide changes occurring in the aggregated GISAID variants and we predicted its consequences by using SnpEff (<xref ref-type="bibr" rid="B7">Cingolani et al., 2012</xref>), a program to annotate variants in VCF format available in the Galaxy server<sup><xref ref-type="fn" rid="footnote15">15</xref></sup>. Occurrences per variant type demonstrate that missense and synonymous variant occurrences are more frequent compared to frameshift/non-sense variant occurrences per genome, and non-sense variants surpass frameshift variants (<xref ref-type="fig" rid="F2">Figure 2C</xref>). Amino acid change analysis demonstrates frequent threonine (Thr &#x003E; Ile), valine (Val &#x003E; Ile), leucine (Leu &#x003E; Phe) and alanine changes (Ala &#x003E; Ser, Ala &#x003E; Thr and Ala &#x003E; Val), respectively (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Ala &#x003E; Val, Thr &#x003E; Ile and Leu &#x003E; Phe changes are sustained by the C &#x003E; U (C &#x003E; T) transitions in the second position of the Thr and Ala codons, and the first position of the Leu codon, respectively. 
Val &#x003E; Ile and Ala &#x003E; Thr is caused by the A &#x003E; G transition change in the first position of the valine codon and alanine codon, respectively. Other frequent changes, Lys &#x003E; Asn and Glu &#x003E; Asp are explained in part by G &#x003E; C and/or G &#x003E; T transversions. A priori, the previous nucleotides signatures are reflected in non-neutral SARS-CoV-2 amino acid changes that can affect SARS-CoV-2 protein structures and tend to be detrimental in terms of energetic changes, as previously demonstrated (<xref ref-type="bibr" rid="B66">Portelli et al., 2020</xref>). We performed several SARS-CoV-2 molecular dynamics simulations to see if these amino acid changes affect viral protein free energy trajectories. Two spike protein substitutions from European outbreaks containing Ala &#x003E; Val (A222V, viral frequency = 17%) and Ser &#x003E; Asn (S477N, viral frequency = 6.2%) including one Val &#x003E; Phe substitution from a Brazilian outbreak (V1176F, viral frequency = 0.22%) readily changed Spike protein&#x2019;s free energy-based motility to varying extents. The A222V change is predicted to decrease the motility of the N-terminal of Spike protein (NTD), while the S477N and V1176F variants are predicted to increase the motility of the Receptor Binding Domain (RBD) and Stalk domain of the Spike protein, respectively (<xref ref-type="fig" rid="F2">Figure 2E</xref>). Previously, it has been shown that S477N slightly improves the folding of the Spike protein and the fitness of RBD-ACE2 binding (<xref ref-type="bibr" rid="B77">Starr et al., 2020</xref>) and more flexibility in the NTD could help to bind ACE2 receptor. Taken together, non-neutral amino acid changes in SARS-CoV-2 can change viral protein motility and might confer improved fitness to the virus, as appears to be the case of the S477N variant. 
In conclusion, we demonstrated a great diversity of changes occurring in SARS-CoV-2 with completely different outcomes in the Spike protein, as an example. Since many Spike protein variants are now being characterized in the laboratory with phenotypic characterization (<xref ref-type="bibr" rid="B77">Starr et al., 2020</xref>; <xref ref-type="bibr" rid="B90">Weisblum et al., 2020</xref>), it is important to integrate these studies with genomics data in real-time.</p>
</sec>
<sec id="S3.SS3">
<title>SARS-CoV-2 3&#x2032; Untranslated Region (UTR) Is Evolving and Accumulating Greater Diversity</title>
<p>To gain insights into SARS-CoV-2 nucleotide variation in a population context, we divided SARS-CoV-2 genomes in 50 bp sequence bins and performed sliding window analysis to identify viral regions with skewing in viral frequency distribution toward low/rare frequency alleles using Tajima&#x2019;s D statistic, a population genetics test to determine if these regions are evolving randomly or not (<xref ref-type="bibr" rid="B81">Tajima, 1989</xref>). Values of D that fell outside the middle 95% of the empirical distribution were considered potential outliers. Also, we calculated nucleotide diversity &#x03C0; in these bins and compared both values (see pi-tajima.sh script in our repository). Until November 30, 2020, the empirical distribution of Tajima&#x2019;s D across Africa, Asia, Oceania, Europe, North America, and South America demonstrates consistent low nucleotide diversity in SARS-CoV-2 across bins, and negative Tajima&#x2019;s D values (see black dots in <xref ref-type="supplementary-material" rid="FS2">Supplementary Figure 2</xref>). This is consistent with a viral population expansion and the inclusion of rare variants across SARS-CoV-2 genomes, as already reported (<xref ref-type="bibr" rid="B47">Liu et al., 2020</xref>). Outlying Tajima&#x2019;s D values remain negative in all cases and bins 29,700 (region 29,650&#x2013;29,700) and 29,750 (region 29,700&#x2013;29,750) corresponding to the region ORF10-3&#x2032;UTR are frequent outliers from the empirical distribution (smaller than the 2.5% percentile). Also, regions with high nucleotide diversity but not extreme values of Tajima&#x2019;s D often span RNA-dependent RNA polymerase, Nucleocapsid and 3&#x2032;&#x2013;5&#x2032; exonuclease genes (see purple dots in <xref ref-type="supplementary-material" rid="FS2">Supplementary Figure 2</xref>). 
Overall, in every geographical region, bins containing most rare viral alleles outside 2.5% percentile of Tajima&#x2019;s D values tend to accumulate toward 3&#x2032;UTR of SARS-CoV-2 and not toward 5&#x2032;UTR of the virus, specifically from ORF3a until the end of SARS-CoV-2 virus (<xref ref-type="fig" rid="F3">Figure 3A</xref>). Consistent with the latter, ORF3a and 3&#x2032;UTR regions are outliers (smaller than the 2.5% percentile) from a worldwide perspective. Of note, bin 29750 (3&#x2032;UTR) is an extreme outlier with a lower Tajima&#x2019;s D value and higher nucleotide diversity compared with the rest of the bins (<xref ref-type="fig" rid="F3">Figure 3B</xref>). The latter region corresponds to the highly conserved stem loop of SARS-CoV-2 (s2m, region: 29728-29768 Coronavirus 3&#x2032; stem loop II like-motif), conserved among coronaviruses (<xref ref-type="bibr" rid="B91">Williams et al., 1999</xref>; <xref ref-type="bibr" rid="B83">Tengs and Jonassen, 2016</xref>) and essential for replication in other coronaviruses (<xref ref-type="bibr" rid="B31">Hsue et al., 2000</xref>; <xref ref-type="bibr" rid="B25">Goebel et al., 2004</xref>). Conversely, a bin corresponding with the 5&#x2032;UTR of SARS-CoV-2 presents similar nucleotide diversity but not an extremely low Tajima&#x2019;s D value as observed in the 3&#x2032; UTR regions (<xref ref-type="fig" rid="F3">Figure 3B</xref>, purple dots). Overall, genes toward the 5&#x2032;UTR in SARS-CoV-2 present the higher Tajima&#x2019;s D values such as non-structural proteins nsp3, nsp4 the RNA-dependent RNA polymerase gene and the spike protein, among others. Interestingly, genes toward the 3&#x2032;UTR of SARS-CoV-2 except for the E, M and ORF7b genes have the lowest Tajima&#x2019;s D values, supporting that these genes are prone to accumulate rare viral alleles (<xref ref-type="fig" rid="F3">Figure 3C</xref>). 
Until November 30, 2020 most of the non-sense variants in SARS-CoV-2 show variant accumulation in the ORF8 gene, as previously described, and suggest that ORF8 is dispensable for SARS-CoV-2 transmission (<xref ref-type="bibr" rid="B64">Pereira, 2020</xref>; <xref ref-type="fig" rid="F3">Figure 3D</xref>). Regarding the latter, an outbreak in Singapore (45 genomes) contained a large 382-nucleotide deletion that truncated ORF7b and ablated ORF8 expression, but the transmission failed to continue (<xref ref-type="bibr" rid="B79">Su et al., 2020</xref>). The emergent SARS-CoV-2 B.1.1.7 lineage in the United Kingdom contained at least 21 non-synonymous substitutions including a stop codon in the ORF8 gene (Q27<sup>&#x2217;</sup>, <xref ref-type="fig" rid="F3">Figure 3E</xref>) and is constantly increasing its worldwide viral frequency in GISAID database since the beginning of November 30, 2020 until the time of writing of this manuscript (<xref ref-type="bibr" rid="B8">Claro et al., 2021</xref>; <xref ref-type="bibr" rid="B21">Galloway et al., 2021</xref>). Regardless of the fact that all genomes from B.1.1.7 lineage contain the Q27<sup>&#x2217;</sup> variant, around 14% of these genomes contain another downstream stop codon, Q68<sup>&#x2217;</sup> (<xref ref-type="fig" rid="F3">Figure 3E</xref>), confirming that ORF8 is prone to accumulate non-sense variants and B.1.1.7 lineage transmits successfully without expression of ORF8. ORF8 from SARS-CoV-2 has been shown to accumulate in the endoplasmic reticulum (ER) and activate an ER-mediated stress response that causes immune evasion via downregulation of the expression of interferon beta (<xref ref-type="bibr" rid="B69">Rashid et al., 2021</xref>) and the major histocompatibility complex I (<xref ref-type="bibr" rid="B99">Zhang Y. et al., 2020</xref>). Also, along with ORF3b, it is responsible for initiating an early antibody response in the host (<xref ref-type="bibr" rid="B28">Hachim et al., 2020</xref>). 
Thus, early neutralizing antibody responses in infections with SARS-CoV-2 B.1.1.7 lineage could be impaired (<xref ref-type="bibr" rid="B78">Sterlin et al., 2021</xref>) causing potential immune evasion (<xref ref-type="bibr" rid="B54">Neches et al., 2021</xref>). The population structure of lineage B.1.1.7 over time is similar to those observed worldwide with respect to Tajima&#x2019;s D and &#x03C0; values. The genes that show the greatest diversity include the helicase and ORF8 genes and not those encoding for the nucleocapsid and Spike proteins. Notably, over time, the bin 29700 (ORF10-3&#x2032;UTR) arose as an outlier among Tajima&#x2019;s D values but not regions in the 5&#x2032;UTR region of SARS-CoV-2 virus, as seen in the regional and worldwide population structure analysis of SARS-CoV-2 (<xref ref-type="fig" rid="F3">Figure 3F</xref>).</p>
<p>The Tajima&#x2019;s D-&#x03C0; combined graphs presented in this manuscript can be also useful to track the most diverse regions of SARS-CoV-2 and may challenge primer binding design strategies and test sensitivity (<xref ref-type="bibr" rid="B63">Osorio and Correia-Neves, 2020</xref>). We intersected worldwide Tajima&#x2019;s D and &#x03C0; values against common primers used in qPCR testing. Among bins with Tajima&#x2019;s D values lower than the 2.5% percentile of the empirical distribution, regions 28286&#x2013;28306, 28308&#x2013;28332, and 28334&#x2013;28358 of SARS-CoV-2 intersect with CDC primers: 2019-nCoV_N1_Forward_Primer, 2019-nCoV_N1_Probe and 2019-nCoV_N1_Reverse_Primer, respectively (<xref ref-type="supplementary-material" rid="TS4">Supplementary Table 4</xref>). Since these regions are prone to accumulate rare alleles, it is possible that this fact can explain aspects of the false negative ratio of the SARS-CoV-2 test (<xref ref-type="bibr" rid="B17">Farkas et al., 2020</xref>; <xref ref-type="bibr" rid="B34">Khan and Cheung, 2020</xref>; <xref ref-type="bibr" rid="B92">Woloshin et al., 2020</xref>).</p>
<p>Taken together, several regions toward the 3&#x2032; region of the SARS-CoV-2 genome such as ORF3a, ORF8 and the 3&#x2032;UTR (specifically in the s2m stem loop) but not the 5&#x2032;UTR, contain an excess of low frequency variants relative to chance variation, evidenced by their outlying Tajima&#x2019;s D values (<xref ref-type="bibr" rid="B81">Tajima, 1989</xref>). This distinction also applies to the ORF7a/ORF7b genes, where regions of sequence variation in ORF7a register the lowest Tajima&#x2019;s D values, whereas these changes are not seen in ORF7b sequence. Thus, the Tajima&#x2019;s D-&#x03C0; graphs can be helpful to identify and track these regions over time. Our pipeline offers a straightforward way to collect SARS-CoV-2 variants, consolidate them under the VCF format, and further apply downstream variant annotation and/or evolutionary analysis to identify regions under active evolution.</p>
</sec>
</sec>
<sec id="S4">
<title>Discussion</title>
<p>In this study we aimed to analyze over 230,000 SARS-CoV-2 sequences deposited between GISAID and SRA databases within the first 11 months of this pandemic (up until the end of November 2020) by using our pipeline. We characterized the existence of intra-host viral hypermutation that results in an excessive number of variants per genome that occurs in less than 2% of SARS-CoV-2 sequences (<xref ref-type="fig" rid="F1">Figures 1A,B</xref>, respectively). This phenomenon was already described for HIV-1 virus <italic>in vivo</italic>, demonstrating that HIV-1 reverse transcriptase contributed only to 2% of mutations, and the majority was caused by host cytidine deaminases of the A3 family mediated editing (<xref ref-type="bibr" rid="B10">Cuevas et al., 2015</xref>). Since SARS-CoV-2 is subjected to this type of RNA editing (<xref ref-type="bibr" rid="B13">Di Giorgio et al., 2020</xref>), we propose that this enzymatic activity in combination with higher intra-host diversity contributes to SARS-CoV-2 overall diversity at a global level, leading to more than 39,000 major viral frequency variants within 229,000 GISAID genomes. In SARS-CoV-2 genomes, it has been proposed that the catalytic activity of APOBEC deaminases, adenosine deaminase acting on RNA proteins (ADAR), and reactive oxygen species (ROS), are the main drivers of SARS-CoV-2 variation (<xref ref-type="bibr" rid="B52">Mourier et al., 2020</xref>). The APOBEC-mediated C &#x003E; T (C &#x003E; U) transition is substantially present both in hypermutants and non-hypermutant samples, suggesting APOBEC3G mediated RNA editing involvement, as previously reported in smaller sample sizes (<xref ref-type="bibr" rid="B13">Di Giorgio et al., 2020</xref>; <xref ref-type="bibr" rid="B76">Simmonds, 2020</xref>). 
Conversely, ADAR-mediated A &#x003E; G transition is not substantially present in hypermutant genomes (<xref ref-type="fig" rid="F1">Figures 1E</xref>, <xref ref-type="fig" rid="F2">2A,B</xref>, respectively), arguing that ADAR is not the main enzyme involved in the hypermutation mechanism. The hypermutated SARS-CoV-2 variant signature often contains transversions and non-sense variants that are predicted to inactivate several SARS-CoV-2 proteins, probably leading to an efficient mechanism of lethal mutagenesis to control viral spread (<xref ref-type="fig" rid="F1">Figure 1F</xref>). Consistent with this, transversions are known to be more detrimental than transitions (<xref ref-type="bibr" rid="B48">Lyons and Lauring, 2017</xref>) and G &#x003E; T and G &#x003E; C transversions are predominant overall in the GISAID genomes. The first transversion has been already reported for other RNA viruses such as Maize streak virus (<xref ref-type="bibr" rid="B87">van der Walt et al., 2008</xref>) and has been linked with the formation of 8-oxoguanine, known to be the most common cause of spontaneous G &#x003E; T (G &#x003E; U) transversions in RNA (<xref ref-type="bibr" rid="B45">Li et al., 2006</xref>). The second transversion and the excess of other transversion changes in hypermutants can be explained in part by a guanine oxidation product, imidazolone (<xref ref-type="bibr" rid="B35">Kino and Sugiyama, 2001</xref>). Thus, it is possible that an exacerbated innate immune response followed by inflammation (<xref ref-type="bibr" rid="B2">Birra et al., 2020</xref>; <xref ref-type="bibr" rid="B80">Taefehshokr et al., 2020</xref>) can lead to hypermutation; nevertheless, according to this study, this response is extremely limited in the population at the RNA level.</p>
<p>Although we found significant inter-host variation in SARS-CoV-2, neutral evolutionary theory predicts most of these variants as having no or neutral effects (<xref ref-type="bibr" rid="B26">Gojobori et al., 1990</xref>). Most of the amino acid changes in SARS-CoV-2 have already been characterized as energetically detrimental (<xref ref-type="bibr" rid="B66">Portelli et al., 2020</xref>), and we agree with this fact in terms of structural dynamics, since recurrent changes in the Spike protein are capable of increasing (S477N, V1176F) or decreasing (A222V) the molecular dynamics of certain domains of the protein, implying that SARS-CoV-2 proteins are prone to evolve over time by variant accumulation. Nevertheless, variants with minimal changes in fitness such as the mutation D614G in the early months of the pandemic (<xref ref-type="bibr" rid="B66">Portelli et al., 2020</xref>) shifts the S protein conformation toward an ACE2-binding fusion competent state (<xref ref-type="bibr" rid="B97">Yurkovetskiy et al., 2020</xref>), thereby increasing infectivity (<xref ref-type="bibr" rid="B65">Plante et al., 2020</xref>; <xref ref-type="bibr" rid="B98">Zhang L. et al., 2020</xref>). Hence, it is difficult to predict real drivers of SARS-CoV-2 evolution using structural analysis alone but is important to continuously track these changes in order to integrate this data with the increasing knowledge of SARS-CoV-2 variation obtained in the laboratory. This is the case of ORF8 impaired expression in VOC genomes and the probable detrimental consequences on the early antibody responses in the host (<xref ref-type="bibr" rid="B89">Wang et al., 2020</xref>; <xref ref-type="bibr" rid="B99">Zhang Y. et al., 2020</xref>; <xref ref-type="bibr" rid="B78">Sterlin et al., 2021</xref>). 
Since stop codons in ORF8 are increasingly emerging, it is important to track these changes in the future and perform additional studies with these viral variants concerning their ability to elicit a full immune response.</p>
<p>Population genetics can offer a view of how SARS-CoV-2 is evolving and rapidly characterize novel outbreaks. In this manuscript, we have proposed as previously done in the field of population genetics (<xref ref-type="bibr" rid="B3">Biswas and Akey, 2006</xref>) to computationally implement the calculation of Tajima&#x2019;s D and &#x03C0; values across genome-wide scans of SARS-CoV-2 and estimate the empirical distribution of Tajima&#x2019;s D values to dissect viral regions outside 95% percentiles of Tajima&#x2019;s D. As proof of concept, until November 30, 2020 we observed an excess of rare viral alleles toward the 3&#x2032;UTR of SARS-CoV-2, with the most extreme case in the two regions of viral sequence belonging to 3&#x2032;UTR of the virus. One of these regions (29700&#x2013;29750) falls into the last stem loop of SARS-CoV-2 (s2m, region: 29728&#x2013;29768 Coronavirus 3&#x2032; stem loop II like-motif). It has been reported that s2m motif is highly conserved among coronavirus and unlikely to evolve due to this high degree of conservation (<xref ref-type="bibr" rid="B91">Williams et al., 1999</xref>; <xref ref-type="bibr" rid="B83">Tengs and Jonassen, 2016</xref>) and for its role in replication observed in other coronaviruses (<xref ref-type="bibr" rid="B31">Hsue et al., 2000</xref>; <xref ref-type="bibr" rid="B25">Goebel et al., 2004</xref>); nevertheless, at the beginning of the pandemic Australian genomes were reported to contain several variants in the s2m region, likely due to recombination events (<xref ref-type="bibr" rid="B96">Yeh and Contreras, 2020</xref>). 
We report that the accumulation of rare alleles in this region is frequently occurring worldwide and might represent two scenarios: this sequence was a recent acquisition in SARS-CoV-2 and is still in the adaptation phase within the host (<xref ref-type="bibr" rid="B82">Tengs et al., 2021</xref>); and it might represent a defense response exerted by the host, since this sequence has been demonstrated to be important in the viral replication process and proposed as a potential target for antivirals in SARS-CoV (<xref ref-type="bibr" rid="B57">Neuman et al., 2005</xref>; <xref ref-type="bibr" rid="B70">Robertson et al., 2005</xref>). Thus, it is important to track these changes as the pandemic continues to evolve.</p>
<p>In summary, we have presented potential molecular mechanisms that help researchers understand variation diversity fueled by natural selection in SARS-CoV-2, and we proposed a portable bioinformatic pipeline to collect viral variants, consolidate them as a single VCF format file and further calculate population genetic statistics to infer actively evolving SARS-CoV-2 regions. It is important to continuously track emergent viral variants with the bioinformatics tools developed, and we hope these tools, combined with others, can provide a bioinformatic platform for ongoing studies in SARS-CoV-2. We believe that this is an essential first step in identifying emergent forms of the virus but also underscore the need to perform structure-function based experiments of these variants using relevant preclinical <italic>in vivo</italic> models.</p>
</sec>
<sec id="S5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="DS1">Supplementary Material</xref>, further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec id="S6">
<title>Author Contributions</title>
<p>CF conceived of this study, performed all bioinformatics analysis, and wrote the manuscript. AM performed mutant SARS-CoV-2 protein analysis and assisted in biophysical studies. MT assisted in the statistical analysis, data interpretation, and manuscript writing. JH assisted in study design, data interpretation, and manuscript writing. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="financial-disclosure">
<p><bold>Funding.</bold> Powered@NLHPC: This research was partially supported by the supercomputing infrastructure of the NLHPC (ECM-02). This research was partially funded by research funding from the Research Manitoba and the CancerCare MB Research Foundation.</p>
</fn>
</fn-group>
<sec id="S8" sec-type="supplementary material"><title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmicb.2021.665041/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmicb.2021.665041/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.PDF" id="FS1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 1</label>
<caption><p>Benchmarking of detected variants between Pangolin and SARS-CoV-2-freebayes pipelines, using as input 229,162 GISAID FASTA genomes accessed until November 30, 2020. Venn diagram comparison between the Single Nucleotide Polymorphisms (SNPs) detected by the Pangolin pipeline (<xref ref-type="bibr" rid="B68">Rambaut et al., 2020</xref>) vs. SARS-CoV-2-freebayes pipeline. Both pipelines reconcile over 95% of detected SNPs from 229,162 GISAID genomes (see gray overlap). SARS-CoV-2-freebayes pipeline additionally detected Multi-Nucleotide polymorphisms (MNPs), Insertions, Deletions and Complex variants (Mixed) (see green circles, respectively). Number of variants are denoted with blue bold numbers.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_2.PDF" id="FS2" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 2</label>
<caption><p>Empirical distribution of Tajima&#x2019;s D values across geographical regions until November 30, 2020. Worldwide distribution of Tajima&#x2019;s D values vs. nucleotide diversity (&#x03C0;) for bins of 50 bp in length derived from African (<italic>n</italic> = 4,301), Asian (<italic>n</italic> = 11,986), Oceanic (<italic>n</italic> = 17,211), North American (<italic>n</italic> = 47,658), South American (<italic>n</italic> = 2,325), and European (<italic>n</italic> = 145,884) GISAID genomes until November 30, 2020. The dashed lines correspond to the upper (97.5%) and lower (2.5%) percentiles of the empirical distribution of Tajima&#x2019;s D for each bin. In both percentiles, genes containing the top three or four most extreme outlier bins are depicted with numbers. Also, genes containing the top three or four most diverse bins are depicted in purple. 3&#x2032;UTR bins are depicted in red.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_1.XLSX" id="TS1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary TABLE 1</label>
<caption><p>Sequencing metadata of 17,560 Sequence Read Archive (SRA) datasets downloaded until July 28, 2020. For every SRA dataset, we provided NCBI run accession, Assay type (indicates whether the dataset corresponds to amplicon, RNA-seq or other sequencing), sequencing size (bases, in nucleotides), Biosample accession ID, Center Name (depositor), release date, SRA study accession, BioProject and geographic location, among other information. The last column indicates the number of variants per sample (Major viral variants, viral frequency &#x003E; 0.5).</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_2.XLSX" id="TS2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary TABLE 2</label>
<caption><p>Sequencing metadata of 229,162 GISAID genomes downloaded until November 30, 2020. For every GISAID genome, we provided GISAID genome name, GISAID unique identifier (Accession ID), geographic location, host, sequencing technology, lineage, and clade fields, among other information.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_3.XLSX" id="TS3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary TABLE 3</label>
<caption><p>Acknowledgments from sequencing laboratories and/or consortia associated with GISAID genomes, listed in <xref ref-type="supplementary-material" rid="TS2">Supplementary Table 2</xref>.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_3.PDF" id="TS4" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary TABLE 4</label>
<caption><p>Intersection between common primers for SARS-CoV-2 testing against bins outside the 2.5th and 97.5th percentiles of Tajima&#x2019;s D empirical distribution. Bins in the upper (97.5%) and lower (2.5%) percentiles of the empirical distribution of Tajima&#x2019;s D from worldwide merged variants (<italic>n</italic> = 39,035) until November 30, 2020 were intersected against common primers for qPCR testing (as bed).</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_4.DOCX" id="DS1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary File 1</label>
<caption><p>Bioinformatic pipeline used for Molecular Dynamics Trajectory analysis implemented in this study.</p></caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Afgan</surname> <given-names>E.</given-names></name> <name><surname>Baker</surname> <given-names>D.</given-names></name> <name><surname>Batut</surname> <given-names>B.</given-names></name> <name><surname>van den Beek</surname> <given-names>M.</given-names></name> <name><surname>Bouvier</surname> <given-names>D.</given-names></name> <name><surname>Cech</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>46</volume> <fpage>W537</fpage>&#x2013;<lpage>W544</lpage>.</citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Birra</surname> <given-names>D.</given-names></name> <name><surname>Benucci</surname> <given-names>M.</given-names></name> <name><surname>Landolfi</surname> <given-names>L.</given-names></name> <name><surname>Merchionda</surname> <given-names>A.</given-names></name> <name><surname>Loi</surname> <given-names>G.</given-names></name> <name><surname>Amato</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>COVID 19: a clue from innate immunity.</article-title> <source><italic>Immunol. Res.</italic></source> <volume>68</volume> <fpage>161</fpage>&#x2013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1007/s12026-020-09137-5</pub-id> <pub-id pub-id-type="pmid">32524333</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Biswas</surname> <given-names>S.</given-names></name> <name><surname>Akey</surname> <given-names>J. M.</given-names></name></person-group> (<year>2006</year>). <article-title>Genomic insights into positive selection.</article-title> <source><italic>Trends Genet.</italic></source> <volume>22</volume> <fpage>437</fpage>&#x2013;<lpage>446</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2006.06.005</pub-id> <pub-id pub-id-type="pmid">16808986</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Zhong</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Genomics functional analysis and drug screening of SARS-CoV-2.</article-title> <source><italic>Genes. Dis.</italic></source> <volume>7</volume> <fpage>542</fpage>&#x2013;<lpage>550</lpage>. <pub-id pub-id-type="doi">10.1016/j.gendis.2020.04.002</pub-id> <pub-id pub-id-type="pmid">32363223</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Gu</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>fastp: an ultra-fast all-in-one FASTQ preprocessor.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>i884</fpage>&#x2013;<lpage>i890</lpage>.</citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chiara</surname> <given-names>M.</given-names></name> <name><surname>Zambelli</surname> <given-names>F.</given-names></name> <name><surname>Tangaro</surname> <given-names>M. A.</given-names></name> <name><surname>Mandreoli</surname> <given-names>P.</given-names></name> <name><surname>Horner</surname> <given-names>D. S.</given-names></name> <name><surname>Pesole</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <article-title>CorGAT: a tool for the functional annotation of SARS-CoV-2 genomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>36</volume> <fpage>5522</fpage>&#x2013;<lpage>5523</lpage>.</citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cingolani</surname> <given-names>P.</given-names></name> <name><surname>Platts</surname> <given-names>A.</given-names></name> <name><surname>Wang le</surname> <given-names>L.</given-names></name> <name><surname>Coon</surname> <given-names>M.</given-names></name> <name><surname>Nguyen</surname> <given-names>T.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.</article-title> <source><italic>Fly (Austin)</italic></source> <volume>6</volume> <fpage>80</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.4161/fly.19695</pub-id> <pub-id pub-id-type="pmid">22728672</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Claro</surname> <given-names>I. M.</given-names></name> <name><surname>da Silva Sales</surname> <given-names>F. C.</given-names></name> <name><surname>Ramundo</surname> <given-names>M. S.</given-names></name> <name><surname>Candido</surname> <given-names>D. S.</given-names></name> <name><surname>Silva</surname> <given-names>C. A. M.</given-names></name> <name><surname>de Jesus</surname> <given-names>J. G.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Local transmission of SARS-CoV-2 lineage B.1.1.7, Brazil, December 2020.</article-title> <source><italic>Emerg. Infect. Dis.</italic></source> <volume>27</volume> <fpage>970</fpage>&#x2013;<lpage>972</lpage>. <pub-id pub-id-type="doi">10.3201/eid2703.210038</pub-id> <pub-id pub-id-type="pmid">33496249</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Crackower</surname> <given-names>M. A.</given-names></name> <name><surname>Sarao</surname> <given-names>R.</given-names></name> <name><surname>Oudit</surname> <given-names>G. Y.</given-names></name> <name><surname>Yagil</surname> <given-names>C.</given-names></name> <name><surname>Kozieradzki</surname> <given-names>I.</given-names></name> <name><surname>Scanga</surname> <given-names>S. E.</given-names></name><etal/></person-group> (<year>2002</year>). <article-title>Angiotensin-converting enzyme 2 is an essential regulator of heart function.</article-title> <source><italic>Nature</italic></source> <volume>417</volume> <fpage>822</fpage>&#x2013;<lpage>828</lpage>.</citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cuevas</surname> <given-names>J. M.</given-names></name> <name><surname>Geller</surname> <given-names>R.</given-names></name> <name><surname>Garijo</surname> <given-names>R.</given-names></name> <name><surname>Lopez-Aldeguer</surname> <given-names>J.</given-names></name> <name><surname>Sanjuan</surname> <given-names>R.</given-names></name></person-group> (<year>2015</year>). <article-title>Extremely high mutation rate of HIV-1 in vivo.</article-title> <source><italic>PLoS Biol.</italic></source> <volume>13</volume>:<issue>e1002251</issue>. <pub-id pub-id-type="doi">10.1371/journal.pbio.1002251</pub-id> <pub-id pub-id-type="pmid">26375597</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Shi</surname> <given-names>Z. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Origin and evolution of pathogenic coronaviruses.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>17</volume> <fpage>181</fpage>&#x2013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-018-0118-9</pub-id> <pub-id pub-id-type="pmid">30531947</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Danecek</surname> <given-names>P.</given-names></name> <name><surname>Auton</surname> <given-names>A.</given-names></name> <name><surname>Abecasis</surname> <given-names>G.</given-names></name> <name><surname>Albers</surname> <given-names>C. A.</given-names></name> <name><surname>Banks</surname> <given-names>E.</given-names></name> <name><surname>DePristo</surname> <given-names>M. A.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>The variant call format and VCFtools.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>2156</fpage>&#x2013;<lpage>2158</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr330</pub-id> <pub-id pub-id-type="pmid">21653522</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Di Giorgio</surname> <given-names>S.</given-names></name> <name><surname>Martignano</surname> <given-names>F.</given-names></name> <name><surname>Torcia</surname> <given-names>M. G.</given-names></name> <name><surname>Mattiuz</surname> <given-names>G.</given-names></name> <name><surname>Conticello</surname> <given-names>S. G.</given-names></name></person-group> (<year>2020</year>). <article-title>Evidence for host-dependent RNA editing in the transcriptome of SARS-CoV-2.</article-title> <source><italic>Sci. Adv.</italic></source> <volume>6</volume>:<issue>eabb5813</issue>. <pub-id pub-id-type="doi">10.1126/sciadv.abb5813</pub-id> <pub-id pub-id-type="pmid">32596474</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domingo</surname> <given-names>E.</given-names></name> <name><surname>Sheldon</surname> <given-names>J.</given-names></name> <name><surname>Perales</surname> <given-names>C.</given-names></name></person-group> (<year>2012</year>). <article-title>Viral quasispecies evolution.</article-title> <source><italic>Microbiol. Mol. Biol. Rev.</italic></source> <volume>76</volume> <fpage>159</fpage>&#x2013;<lpage>216</lpage>. <pub-id pub-id-type="doi">10.1128/mmbr.05023-11</pub-id> <pub-id pub-id-type="pmid">22688811</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dong</surname> <given-names>E.</given-names></name> <name><surname>Du</surname> <given-names>H.</given-names></name> <name><surname>Gardner</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>An interactive web-based dashboard to track COVID-19 in real time (vol 20, pg 533, 2020).</article-title> <source><italic>Lancet Infect. Dis.</italic></source> <volume>20</volume>:<issue>E215</issue>.</citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Elbe</surname> <given-names>S.</given-names></name> <name><surname>Buckland-Merrett</surname> <given-names>G.</given-names></name></person-group> (<year>2017</year>). <article-title>Data, disease and diplomacy: GISAID&#x2019;s innovative contribution to global health.</article-title> <source><italic>Glob. Chall.</italic></source> <volume>1</volume> <fpage>33</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1002/gch2.1018</pub-id> <pub-id pub-id-type="pmid">31565258</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Farkas</surname> <given-names>C.</given-names></name> <name><surname>Fuentes-Villalobos</surname> <given-names>F.</given-names></name> <name><surname>Garrido</surname> <given-names>J. L.</given-names></name> <name><surname>Haigh</surname> <given-names>J.</given-names></name> <name><surname>Barria</surname> <given-names>M. I.</given-names></name></person-group> (<year>2020</year>). <article-title>Insights on early mutational events in SARS-CoV-2 virus reveal founder effects across geographical regions.</article-title> <source><italic>PeerJ</italic></source> <volume>8</volume>:<issue>e9255</issue>. <pub-id pub-id-type="doi">10.7717/peerj.9255</pub-id> <pub-id pub-id-type="pmid">32509472</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Finkel</surname> <given-names>Y.</given-names></name> <name><surname>Mizrahi</surname> <given-names>O.</given-names></name> <name><surname>Nachshon</surname> <given-names>A.</given-names></name> <name><surname>Weingarten-Gabbay</surname> <given-names>S.</given-names></name> <name><surname>Morgenstern</surname> <given-names>D.</given-names></name> <name><surname>Yahalom-Ronen</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>The coding capacity of SARS-CoV-2.</article-title> <source><italic>Nature</italic></source> <volume>589</volume> <fpage>125</fpage>&#x2013;<lpage>130</lpage>.</citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Flower</surname> <given-names>T. G.</given-names></name> <name><surname>Buffalo</surname> <given-names>C. Z.</given-names></name> <name><surname>Hooy</surname> <given-names>R. M.</given-names></name> <name><surname>Allaire</surname> <given-names>M.</given-names></name> <name><surname>Ren</surname> <given-names>X.</given-names></name> <name><surname>Hurley</surname> <given-names>J. H.</given-names></name></person-group> (<year>2021</year>). <article-title>Structure of SARS-CoV-2 ORF8, a rapidly evolving immune evasion protein.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>118</volume>:<issue>e2021785118</issue>. <pub-id pub-id-type="doi">10.1073/pnas.2021785118</pub-id> <pub-id pub-id-type="pmid">33361333</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Freed</surname> <given-names>N. E.</given-names></name> <name><surname>Vlkova</surname> <given-names>M.</given-names></name> <name><surname>Faisal</surname> <given-names>M. B.</given-names></name> <name><surname>Silander</surname> <given-names>O. K.</given-names></name></person-group> (<year>2020</year>). <article-title>Rapid and inexpensive whole-genome sequencing of SARS-CoV-2 using 1200 bp tiled amplicons and Oxford Nanopore Rapid Barcoding.</article-title> <source><italic>Biol. Methods Protoc.</italic></source> <volume>5</volume>:<issue>bpaa014</issue>.</citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Galloway</surname> <given-names>S. E.</given-names></name> <name><surname>Paul</surname> <given-names>P.</given-names></name> <name><surname>MacCannell</surname> <given-names>D. R.</given-names></name> <name><surname>Johansson</surname> <given-names>M. A.</given-names></name> <name><surname>Brooks</surname> <given-names>J. T.</given-names></name> <name><surname>MacNeil</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Emergence of SARS-CoV-2 B.1.1.7 lineage - United States, December 29, 2020-January 12, 2021.</article-title> <source><italic>MMWR Morb. Mortal. Wkly Rep.</italic></source> <volume>70</volume> <fpage>95</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7003e2</pub-id> <pub-id pub-id-type="pmid">33476315</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garrison</surname> <given-names>E.</given-names></name> <name><surname>Marth</surname> <given-names>G.</given-names></name></person-group> (<year>2012</year>). <article-title>Haplotype-based variant detection from short-read sequencing.</article-title> <source><italic>arXiv</italic></source> <comment>[Preprint] arXiv:1207.3907</comment>.</citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ge</surname> <given-names>X. Y.</given-names></name> <name><surname>Li</surname> <given-names>J. L.</given-names></name> <name><surname>Yang</surname> <given-names>X. L.</given-names></name> <name><surname>Chmura</surname> <given-names>A. A.</given-names></name> <name><surname>Zhu</surname> <given-names>G.</given-names></name> <name><surname>Epstein</surname> <given-names>J. H.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Isolation and characterization of a bat SARS-like coronavirus that uses the ACE2 receptor.</article-title> <source><italic>Nature</italic></source> <volume>503</volume> <fpage>535</fpage>&#x2013;<lpage>538</lpage>. <pub-id pub-id-type="doi">10.1038/nature12711</pub-id> <pub-id pub-id-type="pmid">24172901</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Giardine</surname> <given-names>B.</given-names></name> <name><surname>Riemer</surname> <given-names>C.</given-names></name> <name><surname>Hardison</surname> <given-names>R. C.</given-names></name> <name><surname>Burhans</surname> <given-names>R.</given-names></name> <name><surname>Elnitski</surname> <given-names>L.</given-names></name> <name><surname>Shah</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2005</year>). <article-title>Galaxy: a platform for interactive large-scale genome analysis.</article-title> <source><italic>Genome Res.</italic></source> <volume>15</volume> <fpage>1451</fpage>&#x2013;<lpage>1455</lpage>. <pub-id pub-id-type="doi">10.1101/gr.4086505</pub-id> <pub-id pub-id-type="pmid">16169926</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goebel</surname> <given-names>S. J.</given-names></name> <name><surname>Hsue</surname> <given-names>B.</given-names></name> <name><surname>Dombrowski</surname> <given-names>T. F.</given-names></name> <name><surname>Masters</surname> <given-names>P. S.</given-names></name></person-group> (<year>2004</year>). <article-title>Characterization of the RNA components of a putative molecular switch in the 3&#x2032; untranslated region of the murine coronavirus genome.</article-title> <source><italic>J. Virol.</italic></source> <volume>78</volume> <fpage>669</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1128/jvi.78.2.669-682.2004</pub-id> <pub-id pub-id-type="pmid">14694098</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gojobori</surname> <given-names>T.</given-names></name> <name><surname>Moriyama</surname> <given-names>E. N.</given-names></name> <name><surname>Kimura</surname> <given-names>M.</given-names></name></person-group> (<year>1990</year>). <article-title>Molecular clock of viral evolution, and the neutral theory.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>87</volume> <fpage>10015</fpage>&#x2013;<lpage>10018</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.87.24.10015</pub-id> <pub-id pub-id-type="pmid">2263602</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>Y. N.</given-names></name> <name><surname>Tsao</surname> <given-names>K. C.</given-names></name> <name><surname>Hsiao</surname> <given-names>M. J.</given-names></name> <name><surname>Huang</surname> <given-names>C. G.</given-names></name> <name><surname>Huang</surname> <given-names>P. N.</given-names></name> <name><surname>Huang</surname> <given-names>P. W.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>SARS-CoV-2 genomic surveillance in Taiwan revealed novel ORF8-deletion mutant and clade possibly associated with infections in Middle East.</article-title> <source><italic>Emerg. Microbes Infect.</italic></source> <volume>9</volume> <fpage>1457</fpage>&#x2013;<lpage>1466</lpage>. <pub-id pub-id-type="doi">10.1080/22221751.2020.1782271</pub-id> <pub-id pub-id-type="pmid">32543353</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hachim</surname> <given-names>A.</given-names></name> <name><surname>Kavian</surname> <given-names>N.</given-names></name> <name><surname>Cohen</surname> <given-names>C. A.</given-names></name> <name><surname>Chin</surname> <given-names>A. W. H.</given-names></name> <name><surname>Chu</surname> <given-names>D. K. W.</given-names></name> <name><surname>Mok</surname> <given-names>C. K. P.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>ORF8 and ORF3b antibodies are accurate serological markers of early and late SARS-CoV-2 infection.</article-title> <source><italic>Nat. Immunol.</italic></source> <volume>21</volume> <fpage>1293</fpage>&#x2013;<lpage>1301</lpage>. <pub-id pub-id-type="doi">10.1038/s41590-020-0773-7</pub-id> <pub-id pub-id-type="pmid">32807944</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoffmann</surname> <given-names>M.</given-names></name> <name><surname>Kleine-Weber</surname> <given-names>H.</given-names></name> <name><surname>Schroeder</surname> <given-names>S.</given-names></name> <name><surname>Kruger</surname> <given-names>N.</given-names></name> <name><surname>Herrler</surname> <given-names>T.</given-names></name> <name><surname>Erichsen</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>SARS-CoV-2 cell entry depends on ACE2 and TMPRSS2 and is blocked by a clinically proven protease inhibitor.</article-title> <source><italic>Cell</italic></source> <volume>181</volume> <fpage>271</fpage>&#x2013;<lpage>280.e8</lpage>.</citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hourdel</surname> <given-names>V.</given-names></name> <name><surname>Kwasiborski</surname> <given-names>A.</given-names></name> <name><surname>Baliere</surname> <given-names>C.</given-names></name> <name><surname>Matheus</surname> <given-names>S.</given-names></name> <name><surname>Batejat</surname> <given-names>C. F.</given-names></name> <name><surname>Manuguerra</surname> <given-names>J. C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Rapid genomic characterization of SARS-CoV-2 by direct amplicon-based sequencing through comparison of MinION and illumina iSeq100(TM) system.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>11</volume>:<issue>571328</issue>.</citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hsue</surname> <given-names>B.</given-names></name> <name><surname>Hartshorne</surname> <given-names>T.</given-names></name> <name><surname>Masters</surname> <given-names>P. S.</given-names></name></person-group> (<year>2000</year>). <article-title>Characterization of an essential RNA secondary structure in the 3&#x2032; untranslated region of the murine coronavirus genome.</article-title> <source><italic>J. Virol.</italic></source> <volume>74</volume> <fpage>6911</fpage>&#x2013;<lpage>6921</lpage>. <pub-id pub-id-type="doi">10.1128/jvi.74.15.6911-6921.2000</pub-id> <pub-id pub-id-type="pmid">10888630</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ke</surname> <given-names>Z.</given-names></name> <name><surname>Oton</surname> <given-names>J.</given-names></name> <name><surname>Qu</surname> <given-names>K.</given-names></name> <name><surname>Cortese</surname> <given-names>M.</given-names></name> <name><surname>Zila</surname> <given-names>V.</given-names></name> <name><surname>McKeane</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Structures and distributions of SARS-CoV-2 spike proteins on intact virions.</article-title> <source><italic>Nature</italic></source> <volume>588</volume> <fpage>498</fpage>&#x2013;<lpage>502</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2665-2</pub-id> <pub-id pub-id-type="pmid">32805734</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kernighan</surname> <given-names>B. W.</given-names></name> <name><surname>Morgan</surname> <given-names>S. P.</given-names></name></person-group> (<year>1982</year>). <article-title>The UNIX operating system: a model for software design.</article-title> <source><italic>Science</italic></source> <volume>215</volume> <fpage>779</fpage>&#x2013;<lpage>783</lpage>. <pub-id pub-id-type="doi">10.1126/science.215.4534.779</pub-id> <pub-id pub-id-type="pmid">17747840</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khan</surname> <given-names>K. A.</given-names></name> <name><surname>Cheung</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>Presence of mismatches between diagnostic PCR assays and coronavirus SARS-CoV-2 genome.</article-title> <source><italic>R. Soc. Open Sci.</italic></source> <volume>7</volume>:<issue>200636</issue>. <pub-id pub-id-type="doi">10.1098/rsos.200636</pub-id> <pub-id pub-id-type="pmid">32742701</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kino</surname> <given-names>K.</given-names></name> <name><surname>Sugiyama</surname> <given-names>H.</given-names></name></person-group> (<year>2001</year>). <article-title>Possible cause of G-C&#x2013;&#x003E;C-G transversion mutation by guanine oxidation product, imidazolone.</article-title> <source><italic>Chem. Biol.</italic></source> <volume>8</volume> <fpage>369</fpage>&#x2013;<lpage>378</lpage>. <pub-id pub-id-type="doi">10.1016/s1074-5521(01)00019-9</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Korber</surname> <given-names>B.</given-names></name> <name><surname>Fischer</surname> <given-names>W. M.</given-names></name> <name><surname>Gnanakaran</surname> <given-names>S.</given-names></name> <name><surname>Yoon</surname> <given-names>H.</given-names></name> <name><surname>Theiler</surname> <given-names>J.</given-names></name> <name><surname>Abfalterer</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Tracking changes in SARS-CoV-2 spike: evidence that D614G increases infectivity of the COVID-19 virus.</article-title> <source><italic>Cell</italic></source> <volume>182</volume> <fpage>812</fpage>&#x2013;<lpage>827.e19</lpage>.</citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kutzner</surname> <given-names>C.</given-names></name> <name><surname>Pall</surname> <given-names>S.</given-names></name> <name><surname>Fechner</surname> <given-names>M.</given-names></name> <name><surname>Esztermann</surname> <given-names>A.</given-names></name> <name><surname>de Groot</surname> <given-names>B. L.</given-names></name> <name><surname>Grubmuller</surname> <given-names>H.</given-names></name></person-group> (<year>2015</year>). <article-title>Best bang for your buck: GPU nodes for GROMACS biomolecular simulations.</article-title> <source><italic>J. Comput. Chem.</italic></source> <volume>36</volume> <fpage>1990</fpage>&#x2013;<lpage>2008</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.24030</pub-id> <pub-id pub-id-type="pmid">26238484</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lam</surname> <given-names>J. Y.</given-names></name> <name><surname>Yuen</surname> <given-names>C. K.</given-names></name> <name><surname>Ip</surname> <given-names>J. D.</given-names></name> <name><surname>Wong</surname> <given-names>W. M.</given-names></name> <name><surname>To</surname> <given-names>K. K.</given-names></name> <name><surname>Yuen</surname> <given-names>K. Y.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Loss of orf3b in the circulating SARS-CoV-2 strains.</article-title> <source><italic>Emerg. Microbes Infect.</italic></source> <volume>9</volume> <fpage>2685</fpage>&#x2013;<lpage>2696</lpage>. <pub-id pub-id-type="doi">10.1080/22221751.2020.1852892</pub-id> <pub-id pub-id-type="pmid">33205709</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lei</surname> <given-names>X.</given-names></name> <name><surname>Dong</surname> <given-names>X.</given-names></name> <name><surname>Ma</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Xiao</surname> <given-names>X.</given-names></name> <name><surname>Tian</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Activation and evasion of type I interferon responses by SARS-CoV-2.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>11</volume>:<issue>3810</issue>.</citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Minimap2: pairwise alignment for nucleotide sequences.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id> <pub-id pub-id-type="pmid">29750242</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id> <pub-id pub-id-type="pmid">19505943</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Mao</surname> <given-names>L.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Yu</surname> <given-names>X.</given-names></name> <name><surname>Sun</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Rapid genomic characterization of SARS-CoV-2 viruses from clinical specimens using nanopore sequencing.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>10</volume>:<issue>17492</issue>.</citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Q.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Nie</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Hao</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>The impact of mutations in SARS-CoV-2 spike on viral infectivity and antigenicity.</article-title> <source><italic>Cell</italic></source> <volume>182</volume> <fpage>1284</fpage>&#x2013;<lpage>1294.e9</lpage>.</citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Moore</surname> <given-names>M. J.</given-names></name> <name><surname>Vasilieva</surname> <given-names>N.</given-names></name> <name><surname>Sui</surname> <given-names>J.</given-names></name> <name><surname>Wong</surname> <given-names>S. K.</given-names></name> <name><surname>Berne</surname> <given-names>M. A.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Angiotensin-converting enzyme 2 is a functional receptor for the SARS coronavirus.</article-title> <source><italic>Nature</italic></source> <volume>426</volume> <fpage>450</fpage>&#x2013;<lpage>454</lpage>.</citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Deleo</surname> <given-names>C. J.</given-names></name></person-group> (<year>2006</year>). <article-title>RNA damage and surveillance under oxidative stress.</article-title> <source><italic>IUBMB Life</italic></source> <volume>58</volume> <fpage>581</fpage>&#x2013;<lpage>588</lpage>. <pub-id pub-id-type="doi">10.1080/15216540600946456</pub-id> <pub-id pub-id-type="pmid">17050375</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Libin</surname> <given-names>P. J. K.</given-names></name> <name><surname>Deforche</surname> <given-names>K.</given-names></name> <name><surname>Abecasis</surname> <given-names>A. B.</given-names></name> <name><surname>Theys</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>VIRULIGN: fast codon-correct alignment and annotation of viral genomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>35</volume> <fpage>1763</fpage>&#x2013;<lpage>1765</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty851</pub-id> <pub-id pub-id-type="pmid">30295730</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Q.</given-names></name> <name><surname>Zhao</surname> <given-names>S.</given-names></name> <name><surname>Shi</surname> <given-names>C. M.</given-names></name> <name><surname>Song</surname> <given-names>S.</given-names></name> <name><surname>Zhu</surname> <given-names>S.</given-names></name> <name><surname>Su</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Population Genetics of SARS-CoV-2: disentangling effects of sampling bias and infection clusters.</article-title> <source><italic>Genom. Proteom. Bioinform.</italic></source> [Online ahead of print].</citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lyons</surname> <given-names>D. M.</given-names></name> <name><surname>Lauring</surname> <given-names>A. S.</given-names></name></person-group> (<year>2017</year>). <article-title>Evidence for the Selective Basis of Transition-to-Transversion Substitution Bias in Two RNA Viruses.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>34</volume> <fpage>3205</fpage>&#x2013;<lpage>3215</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msx251</pub-id> <pub-id pub-id-type="pmid">29029187</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Miralles</surname> <given-names>R.</given-names></name> <name><surname>Gerrish</surname> <given-names>P. J.</given-names></name> <name><surname>Moya</surname> <given-names>A.</given-names></name> <name><surname>Elena</surname> <given-names>S. F.</given-names></name></person-group> (<year>1999</year>). <article-title>Clonal interference and the evolution of RNA viruses.</article-title> <source><italic>Science</italic></source> <volume>285</volume> <fpage>1745</fpage>&#x2013;<lpage>1747</lpage>. <pub-id pub-id-type="doi">10.1126/science.285.5434.1745</pub-id> <pub-id pub-id-type="pmid">10481012</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moshiri</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>ViralMSA: massively scalable reference-guided multiple sequence alignment of viral genomes.</article-title> <source><italic>bioRxiv</italic></source><comment>[Preprint]</comment> <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa743</pub-id> <pub-id pub-id-type="pmid">32814953</pub-id></citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Motulsky</surname> <given-names>H. J.</given-names></name> <name><surname>Brown</surname> <given-names>R. E.</given-names></name></person-group> (<year>2006</year>). <article-title>Detecting outliers when fitting data with nonlinear regression - a new method based on robust nonlinear regression and the false discovery rate.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>7</volume>:<issue>123</issue>.</citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mourier</surname> <given-names>T.</given-names></name> <name><surname>Sadykov</surname> <given-names>M.</given-names></name> <name><surname>Carr</surname> <given-names>M. J.</given-names></name> <name><surname>Gonzalez</surname> <given-names>G.</given-names></name> <name><surname>Hall</surname> <given-names>W. W.</given-names></name> <name><surname>Pain</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Host-directed editing of the SARS-CoV-2 genome.</article-title> <source><italic>Biochem. Biophys. Res. Commun.</italic></source> <volume>538</volume> <fpage>35</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2020.10.092</pub-id> <pub-id pub-id-type="pmid">33234239</pub-id></citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mukherjee</surname> <given-names>M.</given-names></name> <name><surname>Goswami</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>Global cataloguing of variations in untranslated regions of viral genome and prediction of key host RNA binding protein-microRNA interactions modulating genome stability in SARS-CoV-2.</article-title> <source><italic>PLoS One</italic></source> <volume>15</volume>:<issue>e0237559</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0237559</pub-id> <pub-id pub-id-type="pmid">32780783</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neches</surname> <given-names>R. Y.</given-names></name> <name><surname>Kyrpides</surname> <given-names>N. C.</given-names></name> <name><surname>Ouzounis</surname> <given-names>C. A.</given-names></name></person-group> (<year>2021</year>). <article-title>Atypical divergence of SARS-CoV-2 Orf8 from Orf7a within the Coronavirus lineage suggests potential stealthy viral strategies in immune evasion.</article-title> <source><italic>mBio</italic></source> <volume>12</volume> <fpage>e03014</fpage>&#x2013;<lpage>20</lpage>.</citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nelson</surname> <given-names>C. W.</given-names></name> <name><surname>Ardern</surname> <given-names>Z.</given-names></name> <name><surname>Goldberg</surname> <given-names>T. L.</given-names></name> <name><surname>Meng</surname> <given-names>C.</given-names></name> <name><surname>Kuo</surname> <given-names>C. H.</given-names></name> <name><surname>Ludwig</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Dynamically evolving novel overlapping gene as a factor in the SARS-CoV-2 pandemic.</article-title> <source><italic>eLife</italic></source> <volume>9</volume>:<issue>e59633</issue>.</citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nelson</surname> <given-names>C. W.</given-names></name> <name><surname>Hughes</surname> <given-names>A. L.</given-names></name></person-group> (<year>2015</year>). <article-title>Within-host nucleotide diversity of virus populations: Insights from next-generation sequencing.</article-title> <source><italic>Infect. Genet. Evol.</italic></source> <volume>30</volume> <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.meegid.2014.11.026</pub-id> <pub-id pub-id-type="pmid">25481279</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neuman</surname> <given-names>B. W.</given-names></name> <name><surname>Stein</surname> <given-names>D. A.</given-names></name> <name><surname>Kroeker</surname> <given-names>A. D.</given-names></name> <name><surname>Churchill</surname> <given-names>M. J.</given-names></name> <name><surname>Kim</surname> <given-names>A. M.</given-names></name> <name><surname>Kuhn</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2005</year>). <article-title>Inhibition, escape, and attenuated growth of severe acute respiratory syndrome coronavirus treated with antisense morpholino oligomers.</article-title> <source><italic>J. Virol.</italic></source> <volume>79</volume> <fpage>9665</fpage>&#x2013;<lpage>9676</lpage>. <pub-id pub-id-type="doi">10.1128/jvi.79.15.9665-9676.2005</pub-id> <pub-id pub-id-type="pmid">16014928</pub-id></citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ngernmuen</surname> <given-names>A.</given-names></name> <name><surname>Suktrakul</surname> <given-names>W.</given-names></name> <name><surname>Kate-Ngam</surname> <given-names>S.</given-names></name> <name><surname>Jantasuriyarat</surname> <given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>Transcriptome comparison of defense responses in the rice variety &#x2018;jao hom nin&#x2019; regarding two blast resistant genes, pish and pik.</article-title> <source><italic>Plants (Basel)</italic></source> <volume>9</volume>:<issue>694</issue>. <pub-id pub-id-type="doi">10.3390/plants9060694</pub-id> <pub-id pub-id-type="pmid">32485961</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ni</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Qian</surname> <given-names>J.</given-names></name> <name><surname>Xiao</surname> <given-names>H. X.</given-names></name> <name><surname>Shi</surname> <given-names>W. F.</given-names></name> <name><surname>Luo</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Intra-host dynamics of Ebola virus during 2014.</article-title> <source><italic>Nat. Microbiol.</italic></source> <volume>1</volume>:<issue>16151</issue>.</citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nomburg</surname> <given-names>J.</given-names></name> <name><surname>Meyerson</surname> <given-names>M.</given-names></name> <name><surname>DeCaprio</surname> <given-names>J. A.</given-names></name></person-group> (<year>2020</year>). <article-title>Pervasive generation of non-canonical subgenomic RNAs by SARS-CoV-2.</article-title> <source><italic>Genome Med.</italic></source> <volume>12</volume>:<issue>108</issue>.</citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Olm</surname> <given-names>M. R.</given-names></name> <name><surname>Crits-Christoph</surname> <given-names>A.</given-names></name> <name><surname>Bouma-Gregson</surname> <given-names>K.</given-names></name> <name><surname>Firek</surname> <given-names>B.</given-names></name> <name><surname>Morowitz</surname> <given-names>M. J.</given-names></name> <name><surname>Banfield</surname> <given-names>J. F.</given-names></name></person-group> (<year>2020</year>). <article-title>InStrain enables population genomic analysis from metagenomic data and rigorous detection of identical microbial strains.</article-title> <source><italic>bioRxiv</italic></source><comment>[Preprint]</comment> <pub-id pub-id-type="doi">10.1101/2020.01.22.915579</pub-id> bioRxiv 2020.01.22.915579.</citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Olm</surname> <given-names>M. R.</given-names></name> <name><surname>Crits-Christoph</surname> <given-names>A.</given-names></name> <name><surname>Bouma-Gregson</surname> <given-names>K.</given-names></name> <name><surname>Firek</surname> <given-names>B. A.</given-names></name> <name><surname>Morowitz</surname> <given-names>M. J.</given-names></name> <name><surname>Banfield</surname> <given-names>J. F.</given-names></name></person-group> (<year>2021</year>). <article-title>inStrain profiles population microdiversity from metagenomic data and sensitively detects shared microbial strains.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <pub-id pub-id-type="doi">10.1038/s41587-020-00797-0</pub-id> <pub-id pub-id-type="pmid">33462508</pub-id></citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Osorio</surname> <given-names>N. S.</given-names></name> <name><surname>Correia-Neves</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Implication of SARS-CoV-2 evolution in the sensitivity of RT-qPCR diagnostic assays.</article-title> <source><italic>Lancet Infect. Dis.</italic></source> <volume>21</volume> <fpage>166</fpage>&#x2013;<lpage>167</lpage>. <pub-id pub-id-type="doi">10.1016/s1473-3099(20)30435-7</pub-id> [Online ahead of print].</citation></ref>
<ref id="B64"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pereira</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>Evolutionary dynamics of the SARS-CoV-2 ORF8 accessory gene.</article-title> <source><italic>Infect. Genet. Evol.</italic></source> <volume>85</volume>:<issue>104525</issue>. <pub-id pub-id-type="doi">10.1016/j.meegid.2020.104525</pub-id> <pub-id pub-id-type="pmid">32890763</pub-id></citation></ref>
<ref id="B65"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Plante</surname> <given-names>J. A.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Xia</surname> <given-names>H.</given-names></name> <name><surname>Johnson</surname> <given-names>B. A.</given-names></name> <name><surname>Lokugamage</surname> <given-names>K. G.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Spike mutation D614G alters SARS-CoV-2 fitness.</article-title> <source><italic>Nature</italic></source> <volume>592</volume> <fpage>116</fpage>&#x2013;<lpage>121</lpage>.</citation></ref>
<ref id="B66"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Portelli</surname> <given-names>S.</given-names></name> <name><surname>Olshansky</surname> <given-names>M.</given-names></name> <name><surname>Rodrigues</surname> <given-names>C. H. M.</given-names></name> <name><surname>D&#x2019;Souza</surname> <given-names>E. N.</given-names></name> <name><surname>Myung</surname> <given-names>Y.</given-names></name> <name><surname>Silk</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Exploring the structural distribution of genetic variation in SARS-CoV-2 with the COVID-3D online resource.</article-title> <source><italic>Nat. Genet.</italic></source> <volume>52</volume> <fpage>999</fpage>&#x2013;<lpage>1001</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-020-0693-3</pub-id> <pub-id pub-id-type="pmid">32908256</pub-id></citation></ref>
<ref id="B67"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Queromes</surname> <given-names>G.</given-names></name> <name><surname>Destras</surname> <given-names>G.</given-names></name> <name><surname>Bal</surname> <given-names>A.</given-names></name> <name><surname>Regue</surname> <given-names>H.</given-names></name> <name><surname>Burfin</surname> <given-names>G.</given-names></name> <name><surname>Brun</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Characterization of SARS-CoV-2 ORF6 deletion variants detected in a nosocomial cluster during routine genomic surveillance, Lyon, France.</article-title> <source><italic>Emerg. Microbes Infect.</italic></source> <volume>10</volume> <fpage>167</fpage>&#x2013;<lpage>177</lpage>. <pub-id pub-id-type="doi">10.1080/22221751.2021.1872351</pub-id> <pub-id pub-id-type="pmid">33399033</pub-id></citation></ref>
<ref id="B68"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rambaut</surname> <given-names>A.</given-names></name> <name><surname>Holmes</surname> <given-names>E. C.</given-names></name> <name><surname>O&#x2019;Toole</surname> <given-names>A.</given-names></name> <name><surname>Hill</surname> <given-names>V.</given-names></name> <name><surname>McCrone</surname> <given-names>J. T.</given-names></name> <name><surname>Ruis</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>A dynamic nomenclature proposal for SARS-CoV-2 lineages to assist genomic epidemiology.</article-title> <source><italic>Nat. Microbiol.</italic></source> <volume>5</volume> <fpage>1403</fpage>&#x2013;<lpage>1407</lpage>. <pub-id pub-id-type="doi">10.1038/s41564-020-0770-5</pub-id> <pub-id pub-id-type="pmid">32669681</pub-id></citation></ref>
<ref id="B69"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rashid</surname> <given-names>F.</given-names></name> <name><surname>Dzakah</surname> <given-names>E. E.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Tang</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>The ORF8 protein of SARS-CoV-2 induced endoplasmic reticulum stress and mediated immune evasion by antagonizing production of interferon beta.</article-title> <source><italic>Virus Res.</italic></source> <volume>296</volume>:<issue>198350</issue>. <pub-id pub-id-type="doi">10.1016/j.virusres.2021.198350</pub-id> <pub-id pub-id-type="pmid">33626380</pub-id></citation></ref>
<ref id="B70"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robertson</surname> <given-names>M. P.</given-names></name> <name><surname>Igel</surname> <given-names>H.</given-names></name> <name><surname>Baertsch</surname> <given-names>R.</given-names></name> <name><surname>Haussler</surname> <given-names>D.</given-names></name> <name><surname>Ares</surname> <given-names>M.</given-names> <suffix>Jr.</suffix></name> <name><surname>Scott</surname> <given-names>W. G.</given-names></name></person-group> (<year>2005</year>). <article-title>The structure of a rigorously conserved RNA element within the SARS virus genome.</article-title> <source><italic>PLoS Biol.</italic></source> <volume>3</volume>:<issue>e5</issue>. <pub-id pub-id-type="doi">10.1371/journal.pbio.0030005</pub-id> <pub-id pub-id-type="pmid">15630477</pub-id></citation></ref>
<ref id="B71"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>J. T.</given-names></name> <name><surname>Thorvaldsdottir</surname> <given-names>H.</given-names></name> <name><surname>Wenger</surname> <given-names>A. M.</given-names></name> <name><surname>Zehir</surname> <given-names>A.</given-names></name> <name><surname>Mesirov</surname> <given-names>J. P.</given-names></name></person-group> (<year>2017</year>). <article-title>Variant review with the integrative genomics viewer.</article-title> <source><italic>Cancer Res.</italic></source> <volume>77</volume> <fpage>E31</fpage>&#x2013;<lpage>E34</lpage>. <pub-id pub-id-type="doi">10.1158/0008-5472.Can-17-0337</pub-id> <pub-id pub-id-type="pmid">29092934</pub-id></citation></ref>
<ref id="B72"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>J. T.</given-names></name> <name><surname>Thorvaldsdottir</surname> <given-names>H.</given-names></name> <name><surname>Winckler</surname> <given-names>W.</given-names></name> <name><surname>Guttman</surname> <given-names>M.</given-names></name> <name><surname>Lander</surname> <given-names>E. S.</given-names></name> <name><surname>Getz</surname> <given-names>G.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Integrative genomics viewer.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>29</volume> <fpage>24</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.1754</pub-id> <pub-id pub-id-type="pmid">21221095</pub-id></citation></ref>
<ref id="B73"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sanner</surname> <given-names>M. F.</given-names></name></person-group> (<year>1999</year>). <article-title>Python: a programming language for software integration and development.</article-title> <source><italic>J. Mol. Graph. Model.</italic></source> <volume>17</volume> <fpage>57</fpage>&#x2013;<lpage>61</lpage>.</citation></ref>
<ref id="B74"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>W.</given-names></name> <name><surname>Le</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Hu</surname> <given-names>F.</given-names></name></person-group> (<year>2016</year>). <article-title>SeqKit: a cross-platform and ultrafast Toolkit for FASTA/Q file manipulation.</article-title> <source><italic>PLoS One</italic></source> <volume>11</volume>:<issue>e0163962</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0163962</pub-id> <pub-id pub-id-type="pmid">27706213</pub-id></citation></ref>
<ref id="B75"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shu</surname> <given-names>Y.</given-names></name> <name><surname>McCauley</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>GISAID: global initiative on sharing all influenza data - from vision to reality.</article-title> <source><italic>Euro Surveill.</italic></source> <volume>22</volume>:<issue>30494</issue>.</citation></ref>
<ref id="B76"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simmonds</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>Rampant C&#x2192;U Hypermutation in the Genomes of SARS-CoV-2 and Other Coronaviruses: Causes and Consequences for Their Short- and Long-Term Evolutionary Trajectories.</article-title> <source><italic>mSphere</italic></source> <volume>5</volume> <fpage>e00408</fpage>&#x2013;<lpage>20</lpage>.</citation></ref>
<ref id="B77"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Starr</surname> <given-names>T. N.</given-names></name> <name><surname>Greaney</surname> <given-names>A. J.</given-names></name> <name><surname>Hilton</surname> <given-names>S. K.</given-names></name> <name><surname>Ellis</surname> <given-names>D.</given-names></name> <name><surname>Crawford</surname> <given-names>K. H. D.</given-names></name> <name><surname>Dingens</surname> <given-names>A. S.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Deep mutational scanning of SARS-CoV-2 receptor binding domain reveals constraints on folding and ACE2 binding.</article-title> <source><italic>Cell</italic></source> <volume>182</volume> <fpage>1295</fpage>&#x2013;<lpage>1310.e20</lpage>.</citation></ref>
<ref id="B78"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sterlin</surname> <given-names>D.</given-names></name> <name><surname>Mathian</surname> <given-names>A.</given-names></name> <name><surname>Miyara</surname> <given-names>M.</given-names></name> <name><surname>Mohr</surname> <given-names>A.</given-names></name> <name><surname>Anna</surname> <given-names>F.</given-names></name> <name><surname>Claer</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>IgA dominates the early neutralizing antibody response to SARS-CoV-2.</article-title> <source><italic>Sci. Transl. Med.</italic></source> <volume>13</volume>:<issue>eabd2223</issue>. <pub-id pub-id-type="doi">10.1126/scitranslmed.abd2223</pub-id> <pub-id pub-id-type="pmid">33288662</pub-id></citation></ref>
<ref id="B79"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Su</surname> <given-names>Y. C. F.</given-names></name> <name><surname>Anderson</surname> <given-names>D. E.</given-names></name> <name><surname>Young</surname> <given-names>B. E.</given-names></name> <name><surname>Linster</surname> <given-names>M.</given-names></name> <name><surname>Zhu</surname> <given-names>F.</given-names></name> <name><surname>Jayakumar</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Discovery and genomic characterization of a 382-nucleotide deletion in ORF7b and ORF8 during the early evolution of SARS-CoV-2.</article-title> <source><italic>mBio</italic></source> <volume>11</volume> <fpage>e01610</fpage>&#x2013;<lpage>20</lpage>.</citation></ref>
<ref id="B80"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taefehshokr</surname> <given-names>N.</given-names></name> <name><surname>Taefehshokr</surname> <given-names>S.</given-names></name> <name><surname>Hemmat</surname> <given-names>N.</given-names></name> <name><surname>Heit</surname> <given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>Covid-19: perspectives on innate immune evasion.</article-title> <source><italic>Front. Immunol.</italic></source> <volume>11</volume>:<issue>580641</issue>.</citation></ref>
<ref id="B81"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tajima</surname> <given-names>F.</given-names></name></person-group> (<year>1989</year>). <article-title>Statistical method for testing the neutral mutation hypothesis by DNA polymorphism.</article-title> <source><italic>Genetics</italic></source> <volume>123</volume> <fpage>585</fpage>&#x2013;<lpage>595</lpage>. <pub-id pub-id-type="doi">10.1093/genetics/123.3.585</pub-id></citation></ref>
<ref id="B82"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tengs</surname> <given-names>T.</given-names></name> <name><surname>Delwiche</surname> <given-names>C. F.</given-names></name> <name><surname>Monceyron Jonassen</surname> <given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>A genetic element in the SARS-CoV-2 genome is shared with multiple insect species.</article-title> <source><italic>J. Gen. Virol.</italic></source> <volume>102</volume>.</citation></ref>
<ref id="B83"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tengs</surname> <given-names>T.</given-names></name> <name><surname>Jonassen</surname> <given-names>C. M.</given-names></name></person-group> (<year>2016</year>). <article-title>Distribution and evolutionary history of the mobile genetic element s2m in Coronaviruses.</article-title> <source><italic>Diseases</italic></source> <volume>4</volume>:<issue>27</issue>. <pub-id pub-id-type="doi">10.3390/diseases4030027</pub-id> <pub-id pub-id-type="pmid">28933407</pub-id></citation></ref>
<ref id="B84"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thorvaldsdottir</surname> <given-names>H.</given-names></name> <name><surname>Robinson</surname> <given-names>J. T.</given-names></name> <name><surname>Mesirov</surname> <given-names>J. P.</given-names></name></person-group> (<year>2013</year>). <article-title>Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration.</article-title> <source><italic>Brief. Bioinform.</italic></source> <volume>14</volume> <fpage>178</fpage>&#x2013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbs017</pub-id> <pub-id pub-id-type="pmid">22517427</pub-id></citation></ref>
<ref id="B85"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tyson</surname> <given-names>J. R.</given-names></name> <name><surname>James</surname> <given-names>P.</given-names></name> <name><surname>Stoddart</surname> <given-names>D.</given-names></name> <name><surname>Sparks</surname> <given-names>N.</given-names></name> <name><surname>Wickenhagen</surname> <given-names>A.</given-names></name> <name><surname>Hall</surname> <given-names>G.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Improvements to the ARTIC multiplex PCR method for SARS-CoV-2 genome sequencing using nanopore.</article-title> <source><italic>bioRxiv</italic></source><comment>[Preprint]</comment> <pub-id pub-id-type="doi">10.1101/2020.09.04.283077</pub-id> <pub-id pub-id-type="pmid">32908977</pub-id></citation></ref>
<ref id="B86"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van Der Spoel</surname> <given-names>D.</given-names></name> <name><surname>Lindahl</surname> <given-names>E.</given-names></name> <name><surname>Hess</surname> <given-names>B.</given-names></name> <name><surname>Groenhof</surname> <given-names>G.</given-names></name> <name><surname>Mark</surname> <given-names>A. E.</given-names></name> <name><surname>Berendsen</surname> <given-names>H. J.</given-names></name></person-group> (<year>2005</year>). <article-title>GROMACS: fast, flexible, and free.</article-title> <source><italic>J. Comput. Chem.</italic></source> <volume>26</volume> <fpage>1701</fpage>&#x2013;<lpage>1718</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.20291</pub-id> <pub-id pub-id-type="pmid">16211538</pub-id></citation></ref>
<ref id="B87"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>van der Walt</surname> <given-names>E.</given-names></name> <name><surname>Martin</surname> <given-names>D. P.</given-names></name> <name><surname>Varsani</surname> <given-names>A.</given-names></name> <name><surname>Polston</surname> <given-names>J. E.</given-names></name> <name><surname>Rybicki</surname> <given-names>E. P.</given-names></name></person-group> (<year>2008</year>). <article-title>Experimental observations of rapid Maize streak virus evolution reveal a strand-specific nucleotide substitution bias.</article-title> <source><italic>Virol. J.</italic></source> <volume>5</volume>:<issue>104</issue>. <pub-id pub-id-type="doi">10.1186/1743-422x-5-104</pub-id> <pub-id pub-id-type="pmid">18816368</pub-id></citation></ref>
<ref id="B88"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>van Dorp</surname> <given-names>L.</given-names></name> <name><surname>Richard</surname> <given-names>D.</given-names></name> <name><surname>Tan</surname> <given-names>C. C. S.</given-names></name> <name><surname>Shaw</surname> <given-names>L. P.</given-names></name> <name><surname>Acman</surname> <given-names>M.</given-names></name> <name><surname>Balloux</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>No evidence for increased transmissibility from recurrent mutations in SARS-CoV-2.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>11</volume>:<issue>5986</issue>.</citation></ref>
<ref id="B89"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Kong</surname> <given-names>X.</given-names></name> <name><surname>Geng</surname> <given-names>J.</given-names></name> <name><surname>Xiao</surname> <given-names>D.</given-names></name> <name><surname>Ma</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Long-term coexistence of SARS-CoV-2 with antibody response in COVID-19 patients.</article-title> <source><italic>J. Med. Virol.</italic></source> <volume>92</volume> <fpage>1684</fpage>&#x2013;<lpage>1689</lpage>. <pub-id pub-id-type="doi">10.1002/jmv.25946</pub-id> <pub-id pub-id-type="pmid">32343415</pub-id></citation></ref>
<ref id="B90"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weisblum</surname> <given-names>Y.</given-names></name> <name><surname>Schmidt</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>F.</given-names></name> <name><surname>DaSilva</surname> <given-names>J.</given-names></name> <name><surname>Poston</surname> <given-names>D.</given-names></name> <name><surname>Lorenzi</surname> <given-names>J. C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Escape from neutralizing antibodies by SARS-CoV-2 spike protein variants.</article-title> <source><italic>Elife</italic></source> <volume>9</volume>:<issue>e61312</issue>.</citation></ref>
<ref id="B91"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>G. D.</given-names></name> <name><surname>Chang</surname> <given-names>R. Y.</given-names></name> <name><surname>Brian</surname> <given-names>D. A.</given-names></name></person-group> (<year>1999</year>). <article-title>A phylogenetically conserved hairpin-type 3&#x2032; untranslated region pseudoknot functions in coronavirus RNA replication.</article-title> <source><italic>J. Virol.</italic></source> <volume>73</volume> <fpage>8349</fpage>&#x2013;<lpage>8355</lpage>. <pub-id pub-id-type="doi">10.1128/jvi.73.10.8349-8355.1999</pub-id> <pub-id pub-id-type="pmid">10482585</pub-id></citation></ref>
<ref id="B92"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Woloshin</surname> <given-names>S.</given-names></name> <name><surname>Patel</surname> <given-names>N.</given-names></name> <name><surname>Kesselheim</surname> <given-names>A. S.</given-names></name></person-group> (<year>2020</year>). <article-title>False negative tests for SARS-CoV-2 Infection &#x2013; challenges and implications.</article-title> <source><italic>N. Engl. J. Med.</italic></source> <volume>383</volume>:<issue>e38</issue>. <pub-id pub-id-type="doi">10.1056/nejmp2015897</pub-id> <pub-id pub-id-type="pmid">32502334</pub-id></citation></ref>
<ref id="B93"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wright</surname> <given-names>C. F.</given-names></name> <name><surname>Morelli</surname> <given-names>M. J.</given-names></name> <name><surname>Thebaud</surname> <given-names>G.</given-names></name> <name><surname>Knowles</surname> <given-names>N. J.</given-names></name> <name><surname>Herzyk</surname> <given-names>P.</given-names></name> <name><surname>Paton</surname> <given-names>D. J.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Beyond the consensus: dissecting within-host viral population diversity of foot-and-mouth disease virus by using next-generation genome sequencing.</article-title> <source><italic>J. Virol.</italic></source> <volume>85</volume> <fpage>2266</fpage>&#x2013;<lpage>2275</lpage>. <pub-id pub-id-type="doi">10.1128/jvi.01396-10</pub-id> <pub-id pub-id-type="pmid">21159860</pub-id></citation></ref>
<ref id="B94"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>A.</given-names></name> <name><surname>Peng</surname> <given-names>Y.</given-names></name> <name><surname>Huang</surname> <given-names>B.</given-names></name> <name><surname>Ding</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Niu</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Genome composition and divergence of the novel coronavirus (2019-nCoV) Originating in China.</article-title> <source><italic>Cell Host Microbe</italic></source> <volume>27</volume> <fpage>325</fpage>&#x2013;<lpage>328</lpage>. <pub-id pub-id-type="doi">10.1016/j.chom.2020.02.001</pub-id> <pub-id pub-id-type="pmid">32035028</pub-id></citation></ref>
<ref id="B95"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>H.</given-names></name> <name><surname>Cao</surname> <given-names>Z.</given-names></name> <name><surname>Xie</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>J. Y.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Evasion of Type I interferon by SARS-CoV-2.</article-title> <source><italic>Cell Rep.</italic></source> <volume>33</volume>:<issue>108234</issue>.</citation></ref>
<ref id="B96"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yeh</surname> <given-names>T. Y.</given-names></name> <name><surname>Contreras</surname> <given-names>G. P.</given-names></name></person-group> (<year>2020</year>). <article-title>Emerging viral mutants in Australia suggest RNA recombination event in the SARS-CoV-2 genome.</article-title> <source><italic>Med. J. Aust.</italic></source> <volume>213</volume> <fpage>44</fpage>&#x2013;<lpage>44.e1</lpage>.</citation></ref>
<ref id="B97"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yurkovetskiy</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Pascal</surname> <given-names>K. E.</given-names></name> <name><surname>Tomkins-Tinch</surname> <given-names>C.</given-names></name> <name><surname>Nyalile</surname> <given-names>T. P.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Structural and functional analysis of the D614G SARS-CoV-2 spike protein variant.</article-title> <source><italic>Cell</italic></source> <volume>183</volume> <fpage>739</fpage>&#x2013;<lpage>751.e8</lpage>.</citation></ref>
<ref id="B98"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Jackson</surname> <given-names>C. B.</given-names></name> <name><surname>Mou</surname> <given-names>H.</given-names></name> <name><surname>Ojha</surname> <given-names>A.</given-names></name> <name><surname>Peng</surname> <given-names>H.</given-names></name> <name><surname>Quinlan</surname> <given-names>B. D.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>SARS-CoV-2 spike-protein D614G mutation increases virion spike density and infectivity.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>11</volume>:<issue>6013</issue>.</citation></ref>
<ref id="B99"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>B.</given-names></name> <name><surname>Yuan</surname> <given-names>Y.</given-names></name> <name><surname>Huang</surname> <given-names>F.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>The ORF8 Protein of SARS-CoV-2 mediates immune evasion through potently downregulating MHC-I.</article-title> <source><italic>bioRxiv</italic></source><comment>[Preprint]</comment> <pub-id pub-id-type="doi">10.1101/2020.05.24.111823</pub-id> bioRxiv 2020.05.24.111823.</citation></ref>
</ref-list><fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://coronavirus.jhu.edu/map.html">https://coronavirus.jhu.edu/map.html</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/CDCgov/SARS-CoV-2_Sequencing">https://github.com/CDCgov/SARS-CoV-2_Sequencing</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.gisaid.org/">https://www.gisaid.org/</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/sra">https://www.ncbi.nlm.nih.gov/sra</ext-link></p></fn>
<fn id="footnote5">
<label>5</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/sars-cov-2/">https://www.ncbi.nlm.nih.gov/sars-cov-2/</ext-link></p></fn>
<fn id="footnote6">
<label>6</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.gisaid.org/">https://www.gisaid.org/</ext-link></p></fn>
<fn id="footnote7">
<label>7</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/ncbi/sra-tools">https://github.com/ncbi/sra-tools</ext-link></p></fn>
<fn id="footnote8">
<label>8</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/ekg/freebayes">https://github.com/ekg/freebayes</ext-link></p></fn>
<fn id="footnote9">
<label>9</label>
<p><ext-link ext-link-type="uri" xlink:href="https://jacquard.readthedocs.io/en/v0.42/index.html">https://jacquard.readthedocs.io/en/v0.42/index.html</ext-link></p></fn>
<fn id="footnote10">
<label>10</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/vcflib/vcflib">https://github.com/vcflib/vcflib</ext-link></p></fn>
<fn id="footnote11">
<label>11</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.graphpad.com/scientific-software/prism/">https://www.graphpad.com/scientific-software/prism/</ext-link></p></fn>
<fn id="footnote12">
<label>12</label>
<p><ext-link ext-link-type="uri" xlink:href="http://software.broadinstitute.org/software/igv/home">http://software.broadinstitute.org/software/igv/home</ext-link></p></fn>
<fn id="footnote13">
<label>13</label>
<p><ext-link ext-link-type="uri" xlink:href="https://instrain.readthedocs.io/en/latest/">https://instrain.readthedocs.io/en/latest/</ext-link></p></fn>
<fn id="footnote14">
<label>14</label>
<p><ext-link ext-link-type="uri" xlink:href="http://manual.gromacs.org/documentation/">http://manual.gromacs.org/documentation/</ext-link></p></fn>
<fn id="footnote15">
<label>15</label>
<p><ext-link ext-link-type="uri" xlink:href="https://usegalaxy.org/">https://usegalaxy.org/</ext-link></p></fn>
</fn-group>
</back>
</article>