<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2024.1491623</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Public Health</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Trends and impacts of SARS-CoV-2 genome sharing: a comparative analysis of China and the global community, 2020&#x2013;2023</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Feng</surname> <given-names>Yenan</given-names></name>
<uri xlink:href="https://loop.frontiersin.org/people/2702153/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Songqi</given-names></name>
<uri xlink:href="https://loop.frontiersin.org/people/2887244/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Anqi</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhao</surname> <given-names>Zhongfu</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname> <given-names>Cao</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/561367/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff><institution>National Key Laboratory of Intelligent Tracking and Forecasting for Infectious Diseases, NHC Key Laboratory of Medical Virology and Viral Diseases, National Institute for Viral Disease Control and Prevention, Chinese Center for Disease Control and Prevention</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0004">
<p>Edited by: Jessica L. Jones, United States Food and Drug Administration, United States</p>
</fn>
<fn fn-type="edited-by" id="fn0005">
<p>Reviewed by: Guennadi Kouzaev, Norwegian University of Science and Technology, Norway</p>
<p>Ruth Evangeline Timme, US Food and Drug Administration, United States</p>
<p>Mohammad Khalid, Link&#x00F6;ping University Hospital, Sweden</p>
<p>Otun Saha, Washington State University, United States</p>
<p>Neha Periwal, Jamia Hamdard University, India</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Cao Chen, <email>chencao@ivdc.chinacdc.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>11</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1491623</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>11</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Feng, Chen, Wang, Zhao and Chen.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Feng, Chen, Wang, Zhao and Chen</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Objective</title>
<p>The global sharing of pathogen genome sequences has been significantly expedited by the COVID-19 pandemic. This study aims to elucidate the global landscape of SARS-CoV-2 genome sharing between 2020 and 2023 with a focus on quantity, timeliness, and quality. Specifically, the characteristics of China are examined.</p>
</sec>
<sec id="sec2">
<title>Methods</title>
<p>SARS-CoV-2 genomes along with associated metadata were sourced from GISAID database. The genomes were analyzed to evaluate the quantity, timeliness, and quality across different countries/regions. The metadata characteristics of shared genomes in China in 2023 were examined and compared with the actual demographic data of China in 2023.</p>
</sec>
<sec id="sec3">
<title>Results</title>
<p>From 2020 to 2023, European countries consistently maintained high levels of genomic data sharing in terms of quantity, timeliness, and quality. In 2023, China made remarkable improvements in sequence sharing, ranking among the top 3.89% globally for quantity, 22.78% for timeliness, and 17.78% for quality. The genome sharing in China in 2023 covered all provinces with Shanghai Municipality contributing the most genomes. Human samples accounted for 99.73% of the shared genomes and exhibited three distinct peaks in collection dates. Males constituted 52.06%, while females constituted 47.94%. Notably, the proportion of individuals aged 65 and above was higher within the GISAID database than in China&#x2019;s overall population in 2023.</p>
</sec>
<sec id="sec4">
<title>Conclusion</title>
<p>The global sharing of SARS-CoV-2 genomes in 2020&#x2013;2023 exhibited disparities in terms of quantity, timeliness, and quality. However, China has made significant advancements since 2023 by achieving comprehensive coverage across provinces, timely dissemination of data, and widespread population monitoring. Strengthening data sharing capabilities in countries like China during the SARS-CoV-2 pandemic will play a crucial role in containing and responding to future pandemics caused by emerging pathogens.</p>
</sec>
</abstract>
<kwd-group>
<kwd>SARS-CoV-2</kwd>
<kwd>genomic sequence</kwd>
<kwd>data sharing</kwd>
<kwd>GISAID</kwd>
<kwd>China</kwd>
</kwd-group>
<contract-sponsor id="cn1">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn2">National key research and development program of China<named-content content-type="fundref-id">10.13039/501100012166</named-content></contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="44"/>
<page-count count="8"/>
<word-count count="5535"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Infectious Diseases: Epidemiology and Prevention</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<label>1</label>
<title>Introduction</title>
<p>The timely sharing of genomic sequences and associated metadata has played a crucial role in promoting global data awareness, enhancing our understanding of pathogenic evolution characteristics, and facilitating the development of detection reagents, vaccines, and drugs (<xref ref-type="bibr" rid="ref1 ref2 ref3 ref4">1&#x2013;4</xref>), which was particularly evident during the COVID-19 pandemic (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref6">6</xref>). Since January 10, 2020, when China released the initial genome sequence of SARS-CoV-2 into the Global Initiative on Sharing All Influenza Data (GISAID) database, more than 16 million SARS-CoV-2 genomes have been stored in GISAID to date (<xref ref-type="bibr" rid="ref7">7</xref>). In contrast, GenBank, another prominent repository, hosts a collection of over 8 million SARS-CoV-2 genomes (<xref ref-type="bibr" rid="ref8">8</xref>). It encompasses all other International Nucleotide Sequence Database Collaboration (INSDC) databases (<xref ref-type="bibr" rid="ref9">9</xref>), including the European Nucleotide Archive (ENA) (<xref ref-type="bibr" rid="ref10">10</xref>) and the DNA Data Bank of Japan (DDBJ) (<xref ref-type="bibr" rid="ref11">11</xref>). Other repositories, such as the China National Center for Bioinformation GenBase (<xref ref-type="bibr" rid="ref12">12</xref>), have stored more than 40,000 SARS-CoV-2 genomes. The China National GeneBank DataBase (CNGBdb) (<xref ref-type="bibr" rid="ref13">13</xref>) and the Novel Coronavirus National Science and Technology Resource Service System at the National Microbiology Data Center have collected only 87 and 305 SARS-CoV-2 genomes, respectively (<xref ref-type="bibr" rid="ref9">9</xref>). Several analysis platforms such as Outbreak.info (<xref ref-type="bibr" rid="ref14">14</xref>), <ext-link xlink:href="http://Cov-Spectrum.org" ext-link-type="uri">Cov-Spectrum.org</ext-link> (<xref ref-type="bibr" rid="ref15">15</xref>), and <ext-link xlink:href="http://CoVariants.org" ext-link-type="uri">CoVariants.org</ext-link> (<xref ref-type="bibr" rid="ref16">16</xref>) were opportunistically developed utilizing these publicly available datasets. Additionally, there have been numerous intriguing investigations conducted based on the sharing of SARS-CoV-2 data to further explore the virus&#x2019;s structure, pathogenic mechanisms, mutation biases, and more (<xref ref-type="bibr" rid="ref17 ref18 ref19 ref20 ref21 ref22 ref23">17&#x2013;23</xref>). The World Health Organization (WHO) Guiding Principles for Pathogen Genome Data Sharing (<xref ref-type="bibr" rid="ref24">24</xref>) advocate for the timely and high-quality sharing of genome data; however, there exists significant variation in data sharing levels among different countries/regions (<xref ref-type="bibr" rid="ref25">25</xref>). The increased sharing of data may further exacerbate these imbalances and discrepancies. Therefore, a comprehensive understanding of global disparities in shared genomes can enhance objectivity when interpreting genomic data-driven analyses.</p>
<p>During the COVID-19 pandemic from 2020 to 2023, China endeavored to disseminate newly identified SARS-CoV-2 genomes through public databases, encompassing those obtained from the initial COVID-19 patient and the first SARS-CoV-2 genome isolated from the external packaging of cold-chain products (<xref ref-type="bibr" rid="ref26">26</xref>). However, there has been limited comprehensive assessment and comparison of China with other global regions during this timeframe. Therefore, this study aims to comprehensively analyze the global landscape of SARS-CoV-2 genome sharing between 2020 and 2023, focusing on quantity, timeliness, and quality of shared genomes. Additionally, it will specifically examine China&#x2019;s characteristics in terms of sharing SARS-CoV-2 genomes. Considering the challenges associated with integrating SARS-CoV-2 genome data from diverse repositories, including sequence discrepancies and inconsistent metadata, we opted to utilize the GISAID database as our source for this study due to its extensive collection of SARS-CoV-2 sequences and comprehensive meta-information that surpasses other available resources (<xref ref-type="bibr" rid="ref9">9</xref>).</p>
</sec>
<sec sec-type="materials|methods" id="sec6">
<label>2</label>
<title>Materials and methods</title>
<sec id="sec7">
<label>2.1</label>
<title>Data source</title>
<p>The genome and metadata of SARS-CoV-2 were obtained from the GISAID database on October 7, 2024. The total population data of China in 2023 was extracted from the National Bureau of Statistics of China.<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> The standard map [No. GS (2023) 2767] was downloaded without modification from the standard map service website of the National Administration of Surveying, Mapping and Geographic Information. The global and Chinese count of reported cases was sourced from Johns Hopkins University and the WHO<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref> via the Global Epidemic Analysis and Risk Assessment Platform of China CDC.</p>
</sec>
<sec id="sec8">
<label>2.2</label>
<title>Inclusion criteria and data management</title>
<p>The period for genome submission ranged from January 1, 2020 to December 31, 2023. Genomes that provided complete country-of-origin information and a sampling date no later than the submission date were included in the analysis. The genomes were classified according to the continent and country/region, based on the information provided in the &#x201C;Location&#x201D; field of the metadata associated with each genome, indicating the geographical locations where samples were collected. The high-quality whole genome sequences were filtered with a length above 29,000&#x2009;nt and Ns &#x2264;5% in the entire genome. Genomes from China do not include those from the Hong Kong Special Administrative Region (SAR), Macau SAR, and Taiwan, China. To investigate the characteristics of shared SARS-CoV-2 genomes in China during 2023, we extracted genomes from GISAID submissions originating from China, covering the period from January 1 to December 31, 2023. The analysis included only individuals classified as &#x201C;male&#x201D; or &#x201C;female&#x201D; for gender, and age was limited to numeric values ranging from 0 to 200, excluding any symbols other than the decimal point. Genomes meeting both the sex and age criteria were selected for inclusion in the analysis. The prevalence of variants in each year was analyzed based on the information provided in the &#x201C;Variant&#x201D; field of the metadata associated with each genome. The proportion of each variant to the total number of shared genomes in each year was calculated.</p>
</sec>
<sec id="sec9">
<label>2.3</label>
<title>Statistical analysis</title>
<p>Descriptive analysis was conducted to present the general characteristics of the genome sharing. Continuous variables were reported using the median and interquartile range (IQR), while categorical variables were presented as counts and proportions. Structured Query Language and Python were used for data cleaning, processing, and generating descriptive statistics, while both Python and GraphPad Prism 9 (GraphPad Software, Inc., La Jolla, CA, United States) were employed for data visualization. Detailed information and code are available online.<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref></p>
</sec>
</sec>
<sec sec-type="results" id="sec10">
<label>3</label>
<title>Results</title>
<sec id="sec11">
<label>3.1</label>
<title>The sharing of SARS-CoV-2 genomes exhibited worldwide variation in quantity and timeliness across continents</title>
<p>From 2020 to 2023, by searching in the GISAID database, a total of 222 countries/regions actively contributed 16,001,611 SARS-CoV-2 genomes. The annual counts were as follows: 142, 205, 210, and 180 countries/regions with genome contributions amounting to 307,565; 6,205,472; 7,623,101; and 1,865,473, respectively. Compared to the number of reported cases each year, we observed a similar trend between the number of cases and the sharing of genomes. Since the emergence of variants of concern (VOCs), variants of interest (VOIs) and variants under monitoring (VUMs) of the WHO, there has been a global increase in both reported cases and shared genomes. The peak in both case numbers and genome sharing occurred with the Omicron variant in 2022 (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S1A</xref>).</p>
<p>Next, the numbers and median deposition days of genomes were analyzed across six continents. Certain European, North American, and Asian countries/regions, such as the United Kingdom, the United States of America, and Singapore, exhibited both substantial numbers of shared genomes and short median deposition days (<xref ref-type="fig" rid="fig1">Figure 1A</xref>). Overall, the European region consistently contributed a high median number of shared genomes and short median deposition days throughout the years from 2020 to 2023, indicating the continuity and timeliness of genome sharing in Europe (<xref ref-type="fig" rid="fig1">Figures 1B</xref>,<xref ref-type="fig" rid="fig1">C</xref>). In contrast to the period prior to 2023, the number and timeliness of shared genomes in China in 2023 (total number: 64302; median deposition days: 27, IQR: 16&#x2013;52) were far higher than those of countries/regions in Asia (median total numbers: 1258.5, IQR: 265.3&#x2013;4391.5; median deposition days: 55.3, IQR: 31.5&#x2013;145.3) and ranked among the highest in the world (<xref ref-type="fig" rid="fig1">Figures 1A</xref>&#x2013;<xref ref-type="fig" rid="fig1">C</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The total number of shared genomes and the median time of genome deposition for each country/region in the period from 2020 to 2023. (A) Scatter plots of total number of shared genomes and the median time of genome deposition for each country or region. (B) Box plots of total number of shared genomes for each country/region divided by six continents. (C) Box plots of median time of genome deposition for each country/region divided by six continents. China was marked in red plot. Median time of genome deposition means the time interval between sample collection and genome sharing.</p>
</caption>
<graphic xlink:href="fpubh-12-1491623-g001.tif"/>
</fig>
</sec>
<sec id="sec12">
<label>3.2</label>
<title>The level of SARS-CoV-2 genome sharing from China was remarkably strengthened in 2023</title>
<p>To further investigate the features of Chinese SARS-CoV-2 genome sharing, we analyzed the SARS-CoV-2 genomes shared by China in the GISAID database. The results showed that the number of shared sequences increased significantly in 2023 compared to the period of 2020&#x2013;2022 (<xref ref-type="fig" rid="fig2">Figures 2A</xref>,<xref ref-type="fig" rid="fig2">B</xref>). The highest number of shared sequences occurred in January 2023 (total numbers: 9862), with two small peaks observed in April (total numbers: 7963) and June (total numbers: 8136), followed by a smaller peak in November (total numbers: 3198). Although the number of shared genomes in China from 2020 to 2022 was very similar, the relative ranking of China&#x2019;s shared genomes in the global countries/regions was lower than the median level in 2021 and 2022, indicating a relatively lower level of genome sharing compared to other regions worldwide. The number of genomes shared by China in 2023 significantly exceeded the median number of shared genomes of countries/regions globally (median total numbers: 379.5, IQR: 94.8&#x2013;4049.3), and the relative ranking of China had risen to the top 3.89% globally (<xref ref-type="fig" rid="fig2">Figure 2C</xref>). Similar to the global pattern, the trend in the number of reported cases closely matched that of genome sharing in China (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S1B</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>The number of shared genomes and the median time of genome deposition in China from 2020 to 2023. (A) The number of shared genomes in China each month during 2020&#x2013;2023. (B) The median number of shared genomes in China and among countries/regions worldwide from 2020 to 2023. (C) The relative ranking of China in the world on the number of shared genomes during 2020&#x2013;2023. (D) The median time of genome deposition in China during 2020&#x2013;2023. (E) The median time of genome deposition in China and among countries/regions worldwide from 2020 to 2023. (F) The relative ranking of China in the world on the median time of genome deposition during 2020&#x2013;2023.</p>
</caption>
<graphic xlink:href="fpubh-12-1491623-g002.tif"/>
</fig>
<p>The timeliness trend of genome sharing also demonstrates similar patterns, with a more consistent and shorter time interval in 2023 compared to the period of 2020&#x2013;2022 in China (<xref ref-type="fig" rid="fig2">Figure 2D</xref>). In 2023, the median deposition period for genome sharing in China was notably shorter compared to the global median (median deposition days: 56.3, IQR: 28.8&#x2013;173.0; <xref ref-type="fig" rid="fig2">Figure 2E</xref>), ranking among the top 22.78% worldwide (<xref ref-type="fig" rid="fig2">Figure 2F</xref>). Therefore, in contrast to the increased global median deposition days for genomes in 2023, China has achieved advancements in both quantity and timeliness.</p>
</sec>
<sec id="sec13">
<label>3.3</label>
<title>Noticeable disparity existed in the quality of globally shared genomes</title>
<p>Subsequently, we conducted a comprehensive analysis on the quality of the SARS-CoV-2 genomes shared via GISAID. Genomes with a length above 29,000&#x2009;nt were selected, while low coverage sequences (Ns &#x003E;5%) were excluded to obtain high-quality whole genome sequences. The ratio of these high-quality sequences to the total number of sequences was then calculated. The findings indicated that there was a high median ratio and a minimal degree of dispersion in genome quality among the shared genomes from European countries/regions from 2020 to 2023, demonstrating consistently high quality in European countries/regions (<xref ref-type="fig" rid="fig3">Figure 3A</xref>). Conversely, Africa exhibited a lower median ratio and a wider dispersion, suggesting an overall lower quality of genomic sequences with significant disparities among countries/regions. Notably, the quality of genomes shared from China in 2020 fell below the median level for Asia and globally. However, it demonstrated steady improvement over subsequent years, surpassing the median levels of both Asia and the world by 2022 (<xref ref-type="fig" rid="fig3">Figures 3A</xref>,<xref ref-type="fig" rid="fig3">B</xref>). By 2023, the relative ranking of quality for shared genomes by China had risen to the top 17.78% worldwide (<xref ref-type="fig" rid="fig3">Figure 3C</xref>).</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>The quality of shared genomes for each country/region in the period from 2020 to 2023. (A) Box plots depict the percentage of genome with high coverage, excluding genomes below 29,000&#x2009;nt and those with over 5% Ns, for each country or region across six continents. China was marked in red plots. (B) The percentage of genomes with high coverage of China in each year during 2020&#x2013;2023. (C) The relative ranking of China in the world on the percentage of genomes with high coverage during 2020&#x2013;2023.</p>
</caption>
<graphic xlink:href="fpubh-12-1491623-g003.tif"/>
</fig>
</sec>
<sec id="sec14">
<label>3.4</label>
<title>The genomic metadata shared in China in 2023 exhibited distinct characteristics</title>
<p>In 2023, China made great efforts in genome sharing. Although all provinces in China shared genomic sequences, there were regional differences. Shanghai Municipality, Guangdong Province, and Beijing Municipality shared the most sequences, with 9,970, 6,310, and 4,693, respectively (<xref ref-type="fig" rid="fig4">Figure 4A</xref>). From the species composition of the shared sequences, the vast majority were human samples (total number: 64,302), followed by environmental samples (total number: 130), and 43 samples of unidentified species (<xref ref-type="fig" rid="fig4">Figure 4B</xref>), suggesting that China&#x2019;s monitoring strategy in 2023 focused primarily on population surveillance with secondary emphasis on environmental monitoring. The sampling collection dates of population data showed a concentration after December 2022 with three prominent peaks: December 2022 to January 2023, May to June 2023, and August to September 2023 (<xref ref-type="fig" rid="fig4">Figure 4C</xref>), indicating that concentrated data sharing during these periods may be associated with clustered outbreaks. Gender distribution was evenly balanced, with 52.06% male and 47.94% female. The gender composition spanned all age groups but was mainly concentrated in the age ranges of 10&#x2013;29 and 65&#x2013;74 (<xref ref-type="fig" rid="fig4">Figure 4D</xref>). A comparison between GISAID&#x2019;s sampled population distribution and China&#x2019;s total population revealed similar gender ratios but marked differences in age structure (<xref ref-type="fig" rid="fig4">Figure 4E</xref>, male: total population vs. GISAID: 51.10% vs. 52.06%). Notably, a significantly higher proportion of individuals over 65&#x2009;years old was observed among GISAID&#x2019;s shared genomic data (<xref ref-type="fig" rid="fig4">Figure 4F</xref>, total population vs. GISAID: 15.40% vs. 30.01%), potentially attributed to specific surveillance targeting this older adult population.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Characteristics of shared SARS-CoV-2 genomes in China in 2023. (A) The geographical distribution of SARS-CoV-2 genomes shared from China in 2023. The 395 genomes that lacked province information were excluded. (B) The host distribution of SARS-CoV-2 genomes shared from China in 2023. (C) The temporal distribution of collection dates for genomes sampled from the Chinese population and shared in 2023. (D) The distribution of sex and age of genomes sampled from the Chinese population in 2023. The 10,767 genomes lacking standard sex and age information were excluded from the analysis. (E) The sex distribution comparison of the Chinese total population in 2023 between the total population and GISAID. (F) The age distribution comparison of the Chinese total population in 2023 between the total population and GISAID.</p>
</caption>
<graphic xlink:href="fpubh-12-1491623-g004.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="sec15">
<label>4</label>
<title>Discussion</title>
<p>This study systematically analyzed and compared the differences in SARS-CoV-2 genome sharing among countries/regions across continents during the pandemic from 2020 to 2023. The study findings demonstrated that the European countries/regions had superior performance in terms of the quantity, timeliness, and quality of shared genomes compared to other countries/regions. Starting from 2023, China has made significant improvements in all these aspects, with full coverage of provinces, timely sharing, and widespread monitoring of the population. The findings of our research suggested an expansion of data sharing capacity during the SARS-CoV-2 pandemic. It will be critical in containing and responding to future pandemics caused by novel pathogens.</p>
<p>Our analysis reveals substantial disparities in global genome sharing among different countries/regions. In view of the extensive incorporation of genomic data within GISAID, conducting comprehensive genome quality control measures, such as identifying frameshifts, posed a challenge. In our study, a length exceeding 29,000&#x2009;nt together with no more than 5% Ns was used to define high-quality whole genome sequences. Notably, we found that Europe demonstrated a higher level of genome sharing compared to other continents, while Africa exhibited relatively lower performance. These variations may stem from diverse factors encompassing discrepancies in sequencing capacities as well as policy and financial support across nations. Consistent with our findings, most East African Community nations encountered challenges including insufficient local NGS equipment, limited bioinformatics expertise, inadequate computational resources, and ineffective data-sharing mechanisms (<xref ref-type="bibr" rid="ref27">27</xref>). In contrast, Public Health England has been recognized as an early leader at a national level for employing high-throughput sequencing for pathogen surveillance (<xref ref-type="bibr" rid="ref28">28</xref>, <xref ref-type="bibr" rid="ref29">29</xref>). The quality of related metadata, in addition to sequence quality, was also deemed important. A previous study revealed a prevalent occurrence of incomplete metadata worldwide for GISAID sequences. Specifically, approximately 63% of the sequences lacked demographic information, 84% were devoid of sampling strategy details, and patient-level clinical information was missing in over 95% of the cases (<xref ref-type="bibr" rid="ref30">30</xref>). One limitation of our study is that we used the sample collection location as the country for analyzing genome sharing levels. 
However, there may be potential bias in assessing actual sharing performance among countries/regions due to inter-regional scientific projects leading to differences between the submitting country and sample collection country/region. Regardless, the COVID-19 pandemic undeniably propels pathogen whole-genome sequencing endeavors and facilitates data sharing.</p>
<p>The growing prevalence of shared genomes presents several challenges, such as the management of extensive public databases, and the issue of duplicate data uploading. The RCoV19 database, for instance, possesses the capability to integrate and eliminate redundant genomes as well as annotate database sources (<xref ref-type="bibr" rid="ref31">31</xref>, <xref ref-type="bibr" rid="ref32">32</xref>). RCoV19 offers a comprehensive integration of data and identifies the same genome sequences submitted to different sources by comparing key meta information (virus name, collection date, and location) as well as sequences after removing Ns and unifying the letter case (<xref ref-type="bibr" rid="ref9">9</xref>). Besides RCoV19, the VirusDIP (<xref ref-type="bibr" rid="ref33">33</xref>), ViruSurf (<xref ref-type="bibr" rid="ref34">34</xref>), and CoV-Seq (<xref ref-type="bibr" rid="ref35">35</xref>) databases also perform data integration and de-redundancy processing. However, it is worth noting that ViruSurf and CoV-Seq have not been updated since January 2022 and September 2020, respectively. On the other hand, VirusDIP integrates data from GISAID, GenBank, and CNGBdb but does not include information from GenBase and NCNSTRSS. These databases&#x2019; efforts have greatly improved the accessibility of comprehensive datasets for users. Even so, data incompleteness is an unavoidable limitation for integration, potentially resulting in information loss due to format discrepancies across different databases. Moreover, this limitation also hampers genomic surveillance as the representation of virus distribution may be skewed due to information incompleteness on local or travel-related cases in the majority of genomes.</p>
<p>Our findings demonstrate the substantial progress made by China in sharing SARS-CoV-2 genomic data. As demonstrated in a previous study, achieving a sequencing turnaround time of less than 21&#x2009;days could serve as a benchmark for effective SARS-CoV-2 genomic surveillance (<xref ref-type="bibr" rid="ref36">36</xref>). Here, the median turnaround time for China in 2023 was 27&#x2009;days, which closely approached the aforementioned threshold, indicating a significant improvement and underscoring the imperative for sustained efforts. Before 2023, China&#x2019;s robust prevention and control measures, coupled with successful vaccination campaigns, led to a minimal incidence of cases, with the majority of domestic outbreaks attributed to imported infections from overseas. This correlation was consistent with the limited number of shared genomes by China during the period spanning 2020&#x2013;2022. Due to the adjustment of COVID-19 prevention and control policy at the end of 2022 (<xref ref-type="bibr" rid="ref37">37</xref>), coupled with the continuous evolution of the Omicron variant, an increase in reported cases was observed in China. Simultaneously, there has been a corresponding rise in shared genomes showing consistency. Upon analyzing the temporal distribution of shared genomes of China sampled in 2023, we observed three distinct peaks in the epidemic: December 2022 to January 2023, May 2023 to June 2023, and August 2023 to September 2023. Remarkably, these peaks closely align with the positive rate of COVID-19 among influenza-like cases reported by the China CDC (<xref ref-type="bibr" rid="ref38">38</xref>). However, there were variations in peak intensity, particularly observed during the May to June 2023 peak. Therefore, genomic data sharing can only serve as a reference for roughly estimating the actual epidemic. 
Given that China made adjustments to its epidemic prevention and control policy at the end of 2022, timely sharing of genomic data will facilitate comprehensive and expeditious analysis of circulating variants within China by both domestic and international researchers.</p>
<p>All in all, the analysis of SARS-CoV-2 genomic data sharing during 2020&#x2013;2023 reveals significant advancements, particularly in countries like China. The efforts made by China and the global community in sequencing and sharing genome sequences during the COVID-19 pandemic undeniably contribute to advancing the One Health objective&#x2019;s requirements of ensuring discoverable, accessible, interoperable, and reusable data (<xref ref-type="bibr" rid="ref39">39</xref>). These endeavors also facilitated further research and evidence-based policies in response to the spread of VOCs and VOIs (<xref ref-type="bibr" rid="ref40 ref41 ref42 ref43 ref44">40&#x2013;44</xref>). However, it is crucial to acknowledge the variations in the extent of global genomic data sharing across different regions worldwide. The timely sharing of data is crucial for effectively addressing the current COVID-19 situation, as well as enhancing our preparedness for future outbreaks of emerging pathogens. Further efforts are warranted to address the disparity in global genomic data sharing and establish a universally standardized platform for data utilization, in order to promote scientific collaboration and advance research progress.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec16">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found at: <ext-link xlink:href="https://github.com/SongqiChen/covid19-genome-sharing-analysis" ext-link-type="uri">https://github.com/SongqiChen/covid19-genome-sharing-analysis</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="sec17">
<title>Author contributions</title>
<p>YF: Conceptualization, Funding acquisition, Writing &#x2013; original draft. SC: Data curation, Methodology, Software, Writing &#x2013; review &#x0026; editing. AW: Investigation, Writing &#x2013; original draft. ZZ: Software, Visualization, Writing &#x2013; review &#x0026; editing. CC: Conceptualization, Funding acquisition, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec18">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Natural Science Foundation of China (82341035, 82341034) and the National Key Research and Development Program of China (2023YFC2306000).</p>
</sec>
<ack>
<p>We gratefully acknowledge all data contributors, i.e., the authors and their originating laboratories responsible for obtaining the specimens, and their submitting laboratories for generating the genetic sequence and metadata and sharing via the GISAID Initiative, on which this research is based.</p>
</ack>
<sec sec-type="COI-statement" id="sec19">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="sec20">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec21">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpubh.2024.1491623/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpubh.2024.1491623/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_file_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn id="fn0001">
<p><sup>1</sup>
<ext-link xlink:href="https://data.stats.gov.cn/english/easyquery.htm?cn=C01" ext-link-type="uri">https://data.stats.gov.cn/english/easyquery.htm?cn=C01</ext-link>
</p>
</fn>
<fn id="fn0002">
<p><sup>2</sup>
<ext-link xlink:href="https://covid19.who.int/data" ext-link-type="uri">https://covid19.who.int/data</ext-link>
</p>
</fn>
<fn id="fn0003">
<p><sup>3</sup>
<ext-link xlink:href="https://github.com/SongqiChen/covid19-genome-sharing-analysis" ext-link-type="uri">https://github.com/SongqiChen/covid19-genome-sharing-analysis</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Armstrong</surname> <given-names>GL</given-names></name> <name><surname>MacCannell</surname> <given-names>DR</given-names></name> <name><surname>Taylor</surname> <given-names>J</given-names></name> <name><surname>Carleton</surname> <given-names>HA</given-names></name> <name><surname>Neuhaus</surname> <given-names>EB</given-names></name> <name><surname>Bradbury</surname> <given-names>RS</given-names></name> <etal/></person-group>. <article-title>Pathogen genomics in public health</article-title>. <source>N Engl J Med</source>. (<year>2019</year>) <volume>381</volume>:<fpage>2569</fpage>&#x2013;<lpage>80</lpage>. doi: <pub-id pub-id-type="doi">10.1056/NEJMsr1813907</pub-id>, PMID: <pub-id pub-id-type="pmid">31881145</pub-id></citation></ref>
<ref id="ref2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leguia</surname> <given-names>M</given-names></name> <name><surname>Vila-Sanjurjo</surname> <given-names>A</given-names></name> <name><surname>Chain</surname> <given-names>PSG</given-names></name> <name><surname>Berry</surname> <given-names>IM</given-names></name> <name><surname>Jarman</surname> <given-names>RG</given-names></name> <name><surname>Pollett</surname> <given-names>S</given-names></name></person-group>. <article-title>Precision medicine and precision public health in the era of pathogen next-generation sequencing</article-title>. <source>J Infect Dis</source>. (<year>2020</year>) <volume>221</volume>:<fpage>S289</fpage>&#x2013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1093/infdis/jiz424</pub-id>, PMID: <pub-id pub-id-type="pmid">31751454</pub-id></citation></ref>
<ref id="ref3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bonam</surname> <given-names>SR</given-names></name> <name><surname>Hu</surname> <given-names>H</given-names></name></person-group>. <article-title>Next-generation vaccines against COVID-19 variants: beyond the spike protein</article-title>. <source>Zoonoses (Burlingt)</source>. (<year>2023</year>) <volume>3</volume>. doi: <pub-id pub-id-type="doi">10.15212/ZOONOSES-2023-0003</pub-id></citation></ref>
<ref id="ref4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>L</given-names></name> <name><surname>Pei</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>Z</given-names></name> <name><surname>Luo</surname> <given-names>D</given-names></name></person-group>. <article-title>Progress and challenges of mRNA vaccines</article-title>. <source>Interdis Med</source>. (<year>2023</year>) <volume>1</volume>:<fpage>e20220008</fpage>. doi: <pub-id pub-id-type="doi">10.1002/INMD.20220008</pub-id></citation></ref>
<ref id="ref5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oude Munnink</surname> <given-names>BB</given-names></name> <name><surname>Nieuwenhuijse</surname> <given-names>DF</given-names></name> <name><surname>Stein</surname> <given-names>M</given-names></name> <name><surname>O'Toole</surname> <given-names>&#x00C1;</given-names></name> <name><surname>Haverkate</surname> <given-names>M</given-names></name> <name><surname>Mollers</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Rapid SARS-CoV-2 whole-genome sequencing and analysis for informed public health decision-making in the Netherlands</article-title>. <source>Nat Med</source>. (<year>2020</year>) <volume>26</volume>:<fpage>1405</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41591-020-0997-y</pub-id>, PMID: <pub-id pub-id-type="pmid">32678356</pub-id></citation></ref>
<ref id="ref6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cuypers</surname> <given-names>L</given-names></name> <name><surname>Dellicour</surname> <given-names>S</given-names></name> <name><surname>Hong</surname> <given-names>SL</given-names></name> <name><surname>Potter</surname> <given-names>BI</given-names></name> <name><surname>Verhasselt</surname> <given-names>B</given-names></name> <name><surname>Vereecke</surname> <given-names>N</given-names></name> <etal/></person-group>. <article-title>Two years of genomic surveillance in Belgium during the SARS-CoV-2 pandemic to attain country-wide coverage and monitor the introduction and spread of emerging variants</article-title>. <source>Viruses</source>. (<year>2022</year>) <volume>14</volume>:<fpage>2301</fpage>. doi: <pub-id pub-id-type="doi">10.3390/v14102301</pub-id>, PMID: <pub-id pub-id-type="pmid">36298856</pub-id></citation></ref>
<ref id="ref7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khare</surname> <given-names>S</given-names></name> <name><surname>Gurry</surname> <given-names>C</given-names></name> <name><surname>Freitas</surname> <given-names>L</given-names></name> <name><surname>Schultz</surname> <given-names>MB</given-names></name> <name><surname>Bach</surname> <given-names>G</given-names></name> <name><surname>Diallo</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>GISAID's role in pandemic response</article-title>. <source>China CDC Wkly</source>. (<year>2021</year>) <volume>3</volume>:<fpage>1049</fpage>&#x2013;<lpage>51</lpage>. doi: <pub-id pub-id-type="doi">10.46234/ccdcw2021.255</pub-id>, PMID: <pub-id pub-id-type="pmid">34934514</pub-id></citation></ref>
<ref id="ref8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brister</surname> <given-names>JR</given-names></name> <name><surname>Ako-Adjei</surname> <given-names>D</given-names></name> <name><surname>Bao</surname> <given-names>Y</given-names></name> <name><surname>Blinkova</surname> <given-names>O</given-names></name></person-group>. <article-title>NCBI viral genomes resource</article-title>. <source>Nucleic Acids Res</source>. (<year>2015</year>) <volume>43</volume>:<fpage>D571</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gku1207</pub-id></citation></ref>
<ref id="ref9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>L</given-names></name> <name><surname>Zhao</surname> <given-names>W</given-names></name> <name><surname>Huang</surname> <given-names>T</given-names></name> <name><surname>Jin</surname> <given-names>E</given-names></name> <name><surname>Wu</surname> <given-names>G</given-names></name> <name><surname>Zhao</surname> <given-names>W</given-names></name> <etal/></person-group>. <article-title>On the collection and integration of SARS-CoV-2 genome data</article-title>. <source>Biosafety and Health</source>. (<year>2023</year>) <volume>5</volume>:<fpage>204</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bsheal.2023.07.004</pub-id></citation></ref>
<ref id="ref10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tanizawa</surname> <given-names>Y</given-names></name> <name><surname>Fujisawa</surname> <given-names>T</given-names></name> <name><surname>Kodama</surname> <given-names>Y</given-names></name> <name><surname>Kosuge</surname> <given-names>T</given-names></name> <name><surname>Mashima</surname> <given-names>J</given-names></name> <name><surname>Tanjo</surname> <given-names>T</given-names></name> <etal/></person-group>. <article-title>DNA Data Bank of Japan (DDBJ) update report 2022</article-title>. <source>Nucleic Acids Res</source>. (<year>2023</year>) <volume>51</volume>:<fpage>D101</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkac1083</pub-id>, PMID: <pub-id pub-id-type="pmid">36420889</pub-id></citation></ref>
<ref id="ref11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Okido</surname> <given-names>T</given-names></name> <name><surname>Kodama</surname> <given-names>Y</given-names></name> <name><surname>Mashima</surname> <given-names>J</given-names></name> <name><surname>Kosuge</surname> <given-names>T</given-names></name> <name><surname>Fujisawa</surname> <given-names>T</given-names></name> <name><surname>Ogasawara</surname> <given-names>O</given-names></name></person-group>. <article-title>DNA Data Bank of Japan (DDBJ) update report 2021</article-title>. <source>Nucleic Acids Res</source>. (<year>2022</year>) <volume>50</volume>:<fpage>D102</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkab995</pub-id>, PMID: <pub-id pub-id-type="pmid">34751405</pub-id></citation></ref>
<ref id="ref12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bu</surname> <given-names>C</given-names></name> <name><surname>Zheng</surname> <given-names>X</given-names></name> <name><surname>Zhao</surname> <given-names>X</given-names></name> <name><surname>Xu</surname> <given-names>T</given-names></name> <name><surname>Bai</surname> <given-names>X</given-names></name> <name><surname>Jia</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>GenBase: A Nucleotide Sequence Database</article-title>. <source>Genomics Proteomics Bioinformatics</source>. (<year>2024</year>) <volume>22</volume>. doi: <pub-id pub-id-type="doi">10.1093/gpbjnl/qzae047</pub-id>, PMID: <pub-id pub-id-type="pmid">38913867</pub-id></citation></ref>
<ref id="ref13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>FZ</given-names></name> <name><surname>You</surname> <given-names>LJ</given-names></name> <name><surname>Yang</surname> <given-names>F</given-names></name> <name><surname>Wang</surname> <given-names>LN</given-names></name> <name><surname>Guo</surname> <given-names>XQ</given-names></name> <name><surname>Gao</surname> <given-names>F</given-names></name> <etal/></person-group>. <article-title>CNGBdb: China national GeneBank DataBase</article-title>. <source>Yi Chuan</source>. (<year>2020</year>) <volume>42</volume>:<fpage>799</fpage>&#x2013;<lpage>809</lpage>. doi: <pub-id pub-id-type="doi">10.16288/j.yczz.20-080</pub-id>, PMID: <pub-id pub-id-type="pmid">32952115</pub-id></citation></ref>
<ref id="ref14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gangavarapu</surname> <given-names>K</given-names></name> <name><surname>Latif</surname> <given-names>AA</given-names></name> <name><surname>Mullen</surname> <given-names>JL</given-names></name> <name><surname>Alkuzweny</surname> <given-names>M</given-names></name> <name><surname>Hufbauer</surname> <given-names>E</given-names></name> <name><surname>Tsueng</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Outbreak.info genomic reports: scalable and dynamic surveillance of SARS-CoV-2 variants and mutations</article-title>. <source>Nat Methods</source>. (<year>2023</year>) <volume>20</volume>:<fpage>512</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-023-01769-3</pub-id>, PMID: <pub-id pub-id-type="pmid">36823332</pub-id></citation></ref>
<ref id="ref15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C</given-names></name> <name><surname>Nadeau</surname> <given-names>S</given-names></name> <name><surname>Yared</surname> <given-names>M</given-names></name> <name><surname>Voinov</surname> <given-names>P</given-names></name> <name><surname>Xie</surname> <given-names>N</given-names></name> <name><surname>Roemer</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>CoV-Spectrum: analysis of globally shared SARS-CoV-2 data to identify and characterize new variants</article-title>. <source>Bioinformatics</source>. (<year>2022</year>) <volume>38</volume>:<fpage>1735</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btab856</pub-id>, PMID: <pub-id pub-id-type="pmid">34954792</pub-id></citation></ref>
<ref id="ref16"><label>16.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hodcroft</surname> <given-names>EB</given-names></name></person-group>. CoVariants: SARS-CoV-2 mutations and variants of interest. (<year>2021</year>). <comment>Available from:</comment> <ext-link xlink:href="https://covariants.org/" ext-link-type="uri">https://covariants.org/</ext-link> [Accessed October 7, 2024].</citation></ref>
<ref id="ref17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ansari</surname> <given-names>S</given-names></name> <name><surname>Gupta</surname> <given-names>N</given-names></name> <name><surname>Verma</surname> <given-names>R</given-names></name> <name><surname>Singh</surname> <given-names>ON</given-names></name> <name><surname>Gupta</surname> <given-names>J</given-names></name> <name><surname>Kumar</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>Antiviral activity of the human endogenous retrovirus-R envelope protein against SARS-CoV-2</article-title>. <source>EMBO Rep</source>. (<year>2023</year>) <volume>24</volume>:<fpage>e55900</fpage>. doi: <pub-id pub-id-type="doi">10.15252/embr.202255900</pub-id></citation></ref>
<ref id="ref18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Periwal</surname> <given-names>N</given-names></name> <name><surname>Rathod</surname> <given-names>SB</given-names></name> <name><surname>Pal</surname> <given-names>R</given-names></name> <name><surname>Sharma</surname> <given-names>P</given-names></name> <name><surname>Nebhnani</surname> <given-names>L</given-names></name> <name><surname>Barnwal</surname> <given-names>RP</given-names></name> <etal/></person-group>. <article-title>In silico characterization of mutations circulating in SARS-CoV-2 structural proteins</article-title>. <source>J Biomol Struct Dyn</source>. (<year>2022</year>) <volume>40</volume>:<fpage>8216</fpage>&#x2013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1080/07391102.2021.1908170</pub-id>, PMID: <pub-id pub-id-type="pmid">33797336</pub-id></citation></ref>
<ref id="ref19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Periwal</surname> <given-names>N</given-names></name> <name><surname>Rathod</surname> <given-names>SB</given-names></name> <name><surname>Sarma</surname> <given-names>S</given-names></name> <name><surname>Johar</surname> <given-names>GS</given-names></name> <name><surname>Jain</surname> <given-names>A</given-names></name> <name><surname>Barnwal</surname> <given-names>RP</given-names></name> <etal/></person-group>. <article-title>Time series analysis of SARS-CoV-2 genomes and correlations among highly prevalent mutations</article-title>. <source>Microbiol Spectr</source>. (<year>2022</year>) <volume>10</volume>:<fpage>e0121922</fpage>. doi: <pub-id pub-id-type="doi">10.1128/spectrum.01219-22</pub-id>, PMID: <pub-id pub-id-type="pmid">36069583</pub-id></citation></ref>
<ref id="ref20"><label>20.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rice</surname> <given-names>AM</given-names></name> <name><surname>Castillo Morales</surname> <given-names>A</given-names></name> <name><surname>Ho</surname> <given-names>AT</given-names></name> <name><surname>Mordstein</surname> <given-names>C</given-names></name> <name><surname>M&#x00FC;hlhausen</surname> <given-names>S</given-names></name> <name><surname>Watson</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Evidence for strong mutation Bias toward, and selection against, U content in SARS-CoV-2: implications for vaccine design</article-title>. <source>Mol Biol Evol</source>. (<year>2021</year>) <volume>38</volume>:<fpage>67</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msaa188</pub-id>, PMID: <pub-id pub-id-type="pmid">32687176</pub-id></citation></ref>
<ref id="ref21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Periwal</surname> <given-names>N</given-names></name> <name><surname>Bhardwaj</surname> <given-names>U</given-names></name> <name><surname>Sarma</surname> <given-names>S</given-names></name> <name><surname>Arora</surname> <given-names>P</given-names></name> <name><surname>Sood</surname> <given-names>V</given-names></name></person-group>. <article-title>In silico analysis of SARS-CoV-2 genomes: insights from SARS encoded non-coding RNAs</article-title>. <source>Front Cell Infect Microbiol</source>. (<year>2022</year>) <volume>12</volume>:<fpage>966870</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fcimb.2022.966870</pub-id>, PMID: <pub-id pub-id-type="pmid">36519126</pub-id></citation></ref>
<ref id="ref22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>R</given-names></name> <name><surname>Chen</surname> <given-names>J</given-names></name> <name><surname>Gao</surname> <given-names>K</given-names></name> <name><surname>Hozumi</surname> <given-names>Y</given-names></name> <name><surname>Yin</surname> <given-names>C</given-names></name> <name><surname>Wei</surname> <given-names>GW</given-names></name></person-group>. <article-title>Analysis of SARS-CoV-2 mutations in the United States suggests presence of four substrains and novel variants</article-title>. <source>Commun Biol</source>. (<year>2021</year>) <volume>4</volume>:<fpage>228</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s42003-021-01754-6</pub-id>, PMID: <pub-id pub-id-type="pmid">33589648</pub-id></citation></ref>
<ref id="ref23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>S</given-names></name> <name><surname>Wu</surname> <given-names>W</given-names></name> <name><surname>Geng</surname> <given-names>S</given-names></name> <name><surname>Mao</surname> <given-names>M</given-names></name></person-group>. <article-title>Distinct mutations and lineages of SARS-CoV-2 virus in the early phase of COVID-19 pandemic and subsequent 1-year global expansion</article-title>. <source>J Med Virol</source>. (<year>2022</year>) <volume>94</volume>:<fpage>2035</fpage>&#x2013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1002/jmv.27580</pub-id>, PMID: <pub-id pub-id-type="pmid">35001392</pub-id></citation></ref>
<ref id="ref24"><label>24.</label><citation citation-type="other"><person-group person-group-type="author"><collab id="coll1">WHO</collab></person-group>. WHO guiding principles for pathogen genome data sharing. (<year>2022</year>). <comment>Available from:</comment> <ext-link xlink:href="https://iris.who.int/bitstream/handle/10665/364222/9789240061743-eng.pdf?sequence=1&#x0026;isAllowed=y" ext-link-type="uri">https://iris.who.int/bitstream/handle/10665/364222/9789240061743-eng.pdf?sequence=1&#x0026;isAllowed=y</ext-link> [Accessed October 7, 2024].</citation></ref>
<ref id="ref25"><label>25.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khan</surname> <given-names>W</given-names></name> <name><surname>Kabir</surname> <given-names>F</given-names></name> <name><surname>Kanwar</surname> <given-names>S</given-names></name> <name><surname>Aziz</surname> <given-names>F</given-names></name> <name><surname>Muneer</surname> <given-names>S</given-names></name> <name><surname>Kalam</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>Building up a genomic surveillance platform for SARS-CoV-2 in the middle of a pandemic: a true north-south collaboration</article-title>. <source>BMJ Glob Health</source>. (<year>2023</year>) <volume>8</volume>:<fpage>e012589</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmjgh-2023-012589</pub-id></citation></ref>
<ref id="ref26"><label>26.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>H</given-names></name> <name><surname>Wang</surname> <given-names>Z</given-names></name> <name><surname>Zhao</surname> <given-names>X</given-names></name> <name><surname>Han</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>Long distance transmission of SARS-CoV-2 from contaminated cold Chain products to humans-Qingdao City, Shandong Province, China, September 2020</article-title>. <source>China CDC Wkly</source>. (<year>2021</year>) <volume>3</volume>:<fpage>637</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.46234/ccdcw2021.164</pub-id>, PMID: <pub-id pub-id-type="pmid">34594958</pub-id></citation></ref>
<ref id="ref27"><label>27.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguinkal</surname> <given-names>JA</given-names></name> <name><surname>Zoclanclounon</surname> <given-names>YAB</given-names></name> <name><surname>Molina</surname> <given-names>A</given-names></name> <name><surname>Roba</surname> <given-names>A</given-names></name> <name><surname>Nyakio</surname> <given-names>NM</given-names></name> <name><surname>Lokamar</surname> <given-names>PN</given-names></name> <etal/></person-group>. <article-title>Assessment of the pathogen genomics landscape highlights disparities and challenges for effective AMR surveillance and outbreak response in the east African community</article-title>. <source>BMC Public Health</source>. (<year>2024</year>) <volume>24</volume>:<fpage>1500</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12889-024-18990-0</pub-id>, PMID: <pub-id pub-id-type="pmid">38840103</pub-id></citation></ref>
<ref id="ref28"><label>28.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walker</surname> <given-names>TM</given-names></name> <name><surname>Ip</surname> <given-names>CL</given-names></name> <name><surname>Harrell</surname> <given-names>RH</given-names></name> <name><surname>Evans</surname> <given-names>JT</given-names></name> <name><surname>Kapatai</surname> <given-names>G</given-names></name> <name><surname>Dedicoat</surname> <given-names>MJ</given-names></name> <etal/></person-group>. <article-title>Whole-genome sequencing to delineate <italic>Mycobacterium tuberculosis</italic> outbreaks: a retrospective observational study</article-title>. <source>Lancet Infect Dis</source>. (<year>2013</year>) <volume>13</volume>:<fpage>137</fpage>&#x2013;<lpage>46</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S1473-3099(12)70277-3</pub-id>, PMID: <pub-id pub-id-type="pmid">23158499</pub-id></citation></ref>
<ref id="ref29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jenkins</surname> <given-names>C</given-names></name> <name><surname>Dallman</surname> <given-names>TJ</given-names></name> <name><surname>Grant</surname> <given-names>KA</given-names></name></person-group>. <article-title>Impact of whole genome sequencing on the investigation of food-borne outbreaks of Shiga toxin-producing <italic>Escherichia coli</italic> serogroup O157: H7, England, 2013 to 2017</article-title>. <source>Euro Surveill</source>. (<year>2019</year>) <volume>24</volume>:<fpage>1800346</fpage>. doi: <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2019.24.4.1800346</pub-id></citation></ref>
<ref id="ref30"><label>30.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Z</given-names></name> <name><surname>Azman</surname> <given-names>AS</given-names></name> <name><surname>Chen</surname> <given-names>X</given-names></name> <name><surname>Zou</surname> <given-names>J</given-names></name> <name><surname>Tian</surname> <given-names>Y</given-names></name> <name><surname>Sun</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Global landscape of SARS-CoV-2 genomic surveillance and data sharing</article-title>. <source>Nat Genet</source>. (<year>2022</year>) <volume>54</volume>:<fpage>499</fpage>&#x2013;<lpage>507</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41588-022-01033-y</pub-id>, PMID: <pub-id pub-id-type="pmid">35347305</pub-id></citation></ref>
<ref id="ref31"><label>31.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>Y</given-names></name> <name><surname>Ji</surname> <given-names>C</given-names></name> <name><surname>Zhou</surname> <given-names>HY</given-names></name> <name><surname>Zheng</surname> <given-names>H</given-names></name> <name><surname>Wu</surname> <given-names>A</given-names></name></person-group>. <article-title>Web resources for SARS-CoV-2 genomic database, annotation, analysis and variant tracking</article-title>. <source>Viruses</source>. (<year>2023</year>) <volume>15</volume>:<fpage>1158</fpage>. doi: <pub-id pub-id-type="doi">10.3390/v15051158</pub-id></citation></ref>
<ref id="ref32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>Z</given-names></name> <name><surname>Zhu</surname> <given-names>JW</given-names></name> <name><surname>Li</surname> <given-names>CP</given-names></name> <name><surname>Jiang</surname> <given-names>S</given-names></name> <name><surname>Ma</surname> <given-names>LN</given-names></name> <name><surname>Tang</surname> <given-names>BX</given-names></name> <etal/></person-group>. <article-title>An online coronavirus analysis platform from the National Genomics Data Center</article-title>. <source>Zool Res</source>. (<year>2020</year>) <volume>41</volume>:<fpage>705</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.24272/j.issn.2095-8137.2020.065</pub-id>, PMID: <pub-id pub-id-type="pmid">33045776</pub-id></citation></ref>
<ref id="ref33"><label>33.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>D</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Tang</surname> <given-names>B</given-names></name> <name><surname>Pan</surname> <given-names>YH</given-names></name> <name><surname>Yang</surname> <given-names>J</given-names></name> <name><surname>Duan</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Coronavirus GenBrowser for monitoring the transmission and evolution of SARS-CoV-2</article-title>. <source>Brief Bioinform</source>. (<year>2022</year>) <volume>23</volume>:<fpage>bbab583</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbab583</pub-id>, PMID: <pub-id pub-id-type="pmid">35043153</pub-id></citation></ref>
<ref id="ref34"><label>34.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Canakoglu</surname> <given-names>A</given-names></name> <name><surname>Pinoli</surname> <given-names>P</given-names></name> <name><surname>Bernasconi</surname> <given-names>A</given-names></name> <name><surname>Alfonsi</surname> <given-names>T</given-names></name> <name><surname>Melidis</surname> <given-names>DP</given-names></name> <name><surname>Ceri</surname> <given-names>S</given-names></name></person-group>. <article-title>ViruSurf: an integrated database to investigate viral sequences</article-title>. <source>Nucleic Acids Res</source>. (<year>2021</year>) <volume>49</volume>:<fpage>D817</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkaa846</pub-id>, PMID: <pub-id pub-id-type="pmid">33045721</pub-id></citation></ref>
<ref id="ref35"><label>35.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>B</given-names></name> <name><surname>Liu</surname> <given-names>K</given-names></name> <name><surname>Zhang</surname> <given-names>H</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Bian</surname> <given-names>Y</given-names></name> <name><surname>Huang</surname> <given-names>L</given-names></name></person-group>. <article-title>CoV-Seq, a new tool for SARS-CoV-2 genome analysis and visualization: development and usability study</article-title>. <source>J Med Internet Res</source>. (<year>2020</year>) <volume>22</volume>:<fpage>e22299</fpage>. doi: <pub-id pub-id-type="doi">10.2196/22299</pub-id>, PMID: <pub-id pub-id-type="pmid">32931441</pub-id></citation></ref>
<ref id="ref36"><label>36.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brito</surname> <given-names>AF</given-names></name> <name><surname>Semenova</surname> <given-names>E</given-names></name> <name><surname>Dudas</surname> <given-names>G</given-names></name> <name><surname>Hassler</surname> <given-names>GW</given-names></name> <name><surname>Kalinich</surname> <given-names>CC</given-names></name> <name><surname>Kraemer</surname> <given-names>MUG</given-names></name> <etal/></person-group>. <article-title>Global disparities in SARS-CoV-2 genomic surveillance</article-title>. <source>Nat Commun</source>. (<year>2022</year>) <volume>13</volume>:<fpage>7003</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-022-33713-y</pub-id>, PMID: <pub-id pub-id-type="pmid">36385137</pub-id></citation></ref>
<ref id="ref37"><label>37.</label><citation citation-type="other">China enters new phase of COVID response [press release]. (<year>2023</year>). <comment>Available from:</comment> <ext-link xlink:href="http://english.www.gov.cn/statecouncil/ministries/202302/24/content_WS63f7f52cc6d0a757729e726e.html" ext-link-type="uri">http://english.www.gov.cn/statecouncil/ministries/202302/24/content_WS63f7f52cc6d0a757729e726e.html</ext-link> [Accessed October 7, 2024].</citation></ref>
<ref id="ref38"><label>38.</label><citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">China CDC</collab></person-group>. National situation of COVID-19 infection. (<year>2024</year>). <comment>Available from:</comment> <ext-link xlink:href="https://www.chinacdc.cn/jksj/xgbdyq/" ext-link-type="uri">https://www.chinacdc.cn/jksj/xgbdyq/</ext-link> [Accessed October 7, 2024].</citation></ref>
<ref id="ref39"><label>39.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Timme</surname> <given-names>RE</given-names></name> <name><surname>Wolfgang</surname> <given-names>WJ</given-names></name> <name><surname>Balkey</surname> <given-names>M</given-names></name> <name><surname>Venkata</surname> <given-names>SLG</given-names></name> <name><surname>Randolph</surname> <given-names>R</given-names></name> <name><surname>Allard</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Optimizing open data to support one health: best practices to ensure interoperability of genomic data from bacterial pathogens</article-title>. <source>One Health Outlook</source>. (<year>2020</year>) <volume>2</volume>:<fpage>20</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s42522-020-00026-3</pub-id>, PMID: <pub-id pub-id-type="pmid">33103064</pub-id></citation></ref>
<ref id="ref40"><label>40.</label><citation citation-type="other"><person-group person-group-type="author"><collab id="coll3">ECDC</collab></person-group>. Risk related to spread of new SARS-CoV-2 variants of concern in the EU/EEA. (<year>2020</year>). <comment>Available from:</comment> <ext-link xlink:href="https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-risk-related-to-spread-of-new-SARS-CoV-2-variants-EU-EEA.pdf" ext-link-type="uri">https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-risk-related-to-spread-of-new-SARS-CoV-2-variants-EU-EEA.pdf</ext-link> [Accessed October 7, 2024].</citation></ref>
<ref id="ref41"><label>41.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ye</surname> <given-names>S</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Zhao</surname> <given-names>X</given-names></name> <name><surname>Yu</surname> <given-names>Z</given-names></name> <name><surname>Song</surname> <given-names>Y</given-names></name> <name><surname>Tan</surname> <given-names>Z</given-names></name> <etal/></person-group>. <article-title>Emerging variants of B.1.617 lineage identified among returning Chinese employees working in India &#x2014; Chongqing municipality, China, April 2021</article-title>. <source>China CDC Wkly</source>. (<year>2021</year>) <volume>3</volume>:<fpage>409</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.46234/ccdcw2021.109</pub-id>, PMID: <pub-id pub-id-type="pmid">34594895</pub-id></citation></ref>
<ref id="ref42"><label>42.</label><citation citation-type="other"><person-group person-group-type="author"><collab id="coll4">National Center for Immunization and Respiratory Diseases (NCIRD), Division of Viral Diseases</collab></person-group>. <source>CDC COVID-19 Science Briefs [Internet]. Atlanta (GA): Centers for Disease Control and Prevention (US); 2020&#x2013;. Science Brief: Omicron (B.1.1.529) Variant</source>. (<year>2021</year>).</citation></ref>
<ref id="ref43"><label>43.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grubaugh</surname> <given-names>ND</given-names></name> <name><surname>Hodcroft</surname> <given-names>EB</given-names></name> <name><surname>Fauver</surname> <given-names>JR</given-names></name> <name><surname>Phelan</surname> <given-names>AL</given-names></name> <name><surname>Cevik</surname> <given-names>M</given-names></name></person-group>. <article-title>Public health actions to control new SARS-CoV-2 variants</article-title>. <source>Cell</source>. (<year>2021</year>) <volume>184</volume>:<fpage>1127</fpage>&#x2013;<lpage>32</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2021.01.044</pub-id>, PMID: <pub-id pub-id-type="pmid">33581746</pub-id></citation></ref>
<ref id="ref44"><label>44.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>J</given-names></name> <name><surname>Zeng</surname> <given-names>Z</given-names></name> <name><surname>Li</surname> <given-names>Q</given-names></name> <name><surname>Guan</surname> <given-names>W</given-names></name> <name><surname>Yang</surname> <given-names>Z</given-names></name> <name><surname>Hon</surname> <given-names>C</given-names></name></person-group>. <article-title>Challenge on prediction of influenza virus and SARS-CoV-2 virus co-circulation</article-title>. <source>Interdis Med</source>. (<year>2023</year>) <volume>1</volume>:<fpage>e20220006</fpage>. doi: <pub-id pub-id-type="doi">10.1002/INMD.20220006</pub-id></citation></ref>
</ref-list>
</back>
</article>