<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiomes</journal-id>
<journal-title>Frontiers in Microbiomes</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiomes</abbrev-journal-title>
<issn pub-type="epub">2813-4338</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frmbi.2022.1097124</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiomes</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Identification and spatio-temporal tracking of ubiquitous phage families in the human microbiome</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Tadmor</surname>
<given-names>Arbel D.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2074955"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Mahmoudabadi</surname>
<given-names>Gita</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2118407"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Foley</surname>
<given-names>Helen B.</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Phillips</surname>
<given-names>Rob</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1314351"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>TRON - Translational Oncology at the University Medical Center of the Johannes Gutenberg University Mainz</institution>, <addr-line>Mainz</addr-line>, <country>Germany</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biochemistry and Molecular Biophysics, California Institute of Technology</institution>, <addr-line>Pasadena, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Bioengineering, California Institute of Technology</institution>, <addr-line>Pasadena, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Bioengineering, Stanford University</institution>, <addr-line>Stanford, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Preventive Medicine, USC Keck School of Medicine</institution>, <addr-line>Los Angeles, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Applied Physics, California Institute of Technology</institution>, <addr-line>Pasadena, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Department of Physics, California Institute of Technology</institution>, <addr-line>Pasadena, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff8">
<sup>8</sup>
<institution>Division of Biology and Biological Engineering, California Institute of Technology</institution>, <addr-line>Pasadena, CA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Mina Rho, Hanyang University, Republic of Korea</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Eugeni Belda, Institut de Recherche pour le D&#xe9;veloppement (IRD), France; Tasha M. Santiago-Rodriguez, Diversigen, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Arbel D. Tadmor, <email xlink:href="mailto:arbel.tadmor@tron-mainz.de">arbel.tadmor@tron-mainz.de</email>; Gita Mahmoudabadi, <email xlink:href="mailto:gitam@stanford.edu">gitam@stanford.edu</email>; Rob Phillips, <email xlink:href="mailto:phillips@pboc.caltech.edu">phillips@pboc.caltech.edu</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Omics Approaches, a section of the journal Frontiers in Microbiomes</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>02</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>1</volume>
<elocation-id>1097124</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Tadmor, Mahmoudabadi, Foley and Phillips</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Tadmor, Mahmoudabadi, Foley and Phillips</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Viruses are a major component of the human microbiome, yet their diversity, lifestyles, spatiotemporal dynamics, and functional impact are not well understood. Elucidating the ecology of human associated phages may have a major impact on human health due to the potential ability of phages to modulate the abundance and phenotype of commensal bacteria. Analyzing 690 Human Microbiome Project metagenomes from 103 subjects sampled across up to 18 habitats, we found that despite the great interpersonal diversity observed among human viromes, humans harbor distinct phage families characterized by their shared conserved hallmark genes known as large terminase subunit (TerL) genes. Phylogenetic analysis of these phage families revealed that different habitats in the oral cavity and gut have unique phage community structures. Over a ~7-month timescale most of these phage families persisted in the oral cavity and gut; however, presence in certain oral habitats appeared to be transitory, possibly due to host migration within the oral cavity. Interestingly, certain phage families were found to be highly correlated with pathogenic, carriage and disease-related isolates, and may potentially serve as novel biomarkers for disease. Our findings shed new light on the core human virome and offer a metagenomic-independent way to probe the core virome using widely shared conserved phage markers.</p>
</abstract>
<kwd-group>
<kwd>core human virome</kwd>
<kwd>human phage markers</kwd>
<kwd>phageome</kwd>
<kwd>human microbiome</kwd>
<kwd>oral virome</kwd>
<kwd>metagenome clustering</kwd>
<kwd>metagenomic clustering by reference library</kwd>
<kwd>MCRL</kwd>
</kwd-group>
<contract-num rid="cn001">R01-GM098465, Pioneer Award</contract-num>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content>
</contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="69"/>
<page-count count="18"/>
<word-count count="10314"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Bacteriophages are a major component of the human microbiome, with saliva, for example, containing 10<sup>8</sup> virus-like particles per milliliter (<xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>), and stool containing up to 10<sup>9</sup> virus-like particles per gram (<xref ref-type="bibr" rid="B51">Reyes et&#xa0;al., 2012</xref>). Viruses are also frequently encountered as prophages, with an estimated ~60% of sequenced bacterial genomes predicted to encode at least one integrated phage genetic element (<xref ref-type="bibr" rid="B10">Casjens, 2003</xref>; <xref ref-type="bibr" rid="B18">Edwards and Rohwer, 2005</xref>). The degree to which these pervasive phage genetic elements modulate the abundance and phenotype of commensal microbiota and impact human health is currently unknown. Phages, for example, have been shown to promote pathogenicity in bacteria, confer antibiotic resistance to hosts, and transduce genes that alter host fitness (<xref ref-type="bibr" rid="B61">Waldor and Mekalanos, 1996</xref>; <xref ref-type="bibr" rid="B6">Br&#xfc;ssow et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B64">Willner et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B48">Quir&#xf3;s et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B42">Navarro and Muniesa, 2017</xref>). Furthermore, commensal phages have been correlated with various medical conditions such as type I diabetes, chronic infection, and inflammatory bowel disease (<xref ref-type="bibr" rid="B68">Zhao et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B32">&#x141;usiak-Szelachowska et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B54">Secor et&#xa0;al., 2017</xref>). Phages may therefore potentially have a significant impact on human health.</p>
<p>Despite the abundance of phages in human microbial habitats and their postulated impact on human health, we have a very limited understanding of phage ecology in the human body, in particular the identity of their hosts, their lifestyles, their spatial distribution, their temporal dynamics, and their potential role in mediating disease. Applying standard metagenomic approaches to address such questions is challenging in part because of the staggering genomic diversity that is a hallmark of viruses (<xref ref-type="bibr" rid="B44">Paez-Espino et&#xa0;al., 2016a</xref>) and the fundamental plasticity of viral genomes, making it difficult to target and precisely track in space and time specific phage families. Indeed, with few exceptions (<xref ref-type="bibr" rid="B56">Stern et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B34">Manrique et&#xa0;al., 2016</xref>), previous metagenomic studies have largely focused on the heterogeneity of human viromes (<xref ref-type="bibr" rid="B50">Reyes et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B38">Minot et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B51">Reyes et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B41">Moreno-Gallego et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B55">Shkoporov et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B23">Gregory et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B69">Zuo et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B20">Garmaeva et&#xa0;al., 2021</xref>). Conversely, traditional methods that are based on targeting universally conserved genes such as the small subunit ribosomal RNA (SSU rRNA) gene for mapping microbial diversity are not applicable to phages because there is no analogous universally conserved gene in viruses (<xref ref-type="bibr" rid="B52">Rohwer and Edwards, 2002</xref>; <xref ref-type="bibr" rid="B18">Edwards and Rohwer, 2005</xref>).</p>
<p>In this study we aimed to combine the benefits of metagenomic and targeted sequencing approaches to discover phage families that may be widely present in the human virome. We were motivated by the hypothesis that - in analogy to the SSU rRNA marker - there would be core phage families (whether lytic or lysogenic) that could be represented and identified by conserved marker sequences. If we could find such markers, then in analogy to phylogenetic profiling of SSU rRNA markers, we could use phylogenetic analysis to explore intra-family sequence diversity and track such members across different body habitats, different subjects, and different time points. In this context, we use the term &#x201c;family&#x201d; to informally denote phages that have a high degree of sequence similarity across a shared marker gene, and within each family, we use the term &#x201c;sublineage&#x201d; to denote members that are more phylogenetically similar based on their shared marker gene. As such, in our framework, we do not necessarily expect that members of the same phage family share homology or similarity across their entire genomes.</p>
<p>We chose to focus our search for phage markers on the large terminase (TerL) subunit, one of the most powerful molecular machines in nature (<xref ref-type="bibr" rid="B57">Sun et&#xa0;al., 2008</xref>), a component of the DNA packaging and cleaving mechanism present in numerous double stranded DNA (dsDNA) phages (<xref ref-type="bibr" rid="B49">Rao and Feiss, 2008</xref>) and considered to be an important signature of dsDNA phage genomes (<xref ref-type="bibr" rid="B10">Casjens, 2003</xref>). Typically, TerL genes of different phages exhibit little overall sequence similarity (<xref ref-type="bibr" rid="B19">Eppler et&#xa0;al., 1991</xref>; <xref ref-type="bibr" rid="B11">Chai et&#xa0;al., 1992</xref>; <xref ref-type="bibr" rid="B40">Moore and Prevelige, 2002</xref>; <xref ref-type="bibr" rid="B49">Rao and Feiss, 2008</xref>) and contain only a handful of conserved functional amino acid residues (<xref ref-type="bibr" rid="B49">Rao and Feiss, 2008</xref>). However, we previously found that in the case of termites, the hindgut microbiomes of numerous termite species from different parts of the globe shared a certain TerL gene family that was conserved across most amino acid residues enabling us to construct a universal phage marker for this family of phages in termites (<xref ref-type="bibr" rid="B58">Tadmor et&#xa0;al., 2011</xref>). Therefore, while the TerL gene in and of itself is not universally conserved and therefore cannot serve as a general purpose universal marker for phages, our finding raised the possibility that other TerL gene families may exist in other species that are conserved and widely shared across members of those species, including humans. Adopting this marker-based approach to the human virome, we were indeed able to identify a set of unrelated TerL-based phage families that are ubiquitously shared across humans. 
Within each family, phylogenetic analysis enabled us to map with high resolution sublineages across different subjects, body habitats and time points (for an overview of our methodology see <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S1</bold>
</xref>).</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="s2_1">
<title>Sample collection</title>
<p>Samples from nine orally healthy adults were kindly donated to us by Bik et&#xa0;al. who had collected these samples through a collaboration with a dentist and in accordance with the Stanford IRB protocols (<xref ref-type="bibr" rid="B5">Bik et&#xa0;al., 2010</xref>). For each subject, oral biofilm samples were collected from six oral sites using sterile curettes. These oral sites include the tongue ventral, tongue dorsum, buccal mucosa, sub-gingiva, supra-gingiva, and the hard palate. Upon collection, the samples were deposited in PBS buffer. For the viral fraction experiments, additional tongue dorsum samples were collected from a tenth subject who refrained from brushing their teeth or tongue for a minimum of 8 hours prior to sample collection to allow for a substantial buildup of plaque on the tongue dorsum. The samples were collected with a tongue scraper while wearing gloves and deposited into a sterile collection tube. Exclusion criteria included: antibiotic use in the preceding three months, active cavities, or gum disease. Sample collection and processing protocols were approved by Caltech Institutional Review Board (IRB protocol 14-0430) and Institutional Biosafety Committee (IBC protocol 13-198).</p>
</sec>
<sec id="s2_2">
<title>Datasets analyzed</title>
<p>All metagenomes and viromes analyzed in this study were assembled by the original authors providing those datasets. Apart from the selection pressure analysis, which was performed on nucleotide sequences, analysis was performed on amino acid alignments. The following datasets and databases were analyzed in our study:</p>
<list list-type="simple">
<list-item>
<p>(1) The Mira dataset (<xref ref-type="bibr" rid="B4">Belda-Ferre et&#xa0;al., 2012</xref>) comprising six metagenomes corresponding to supragingival dental plaque collected from six patients in Spain and divided into three categories based on the number of caries per individual: two individuals who never developed caries in their lives (metagenomes M<sub>HA</sub>, M<sub>HB</sub>), two individuals who had been regularly treated for caries in the past and had a low number of active caries (1 and 4) at the time of sampling (metagenomes M<sub>PCA</sub>, M<sub>PCB</sub>), and two individuals who had a high number of active caries (8 and 15) and poor oral hygiene (metagenomes M<sub>AA</sub>, M<sub>AB</sub>). In all cases, plaque material from all teeth surfaces was pooled avoiding active cavities if present, and for each of the above six conditions a single metagenome was generated. The mean and median length of contigs in these metagenomes were 336 &#xb1; 167 nt (s.d.) and 409 nt, respectively. The mean genome size was 87.7 Mbases. Assembled translated metagenomes can be found on MG-RAST (<xref ref-type="bibr" rid="B21">Glass et&#xa0;al., 2010</xref>) with the following IDs: 4447192.3, 4447102.3, 4447103.3, 4447101.3, 4447943.3, 4447903.3.</p>
</list-item>
<list-item>
<p>(2) The Xie dataset (<xref ref-type="bibr" rid="B66">Xie et&#xa0;al., 2010</xref>) comprising a metagenome of supragingival and subgingival plaque collected and pooled from eight teeth of a caries-free and periodontally healthy individual from the United States. The mean and median length of contigs in this metagenome were 372 &#xb1; 126 nt (s.d.) and 411 nt, respectively. The genome size was 29.5 Mbases. The assembled translated metagenome can be found on MG-RAST with the ID 4446622.3.</p>
</list-item>
<list-item>
<p>(3) The HMP dataset (<xref ref-type="bibr" rid="B37">Meth&#xe9; et&#xa0;al., 2012</xref>) comprising contributions from 103 healthy individuals sampled from up to 15 body habitats, including: attached/keratinized gingiva, buccal mucosa, hard palate, palatine tonsils, saliva, subgingival plaque, supragingival plaque, throat, tongue dorsum, stool, anterior nares, posterior fornix, mid vagina, vaginal introitus, and the retroauricular crease. All subjects were subjected to rigorous inclusion criteria to control for their health (<xref ref-type="bibr" rid="B1">Aagaard et&#xa0;al., 2013</xref>). 748 assembled metagenomes generated in Phase I of the HMP study were subjected to internal quality control assessment based on HMP study guidelines (<xref ref-type="bibr" rid="B37">Meth&#xe9; et&#xa0;al., 2012</xref>), leaving 690 metagenomes that were used in the current analysis (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S8</bold>
</xref>). Metadata from the HMP cohort such as the Medical Record Number (MRN), collection site, visit number, and the replicate number were extracted as previously described (<xref ref-type="bibr" rid="B36">Markowitz et&#xa0;al., 2012</xref>). The mean and median length of contigs in HMP metagenomes passing HMP quality control were 582 &#xb1; 124 nt (s.d.) and 561 nt, respectively, and for oral metagenomes 529 &#xb1; 57 nt (s.d.) and 534 nt, respectively. The HMP metagenomes are available through the IMG/M database.</p>
</list-item>
<list-item>
<p>(4) The Pride dataset (<xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>) comprising viromes extracted from saliva samples of five subjects sampled at day 1, day 30 and day 60 or 90. Subjects were healthy and had not taken antibiotics for at least one year prior to donating samples. All subjects had good oral health based on rigorous inclusion criteria (<xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>). The mean and median length of contigs in these metagenomes were 328 &#xb1; 44 nt (s.d.) and 349 nt, respectively. Assembled translated metagenomes can be found on MG-RAST with the following IDs: 4445735.3, 4446121.3, 4445731.3, 4445728.3, 4446126.3, 4446075.3, 4445734.3, 4445729.3, 4446125.3, 4446124.3, 4445730.3, 4446122.3, 4446120.3, 4445737.3, and 4445736.3.</p>
</list-item>
<list-item>
<p>(5) The MetagenomesOnline (MgOl) portal (<xref ref-type="bibr" rid="B65">Wommack et&#xa0;al., 2012</xref>) hosted on the VIROME platform comprising 270 metagenomic libraries, including a large number of viromes. Environmental viromes in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1H</bold>
</xref> were selected to match the following filtering criteria: Genesis=natural, Environmental package=all excluding host-associated viromes, and considering only viromes of DNA viruses, resulting in 109 viromes. The mean and median length of contigs in these viromes were 377 &#xb1; 70 nt (s.d.) and 362 nt (range 319 &#x2013; 1362 nt), respectively.</p>
</list-item>
<list-item>
<p>(6) The Human Oral Microbiome Database (HOMD) dataset (<xref ref-type="bibr" rid="B12">Chen et&#xa0;al., 2010</xref>) comprising genomes of oral bacteria sequenced either as part of the HOMD project or as part of other sequencing projects, including the HMP study.</p>
</list-item>
<list-item>
<p>(7) NCBI&#x2019;s non-redundant (nr) protein database, comprising all non-redundant GenBank CDS translations, the protein data bank (PDB), SwissProt, the Protein Information Resource (PIR) database and the Protein Research Foundation (PRF) database, excluding environmental samples from WGS projects.</p>
</list-item>
<list-item>
<p>(8) The IMG/M database (<xref ref-type="bibr" rid="B13">Chen et&#xa0;al., 2018</xref>) comprising at the time of analysis 16338 bacterial and archaeal isolates, 475 viral isolates, and 1335 environmental metagenomes. Environmental metagenomes in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1G</bold>
</xref> were selected as follows: for each environmental &#x2018;family&#x2019; class a maximum of 50 metagenomes were randomly selected, limiting metagenomes to 5 GB due to the downloading limitation of the IMG platform, resulting in 448 metagenomes. Of these, we retained only metagenomes with constructed protein databases and excluded metatranscriptomes. In order for our comparison between HMP oral metagenomes and environmental metagenomes to be unbiased, we further controlled for the average contig length and the total genome size. To control for the average contig length we selected only environmental metagenomes whose average contig length exceeded the minimal contig length of assembled HMP metagenomes (300 bp) (<xref ref-type="bibr" rid="B37">Meth&#xe9; et&#xa0;al., 2012</xref>). To control for the genome size, we excluded environmental metagenomes whose genome size was below the minimal genome size of HMP oral metagenomes. Applying these selection criteria resulted in 233 environmental metagenomes analyzed in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1G</bold>
</xref>.</p>
</list-item>
<list-item>
<p>(9) The IMG/VR database (<xref ref-type="bibr" rid="B45">Paez-Espino et&#xa0;al., 2016b</xref>) (IMG_VR_2018-07-01_4) comprising at the time of analysis viral contigs from 3663 metagenomes available on IMG satisfying the constraint &#x201c;Ecosystem phylum =Environmental&#x201d;.</p>
</list-item>
<list-item>
<p>(10) NCBI&#x2019;s env_nr database containing nearly 10 million protein sequences from whole genome sequencing (WGS) metagenomic projects.</p>
</list-item>
</list>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Prevalence of the TerL phage families in the human oral cavity and in natural environments. <bold>(A)</bold> Percent identity between the TerL markers and PCR-amplified TerL sequences obtained from the tongue dorsum, subgingival plaque and supragingival plaque of three orally healthy subjects (unless otherwise stated, percent identities in this study were calculated based on amino acid alignments). The heat map indicates the maximum percent identity across all PCR-amplified sequences. Striped cells indicate that the expected PCR band was present but sequencing failed. <bold>(B)</bold> Oral habitats analyzed by targeted sequencing. <bold>(C)</bold> Percent identity between the TerL markers and PCR-amplified TerL sequences across the oral habitats indicated in (B). Crossed out cells correspond to samples that were unavailable for testing. <bold>(D)</bold> Presence of the TerL phage families in salivary viromes obtained from five periodontally healthy subjects over a 60- to 90-day period (<xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>). Heat map applies to panels D-H and shows the maximum percent identity across all BLAST alignments exceeding a predetermined optimal alignment length threshold (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S4</bold>
</xref>). <bold>(E)</bold> Prevalence of the TerL phage families across 90 subjects based on 382 HMP oral metagenomes regardless of collection site, visit number, or replicate. <bold>(F)</bold> Prevalence of the TerL phage families across 206 HMP oral metagenomes corresponding to three oral habitats, taking into account one metagenome per subject. <bold>(G)</bold> Prevalence of the TerL phage families across 233 metagenomes from natural environments. AL, asphalt lakes; OR, oil reservoir. <bold>(H)</bold> Prevalence of the TerL phage families across 109 viromes of DNA viruses from natural environments.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frmbi-01-1097124-g001.tif"/>
</fig>
</sec>
<sec id="s2_3">
<title>DNA extraction</title>
<p>DNA extraction was performed on each sample using the MoBio PowerBiofilm<sup>&#xae;</sup> DNA Isolation Kit, which uses a DNA extraction and purification protocol optimized for biofilms. It combines the benefits of a chemical lysis treatment with the physical forces applied during a bead-beating process. Disposable lab coats and face masks were worn at all times.</p>
<sec id="s2_3_1">
<title>Degenerate primer design</title>
<p>Degenerate primers for the TerL markers were designed based on sequences obtained from the HMP dataset, the Xie dataset, the Mira dataset and HOMD as follows: candidate 3&#x2019; positions for primers were chosen when possible at positions achieving a bit score of at least 3.5 when RPS-BLASTing the amino acid sequence of the given TerL marker against the Conserved Domains Database (CDD) (<xref ref-type="bibr" rid="B35">Marchler-Bauer et&#xa0;al., 2016</xref>). Primers were then selected in regions spanned by all datasets, requiring that the percent identity of the majority consensus amino acid residue, when equally weighted across all datasets, was at least 90% while limiting the degeneracy of each primer to 64 fold. Primer sequences were then designed using the CODEHOP algorithm (<xref ref-type="bibr" rid="B53">Rose et&#xa0;al., 1998</xref>), with the core region maximally degenerate based on the genetic code, and the consensus clamp region chosen to match the codon bias present in the alignments. Primer nucleotide sequences were optimized to have a GC clamp at the 3&#x2019; end, minimize homodimers, heterodimers and hairpins, and have a melting temperature of 60&#xb0;C. Degenerate primer sequences and targeted conserved amino acid motifs are provided in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S3</bold>
</xref>.</p>
</sec>
</sec>
<sec id="s2_4">
<title>PCR preparation</title>
<p>PCR reactions using the degenerate primers described above were performed in a laminar flowhood. Each PCR reaction contained 10.5 &#xb5;L of RT-PCR Grade Water (Ambion<sup>&#xae;</sup>), 1 &#xb5;L of extracted DNA at 1 ng/&#xb5;L, a premix containing AccuStart&#x2122; Taq DNA polymerase, dNTPs, and MgCl<sub>2</sub>, and 0.5 &#xb5;L of reverse and 0.5 &#xb5;L of forward primers (at 50 ng/&#xb5;L). A higher than recommended concentration was used since the primers are 32-64 fold degenerate. For MiSeq sequencing, primers were barcoded using error-detecting barcodes (appended onto the forward primer sequence) and synthesized by IDT (<xref ref-type="bibr" rid="B25">Hamady et&#xa0;al., 2008</xref>). For each extraction protocol, we performed three negative controls that instead of biofilm sample contained RT-PCR Grade Water (Ambion<sup>&#xae;</sup>), free of any DNase and RNase. These three extraction controls along with five no template controls were used during each PCR session to ensure there is no contamination being introduced during either process. Disposable lab coats and face masks were worn at all times. After each session all surfaces were cleaned with DNA AWAY&#x2122; and 95% ethanol. The flowhood interior surfaces and the equipment inside were exposed to UV for one hour at the end of each session. The following PCR thermocycling protocol was used in accordance with PerfeCTa qPCR SuperMix recommendations: 1) 10-minute activation of AccuStart&#x2122; Taq DNA polymerase at 95&#xb0;C, 2) 10 seconds of DNA denaturation at 95&#xb0;C, 3) 20 seconds of annealing at 60&#xb0;C, 4) 30 seconds of extension at 72&#xb0;C, 40 cycles repeating steps 2 to 4, followed by 5 minutes of final extension at 72&#xb0;C.</p>
</sec>
<sec id="s2_5">
<title>Gel electrophoresis and PCR cleanup</title>
<p>2% agarose in TAE buffer was used to cast the gels. 5 &#xb5;L of PCR reaction was mixed with 1 &#xb5;L of 6X loading dye and set to run for 30&#xa0;min at 100V. PCR products were purified using the QIAquick PCR Purification Kit from QIAGEN in accordance with their manual.</p>
</sec>
<sec id="s2_6">
<title>Sequencing and sequence analysis</title>
<p>Double-stranded DNA concentration in PCR-purified products was measured and standardized using the Qubit instrument. Sequences amplified for the AB2, HB2 and PCA1 markers were sent for Sanger sequencing following the IDT standard protocol. Sequences amplified for the HB1, HA and PCA2 markers were sent for MiSeq sequencing. Because each sample for MiSeq sequencing was barcoded during the PCR reaction, the samples were mixed into one vial and sent to GENEWIZ, Inc for library preparation and Illumina MiSeq sequencing (2 &#xd7; 300bp Paired-End sequencing). <italic>join_paired_ends.py</italic> script from the Quantitative Insights Into Microbial Ecology (QIIME) package (<xref ref-type="bibr" rid="B9">Caporaso et&#xa0;al., 2010</xref>) was used to join paired-end reads. We then performed several quality control steps to eliminate any sequences that arose due to sequencing error. Paired reads that had any mismatches across their overlapping bases were eliminated. The overlap between the paired reads constituted the entire length of the sequence. Using an in-house script developed for this project, <italic>seqQualityFilters.py</italic>, we then eliminated sequences with any bases with Phred scores of 29 or below (excluded from this step were the first and last two bases which are generally associated with low Phred scores for all sequences). Using the same in-house script (i) sequences were assigned to their respective TerL markers based on their primer sequences; (ii) sequences with incorrect barcode lengths or incorrect primer sequences were eliminated; (iii) the primer and barcode sequences were removed and the barcode sequences were written to a separate file for a later step; (iv) sequences with incorrect lengths were removed. <italic>split_libraries_fastq.py</italic> from QIIME was used to demultiplex the reads based on their barcode sequence, while further eliminating reads with any errors in their barcodes. 
MiSeq sequences analyzed in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S5</bold>
</xref> were clustered using QIIME&#x2019;s <italic>pick_otus.py</italic> script, based on their sequence similarity into operational taxonomic units (OTUs) (<xref ref-type="bibr" rid="B17">Edgar, 2010</xref>) using an OTU cutoff of 95% for HA and PCA2, and 98% for HB1.</p>
</sec>
<sec id="s2_7">
<title>Viral fraction protocol</title>
<p>To test if oral phages carrying close homologs of HB1 are lytic we tested the bacterial and viral fractions derived from an oral sample for the presence of the HB1 marker. Saliva samples were defrosted from storage at -20&#xb0;C. Samples and an extraction control were vortexed for 2 minutes at half-speed, followed by centrifugation at 8000g for 10 minutes. The supernatant was removed to a fresh tube and the pellet was resuspended in sterile filtered PBS. Supernatant and pellet were re-centrifuged (8000g, 5 minutes). 200&#xb5;L of the original supernatant (putative viral fraction, VF) were filtered through a PBS-rinsed 0.2&#xb5;m 13mm tuffryn filter. Original pellet (putative bacterial fraction, BF) was rinsed and resuspended in 200&#xb5;L PBS. BF and VF, as well as extraction controls, were extracted according to standard protocol with PowerBiofilm DNA Isolation Kit (MoBio). TerL markers HB1 and HA were amplified as described above. Markers were amplified from 1 &#xb5;L template using 0.8 &#xb5;L of 10 micromolar forward and reverse primers, with PerfeCTa MasterMix. Marker HB1 was also amplified using 2 &#xb5;L of template and 0.8 &#xb5;L of 100&#xb5;M primers. PCR products were assayed for presence or absence on 2% agarose gel (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S7</bold>
</xref>). Six replicates of the same VF extract were amplified to test for low-copy templates in the viral fraction.</p>
</sec>
<sec id="s2_8">
<title>Identifying shared TerL markers in the human oral cavity</title>
<sec id="s2_8_1">
<title>Identifying viral gene families in the Mira metagenomes</title>
<p>To identify TerL markers core to the human oral cavity we focused our analysis on the six plaque metagenomes from the Mira dataset reflecting human subjects with varying degrees of oral hygiene. We applied to each of these metagenomes a clustering algorithm called Metagenomic Clustering by Reference Library (MCRL) that was developed by the current authors (<xref ref-type="bibr" rid="B59">Tadmor and Phillips, 2022</xref>). Briefly, MCRL uses a reference library containing a set of reference sequences (in this case the viral RefSeq database v48 (<xref ref-type="bibr" rid="B47">Pruitt et&#xa0;al., 2007</xref>) containing ~97,000 viral genes) to initially identify and retain all reference sequences that have putative homologs in the given input metagenome. MCRL then proceeds to apply an iterative greedy clustering algorithm to the list of retained reference sequences and, upon convergence, reports the subset of reference sequences that are homologous to minimally overlapping sets of contigs in the metagenome. Thus, the final output of MCRL is the list of reference sequences with putative homologs in the input metagenome that have minimally overlapping &#x201c;signatures&#x201d; in the metagenome. A &#x201c;signature&#x201d; of a reference sequence in a metagenome is the list of contigs in the metagenome yielding an E value below 0.001 when BLASTing the amino acid sequence of the reference sequence against the translated metagenome. Reference sequences reported by MCRL therefore reflect potential different and unrelated gene families present in the metagenome.</p>
<p>To maximize detection sensitivity, we applied MCRL using its default parameters and a &#x201c;stringent overlap&#x201d; condition wherein two reference sequences are determined to overlap if their signatures overlap by more than 50% regardless of the reference sequence. In terms of sensitivity, we have previously shown that when using a stringent overlap condition MCRL achieves a sensitivity of at least 95% for detecting TerL gene families that exhibit up to 30% divergence compared to the viral RefSeq database, and overall has better sensitivity compared to conventional metagenomic clustering methods (<xref ref-type="bibr" rid="B59">Tadmor and Phillips, 2022</xref>). A detailed discussion of MCRL&#x2019;s default parameters, robustness to changes in parameters or presence of noise, and a benchmark comparing MCRL to standard metagenomic clustering methods in terms of sensitivity and accuracy is provided in (<xref ref-type="bibr" rid="B59">Tadmor and Phillips, 2022</xref>). When applying MCRL to each of the six Mira metagenomes, analyzing in total 1.75 &#xd7; 10<sup>6</sup> translated contigs, MCRL reported in total 7411 viral RefSeq genes (as depicted in <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S2A</bold>
</xref>).</p>
</sec>
</sec>
<sec id="s2_9">
<title>Screening for shared TerL sequences</title>
<p>To enrich for TerL candidates with significant putative homologs in the metagenomes and to remove spurious solutions, we retained from the list of 7411 viral RefSeq genes reported by MCRL a total of 76 reference genes encoding TerL genes (based on the RefSeq annotation provided by MCRL) that have a signature size of 5 or higher and that share at least 10% identical amino acid residues when aligned against their representative contig (the representative contig of a reference sequence is the contig yielding the lowest E value when BLASTing that reference sequence against the metagenome).</p>
<p>To enrich for closely related TerL lineages that are potentially shared across humans we BLASTed the amino acid sequence of the representative contig corresponding to each of the 76 homologous TerL RefSeq genes identified by MCRL in the Mira dataset against the translated oral metagenome from the Xie study - an oral metagenome of a healthy individual from a different continent participating in an independent study &#x2013; and retained only candidates that yielded at least 75% identity at the amino acid level. A 75% identity threshold was empirically motivated based on our previous experimental results in the termite hindgut system (<xref ref-type="bibr" rid="B58">Tadmor et&#xa0;al., 2011</xref>) where we found that the universally shared TerL lineage in this ecosystem exhibited 81.1% &#xb1; 7.8 identity at the amino acid level across different termite species. Indeed, this threshold was justified in retrospect given that the diversity of HMP metagenomic sequences closely related to the markers was captured using a 70% identity threshold at the amino acid level, as shown in <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S9</bold>
</xref> and discussed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S8</bold>
</xref>. This final filtering step left us with 11 TerL gene fragments (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S2</bold>
</xref>). We then BLASTed all 11 TerL gene fragments against each other at the protein level and removed redundant sequences, leaving us with seven non-homologous independent candidates for shared TerL markers (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S3</bold>
</xref>).</p>
</sec>
<sec id="s2_10">
<title>Obtaining full-length TerL markers</title>
<p>Since the metagenomes used to obtain the TerL marker candidates have relatively short contigs (with a mean contig length of 336 nt), the seven candidate TerL markers identified in the Mira dataset span only a fragment of the TerL gene length, which spans on average 1650 nt. To obtain shared TerL markers that span the entire length of a TerL gene we collected and aligned for each of the seven TerL candidate markers closely related amino acid sequences from the Xie, Mira, HOMD and the HMP datasets yielding at least 70% identity at the amino acid level. For each of the seven alignments we then selected the sequence that maximized the average percent identity across all other sequences (applying equal weights to each database), penalizing shorter sequences by setting the alignment score in positions containing gaps to 0. In this manner, we identified for each of the seven TerL candidates a closely related sequence spanning the entire length of the TerL gene. Contigs carrying the full-length TerL genes are listed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S3</bold>
</xref> and annotation for these contigs is provided in <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S5</bold>
</xref>.</p>
</sec>
<sec id="s2_11">
<title>BLAST alignments</title>
<p>All BLAST analyses were performed locally using blastp v2.2.22+ with default settings on amino acid alignments. Alignment thresholds are discussed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S4</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>S8</bold>
</xref>.</p>
</sec>
<sec id="s2_12">
<title>Collection of TerL marker homologs present in bacterial and phage isolates</title>
<p>To exhaustively identify all close homologs of the TerL markers in bacterial and phage isolates, each of the seven TerL markers were BLASTed against all available genomes on the IMG platform, NCBI&#x2019;s non-redundant (nr) protein database, and the HOMD database. For our phylogenetic analysis we included all TerL sequences that yielded at least 70% identity at the amino acid level across at least 90% of the TerL marker length, remaining with approximately 2300 hits (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>).</p>
</sec>
<sec id="s2_13">
<title>Determining health-related status of isolates</title>
<p>Each isolate harboring a close homolog of a TerL marker was assigned a &#x201c;health-related status&#x201d; to reflect its pathogenicity or potential association with disease. The decision regarding the health-related status was determined as follows: when information about the pathogenicity of the isolate or details about the bacterium&#x2019;s isolation were provided in IMG annotation or in annotation from another public database this information was used to determine the health-related status of the isolate. When public annotation was not available or not sufficiently detailed, original publications describing the isolation of the bacterium were sought. When the information provided in the original publication was not sufficiently detailed, the original authors were consulted. Based on the above information, the health-related status of each isolate was assigned to one of the following categories: &#x201c;P&#x201d;=the bacterial isolate/strain was designated as a pathogen by the author and/or the bacterium was isolated from a sick individual with a diagnosed disease or from a diseased organ, a diseased body site, a sterile body site, or a diseased animal. Sterile body sites include, for example, blood, cerebral spinal fluid, lymph nodes, peritoneal fluid, synovial fluid, and internal organs. &#x201c;C&#x201d;=the bacterial isolate was designated as a carriage strain by the author. &#x201c;H&#x201d;=the bacterial isolate/strain is not considered to be pathogenic by the author and/or was isolated from a healthy subject, healthy tissue or a healthy animal. When the required information was insufficient or unavailable to determine the health-related status of the isolate, the health-related status was designated &#x201c;n.a.&#x201d;. In case of phage isolates, the health-related status pertains to the bacterium strain from which the phage was induced. The health-related status for all isolates is provided in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref> along with appropriate references.</p>
</sec>
<sec id="s2_14">
<title>Phylogenetic analysis</title>
<p>Phylogenetic analysis was performed on translated TerL sequences obtained from all 690 HMP metagenomes passing HMP quality control criteria as well as all bacterial and phage isolates harboring close homologs of the markers listed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>, taking one representative per OTU as described below (OTU assignment for all isolates is provided in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>). Phylogenetic analysis was performed based on sequence alignments spanning at least 400 amino acids and yielding at least 70% identity at the amino acid level compared to the TerL markers, resulting in alignments spanning on average 69.2% of the TerL gene length (range: 62.7% to 88.9%). In the case of human bacterial isolates, one representative strain was selected per species per body region and per given health-related status, using a 3% OTU threshold at the amino acid level with alignments spanning at least 98% of the TerL marker length. For non-human bacterial isolates, one representative strain per species was selected. Translated nucleotide sequences were then aligned with MUSCLE (<xref ref-type="bibr" rid="B16">Edgar, 2004</xref>) in MEGA (<xref ref-type="bibr" rid="B60">Tamura et&#xa0;al., 2013</xref>). The optimal amino acid substitution model was estimated with ProtTest3.4 (<xref ref-type="bibr" rid="B14">Darriba et&#xa0;al., 2011</xref>) using the AIC criterion allowing for 48 model combinations permitted in SplitsTree4 (<xref ref-type="bibr" rid="B26">Huson and Bryant, 2006</xref>) with +G and +I options (amino acid frequencies are hard-coded in SplitsTree4). Models tested include: WAG (<xref ref-type="bibr" rid="B63">Whelan and Goldman, 2001</xref>), JTT (<xref ref-type="bibr" rid="B28">Jones et&#xa0;al., 1992</xref>), mtREV (<xref ref-type="bibr" rid="B2">Adachi and Hasegawa, 1996</xref>), mtMam (<xref ref-type="bibr" rid="B8">Cao et&#xa0;al., 1998</xref>), Dayhoff (<xref ref-type="bibr" rid="B15">Dayhoff and Schwartz, 1978</xref>), CpREV (<xref ref-type="bibr" rid="B3">Adachi et&#xa0;al., 2000</xref>). Optimal model-averaged parameters using Akaike weights were estimated with ProtTest3.4 for the shape parameter of the gamma distribution (&#x3b1;), and the proportion of invariant sites (Pinv). 
Neighbor-Net networks were estimated with SplitsTree4 (<xref ref-type="bibr" rid="B26">Huson and Bryant, 2006</xref>) based on amino acid sequence alignments using maximum likelihood distances estimated with optimal model-averaged parameters.</p>
</sec>
<sec id="s2_15">
<title>Selection pressure analysis</title>
<p>Selection pressure analysis was performed using codeml codon models included in the PAML package (<xref ref-type="bibr" rid="B67">Yang, 2007</xref>). Sequence alignments were generated using Geneious global alignment with free end gaps with default gap open and gap extension penalties, using an identity cost matrix (<xref ref-type="bibr" rid="B29">Kearse et&#xa0;al., 2012</xref>). Phylogenetic trees were created using SeaView GTR model with default parameters (<xref ref-type="bibr" rid="B22">Gouy et&#xa0;al., 2009</xref>). We tested NSsite models with different number of site classes: M0 (one site class with constant &#x3c9;, where &#x3c9; = dN/dS), M1a (two site classes: &#x3c9;=1, &#x3c9;&lt;1) and M2a (three site classes: &#x3c9;=1, &#x3c9;&lt;1, &#x3c9;&gt;1). The CodonFreq parameter was set to F3x4. Models M0 and M1a were compared against each other as were M1a and M2a. The models were compared using the likelihood ratio test and the statistical significance of the outcome was determined based on the chi-squared distribution (<xref ref-type="bibr" rid="B67">Yang, 2007</xref>).</p>
</sec>
</sec>
<sec id="s3">
<title>Results and discussion</title>
<sec id="s3_1">
<title>Hunting for shared phage families in the human oral virome</title>
<p>The habitat we chose to begin our search for ubiquitous phage families in humans was the oral cavity due to its rich microbial diversity (<xref ref-type="bibr" rid="B27">Huttenhower et&#xa0;al., 2012</xref>), presence of many unique niches that can be explored, and its relevance to human health as a gateway to the human body (<xref ref-type="bibr" rid="B31">Li et&#xa0;al., 2000</xref>). The most straightforward way to find a TerL marker core to the human oral virome would be to perform a joint phylogenetic analysis of all TerL sequences across multiple oral metagenomes obtained from different individuals. Such an approach, however, is impractical due to the highly divergent nature of TerL sequences, the relatively short lengths of contigs, and limitations of metagenomic annotation (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S1</bold>
</xref>). To circumvent these challenges we devised a method based on a combination of clustering and filtering steps. To this end, we applied a novel metagenomic clustering method that we developed that uses a reference library of annotated viral sequences to extract putative unrelated viral gene families from a metagenome (<xref ref-type="bibr" rid="B59">Tadmor and Phillips, 2022</xref>) (see Materials and Methods). This approach enabled us to examine the putative viral gene families present in six metagenomes of supragingival dental plaque samples obtained from six individuals from Spain with varying degrees of oral hygiene (<xref ref-type="bibr" rid="B4">Belda-Ferre et&#xa0;al., 2012</xref>), referred to as the Mira dataset (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S2A</bold>
</xref> and the Materials and Methods section for a summary of our search strategy). Analyzing in total nearly two million contigs, our search algorithm identified an average of 1236 viral gene families per metagenome (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S1</bold>
</xref>), of which 76 encoded TerL genes (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S2</bold>
</xref>). Since our goal was to establish whether the majority of healthy humans share certain conserved phage markers, we narrowed the list of TerL candidates to those that were conserved across the majority of the TerL gene in at least two human subjects from two independent studies from different parts of the world. The second study we selected, which we refer to as the Xie dataset, was obtained from the oral cavity of a healthy individual from the United States (<xref ref-type="bibr" rid="B66">Xie et&#xa0;al., 2010</xref>). This final screening step left us with seven non-homologous TerL gene fragments labeled <italic>H</italic>A, <italic>H</italic>B1, <italic>H</italic>B2, <italic>PC</italic>A2, <italic>PC</italic>A1, <italic>A</italic>B1, <italic>A</italic>B2, with the prefix corresponding to the oral health of the subject in which the marker was discovered, indicating good (<italic>H</italic>), mediocre (<italic>PC</italic>), or poor (<italic>A</italic>) oral hygiene (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S2</bold>
</xref>). Such a labeling scheme enabled us to correlate marker prevalence with oral hygiene (see below). Lastly, each TerL gene fragment was swapped with a closely related homologous full-length TerL sequence, using the Human Oral Microbiome Database (HOMD) (<xref ref-type="bibr" rid="B12">Chen et&#xa0;al., 2010</xref>) and the Human Microbiome Project (HMP) dataset (<xref ref-type="bibr" rid="B37">Meth&#xe9; et&#xa0;al., 2012</xref>) to expand our sequence search space to include full length sequences (see Materials and Methods for objective search strategy). The HMP dataset was excluded from the step of identifying shared phage markers in order to avoid introduction of biases in subsequent analyses of this dataset.</p>
<p>Our full-length phage markers corresponded to HK97-associated COG4626/pfam03354 Terminase_1 (HA, HB1, HB2, PCA2, AB1), and SPP1-associated COG1783/pfam04466 Terminase_3 (PCA1), with AB2 not corresponding to any known pfam/COG (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S3</bold>
</xref>). These results were consistent with phylogenetic analysis of the TerL markers in the broader context of TerL genes observed in nature (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S4</bold>
</xref>). The seven full-length TerL marker genes we obtained represent unrelated lineages since any pair of TerL markers exhibited little or no sequence similarity at the amino acid level (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S4</bold>
</xref>), as is typically the case for TerL genes. Going back to the Mira study, we BLASTed the full-length TerL markers against the six oral metagenomes and found that apart from PCA2, all markers achieved alignments exceeding 70% identity at the amino acid level in 3 to 5 of the six subjects, confirming the shared presence of these markers in this small cohort (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S2B</bold>
</xref>).</p>
</sec>
<sec id="s3_2">
<title>Experimental validation of phage families derived bioinformatically from metagenomic datasets</title>
<p>To confirm that our bioinformatically-derived TerL-based phage families can also be verified experimentally we tested for the presence of TerL markers in oral samples collected from orally healthy subjects using targeted sequencing. Using amino acid alignments from multiple public datasets we designed degenerate primers (<xref ref-type="bibr" rid="B53">Rose et&#xa0;al., 1998</xref>) targeting conserved amino acid motifs (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S3, Figure S3</bold>
</xref>). Sequencing the resulting PCR products, we were indeed able to experimentally identify the presence of all but one (AB1) of the phage families in at least two of the three tested individuals (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S5</bold>
</xref>). Using the same targeted sequencing approach we then tested for the presence of three of the phage families (HB1, HA, and PCA2) across six oral habitats collected from nine additional subjects (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>). We found all three phage families in this cohort were robustly present in the oral cavity (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>). In a companion paper we discuss in greater depth TerL sequence diversity obtained by targeted sequencing, including HB1 sequences obtained from 61 individuals across three continents (<xref ref-type="bibr" rid="B33">Mahmoudabadi et&#xa0;al., 2019</xref>).</p>
</sec>
<sec id="s3_3">
<title>Evidence for the functionality of sequences retrieved by the phage markers</title>
<p>Although whole community metagenomes provide a snapshot into both lytic and lysogenic phage families, they have the drawback that they do not provide direct evidence that the sequences we recover are part of functional phages. However, several indirect lines of evidence suggest that the shared TerL lineages we identified encode functional genes associated with genuine phage elements. First, we confirmed that the original contigs encoding the TerL markers harbored larger phage-like elements (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S5</bold>
</xref>), and that close homologs of most of the markers can be found in extended prophage-like elements (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S6, Supporting Text S2</bold>
</xref>), helping to rule out non-genuine phage elements such as gene transfer agents (GTAs) and bacteriocins (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S3</bold>
</xref>). Second, we confirmed that sequences retrieved using the markers or primers were under substantial negative selection (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S6</bold>
</xref>), lacked premature stop codons or frameshift mutations and functional signatures typical of TerL genes were strictly conserved in these sequences (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S3</bold>
</xref> for alignments and <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S7</bold>
</xref> for a summary of conserved functional signatures). Finally, we showed that the markers can be detected in virus-like particles (VLPs) using a fourth metagenomic dataset comprising 15 salivary viromes obtained from five periodontally healthy human subjects (<xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>) (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1D</bold>
</xref>). In the case of HB1, we further experimentally verified these results by showing that this marker could be detected by PCR amplification in virus-like particles extracted from a tenth oral sample from our own cohort of oral samples (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S7A</bold>
</xref>). Taken together, the evidence above suggests that, overall, TerL sequences retrieved using our markers encode functional genes that have either been active in recent evolutionary history and/or are part of a population of functional phages, and thus we speculate are not degenerating pseudogenes experiencing random drift (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S3</bold>
</xref>).</p>
</sec>
<sec id="s3_4">
<title>Prevalence of the phage markers in the HMP oral metagenomes</title>
<p>We next explored the prevalence of these phage families within the HMP oral cohort, which comprises 90 subjects sampled from up to eight oral sites spanning in total 382 metagenomes (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S8</bold>
</xref>). Remarkably, we found that virtually all 90 subjects were positive for the HB1 phage family with at least 70% identity, and 76% of subjects were positive for the HB1 phage family with at least 95% identity (see <xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1E</bold>
</xref>, <xref ref-type="fig" rid="f2">
<bold>2A</bold>
</xref>, for alignment criteria see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S4</bold>
</xref>). Likewise, more than 85% of subjects were positive for the HA and PCA1 phage families with at least 70% identity at the amino acid level, and 72% and 63% of subjects were positive for the HA and PCA1 phage families, respectively, with at least 95% identity (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). In addition, nearly all subjects were positive for any pair combination of HB1, HB2, HA and PCA1 (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>), however, presence of any specific pair of phage families was only weakly correlated (absolute Spearman&#x2019;s rank correlation &#x2264;0.24), consistent with these markers representing independent TerL phage families. Since all subjects participating in the HMP study were orally healthy, perhaps expectedly, we found that markers obtained from metagenomes of orally healthy subjects in the Mira dataset (HB1, HB2, HA) were more prevalent than markers obtained from metagenomes of subjects with oral health problems (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S5</bold>
</xref>). The high prevalence of TerL phage families in the HMP, Mira, and Xie oral metagenomes, the salivary VLP metagenomes, and our own oral cohort interrogated by targeted sequencing suggests that these TerL phage families are ubiquitous in humans and contribute to a widely shared human virome. In <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S2</bold>
</xref> we summarize the requirements we propose a ubiquitous viral marker should satisfy.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Prevalence of the TerL phage families across human habitats. <bold>(A)</bold> Percent of subjects that were positive for the TerL phage families in the oral cavity regardless of collection site, visit number, or replicate evaluated across 90 subjects based on 382 HMP oral metagenomes passing HMP quality control criteria. For all panels, a TerL phage family was considered present in a subject if the maximum percent identity of its TerL sequence across all BLAST alignments spanning at least 150 amino acids exceeded the indicated percent identity threshold. <bold>(B)</bold> Percent of subjects positive for any pair combination of TerL phage families. <bold>(C)</bold> Presence of TerL phage families across seven body habitats taking into account one metagenome per subject. Alignments in all panels were performed on amino acid sequences.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frmbi-01-1097124-g002.tif"/>
</fig>
</sec>
<sec id="s3_5">
<title>Prevalence of the phage families in natural environments</title>
<p>To check whether the TerL phage families that we identified are in fact specific to the human virome or also prevalent in natural environments, we compared the prevalence of the TerL phage families across three oral habitats (206 metagenomes) with their prevalence across 233 environmental metagenomes from the IMG/M database (<xref ref-type="bibr" rid="B13">Chen et&#xa0;al., 2018</xref>) collected from over 70 unique sites across 13 countries, selected to have comparable genome sizes (number of assembled coding contigs) and contig lengths to HMP oral metagenomes (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S9</bold>
</xref>). Our comparison indicates that members of the TerL phage families were mostly prevalent in human oral metagenomes and relatively sparse in environmental metagenomes, with most markers, except for HB1 and to a lesser extent HB2, displaying relatively remote homologs in a small subset of environmental metagenomes (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1F, G</bold>
</xref>). In <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S6</bold>
</xref> we show that members of the HB1 and HB2 phage families appearing in environmental samples are phylogenetically distinct from their respective human-associated counterparts. To rule out potential sampling bias, we repeated this analysis in 3663 environmental metagenomes from the IMG/VR database spanning 35 distinct ecosystems (listed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S9</bold>
</xref>), comprising in total nearly 20 million viral contigs (<xref ref-type="bibr" rid="B45">Paez-Espino et&#xa0;al., 2016b</xref>). Indeed, this analysis revealed a similar pattern of prevalence, as shown in <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S8</bold>
</xref>. An analysis of 109 environmental viromes (metagenomes of VLPs) deposited in the VIROME portal (<xref ref-type="bibr" rid="B65">Wommack et&#xa0;al., 2012</xref>) also revealed similar patterns (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S9</bold>
</xref> and <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1H</bold>
</xref>). In addition, we confirmed that the env_nr database did not contain more divergent homologs when using PSI-BLAST, and ruled out potential biases related to contig length, genome size, community complexity, read depth, method of assembly and sequencing technology (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S7</bold>
</xref>). Lastly, we performed an exhaustive search for TerL phage families in bacterial and viral genomes deposited in the IMG/M and non-redundant (nr) protein databases (<xref ref-type="bibr" rid="B47">Pruitt et&#xa0;al., 2007</xref>). Consistent with our analyses of whole community and VLP metagenomes, we found that except for six genomes originating from environmental bacteria that were positive for HB1, and two genomes positive for HB2 isolated from sewage and industrial environments, all remaining ~2300 genomes were obtained from bacteria isolated from human, animal, or insect (HB1) hosts (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>). These results agree with our previous finding and show that aside from HB1 and to a lesser extent HB2, the shared TerL phage families were quite specific to the viromes of humans and animals.</p>
</sec>
<sec id="s3_6">
<title>Distribution of the phage families across the human body</title>
<p>To elucidate the spatial distribution of the TerL phage families across the human body we mapped the presence of members of these families across seven body sites collected from 94 healthy individuals spanning 379 HMP metagenomes. Presence was determined based on a 70% identity threshold because this threshold captured the majority of phage family members (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S9</bold>
</xref>); however, our findings did not depend on the applied percent identity threshold, as further discussed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S8</bold>
</xref>.</p>
<p>We found that most TerL phage families (HA, PCA1, PCA2, AB1, AB2) were indeed prevalent in the oral cavity and generally absent from stool, the nasal cavity, the female urogenital (UG) tract, and skin, except for a mild presence of PCA1 in skin (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). HB1 and HB2 phage families, however, were exceptional and were found to be widespread not only in the oral cavity, but also in a considerable fraction of stool samples (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>), with up to ~90% and ~60% of subjects containing HB1 and HB2 TerL phage families in stool samples, respectively, when controlling for genome size (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S7</bold>
</xref>). To confirm the distribution of these phage families in stool samples, we tested for their presence in 14 metagenomic studies investigating stool samples obtained from healthy individuals included in the Gut Virome Database (GVD) (<xref ref-type="bibr" rid="B23">Gregory et&#xa0;al., 2020</xref>). We found the HB1 phage family in nearly all studies, including 11 viromes (metagenomes of VLPs), showing that HB1 was present in stool samples of individuals across four continents. With few exceptions, the remaining markers were either not detected in the gut studies, or present only as remote homologs, confirming the distribution we observed in the HMP metagenomes (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S11</bold>
</xref>). HB2 phage family was present in all three whole community studies, and to a lesser extent in viromes. The remaining phage families were largely absent from the gut studies, confirming the spatial patterns of distribution we had observed in the HMP metagenomes (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S11</bold>
</xref>).</p>
<p>We next contrasted our findings in the HMP dataset with the presence of the markers in bacteria and phages isolated from different human body habitats. To this end we exhaustively searched the IMG, HOMD and the non-redundant (nr) protein databases for close homologs of the markers, carefully determining for each isolate its health-related status, for example, was it isolated from a healthy human subject or a human subject diagnosed with a certain disease, was the isolate designated as a human pathogen, a carriage strain, or was the isolate obtained from a non-human host (see Materials and Methods for precise criteria and <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref> for a comprehensive list of isolates). Focusing on bacterial isolates obtained from healthy individuals, we indeed found that the HA and PCA1 phage families were present in oral and/or airway bacterial isolates from the <italic>Streptococcus</italic> genus, a genus known to be highly abundant in the oral cavity of healthy humans (<xref ref-type="bibr" rid="B27">Huttenhower et&#xa0;al., 2012</xref>). Likewise, AB2 was found in an oral bacterial isolate from the <italic>Actinomyces</italic> genus, a genus also known for its abundance in the oral cavity of healthy humans (<xref ref-type="bibr" rid="B27">Huttenhower et&#xa0;al., 2012</xref>) (phylogenetic placement of all bacterial hosts is summarized in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S12</bold>
</xref>). No oral bacterial isolates were found for PCA2; however, PCA2 was found in colon and gastric isolates, the latter suspected to be a swallowed oral bacterium caught in the act of transiting (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref> for further details).</p>
<p>Finally, members of the HB1 and HB2 phage families were found in multiple gut bacterial isolates from the widespread Firmicutes phylum (<xref ref-type="bibr" rid="B27">Huttenhower et&#xa0;al., 2012</xref>), in agreement with our metagenomic analysis. Interestingly, however, no bacterial isolate from the oral cavity or airways, including carriage and pathogenic strains, was found to contain even distant homologs of the HB1 marker despite the overwhelming abundance of HB1 in the oral cavity of healthy humans. One possible explanation for this intriguing result could be that in healthy humans, the HB1 phage family found in the oral cavity is predominately lytic, a prediction that we were able to experimentally confirm, as we further discuss below. Despite the high prevalence of HB1 and HB2 phage families in stool samples, they were not related to the crAss-like phage family (<xref ref-type="bibr" rid="B24">Guerin et&#xa0;al., 2018</xref>), a recently identified widespread family of phages in gut viromes.</p>
</sec>
<sec id="s3_7">
<title>Phylogenetic analysis of TerL phage families</title>
<p>Thus far our attention has been focused on the prevalence of each phage family. However, within each family, members display incredible inter- and intra-subject sequence diversity (<xref ref-type="supplementary-material" rid="SM1">
<bold>Tables S5, S13</bold>
</xref>). To better characterize this sequence diversity, we wished to understand whether each phage family was comprised of a single indivisible TerL lineage, or, conversely, multiple distinct TerL sublineages, in which case we aimed to determine how different body sites were associated with different sublineages. For our marker-based phylogenetic analysis we chose to use phylogenetic networks (<xref ref-type="bibr" rid="B7">Bryant and Moulton, 2004</xref>; <xref ref-type="bibr" rid="B26">Huson and Bryant, 2006</xref>) to account for possible viral recombination events, events which cannot be represented by phylogenetic trees (<xref ref-type="bibr" rid="B30">Lemey et&#xa0;al., 2009</xref>).</p>
<p>A phylogenetic analysis of the HB1 TerL phage family revealed that it is comprised of three main sublineages: (i) a sublineage consisting primarily of gut metagenomic sequences and gut bacterial isolates (the &#x201c;GI clade&#x201d; in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>), (ii) a sublineage consisting nearly exclusively of oral metagenomic sequences and completely devoid of bacterial isolates (the &#x201c;oral clade&#x201d; in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>), and (iii) a sublineage consisting primarily of environmental sequences (the &#x201c;Environmental clade&#x201d; in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S6</bold>
</xref>). The phylogenetic distinction between gut and oral sequences was supported with 98% bootstrap support by a maximum likelihood phylogenetic tree after removing potentially recombinant sequences (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S10</bold>
</xref>). The finding that metagenomic HB1 gut-derived sequences grouped with 16 human-associated bacterial isolates from the gut is consistent with the notion that the human gut is generally dominated by phages exhibiting a lysogenic lifestyle (<xref ref-type="bibr" rid="B50">Reyes et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B51">Reyes et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B43">Ogilvie and Jones, 2015</xref>). In contrast, the oral HB1 clade was devoid of bacterial isolates and grouped with the lytic <italic>Lactococcus lactis</italic> phage 1706, further supporting our prediction that oral phages positive for the HB1 marker should be predominately lytic. To further explore this hypothesis, we filtered oral samples obtained from an orally healthy subject through a 0.2 <italic>&#x3bc;m</italic> pore size filter and performed multiple PCRs on the bacterial and the viral fractions. We were unable to amplify HB1 from any of the PCRs performed on the bacterial fraction; however, we were able to amplify HB1 from the majority of samples corresponding to viral fractions (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S7A</bold>
</xref>). When the same experiment was performed on the HA marker, the opposite result was obtained: we could amplify HA from all samples originating from the bacterial fraction, yet we could not amplify HA from any of the samples originating from the viral fraction (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S7B</bold>
</xref>). These experiments support our hypothesis that the HB1 phage family in the oral cavity is likely predominately lytic.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Phylogenetic analysis of TerL phage families. Neighbor-Net analysis for <bold>(A)</bold> HB1, <bold>(B)</bold> HB2 and <bold>(C)</bold> AB2 phage families based on 386, 341, and 350 unambiguous amino acid residues, respectively, including sequences obtained from the HMP metagenomes (circular nodes) and sequenced bacterial and phage isolates (square nodes). Pathogenic bacteria, bacteria isolated from diseased body sites, sterile organs, individuals with diagnosed diseases or diseased animals are marked with an asterisk, otherwise &#x201c;&#xd7;&#x201d; denotes suspected pathogenicity, &#x201c;c&#x201d; denotes a carriage strain, and &#x201c;na&#x201d; denotes unknown health-related status. Bacterial isolates belonging to the same species, sampled from the same body region (mouth, skin, nose, the gastrointestinal (GI) tract or the UG tract), and with the same health-related status were consolidated using a 3% OTU threshold at the amino acid level (OTU assignment for all isolates is provided in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>). <italic>n</italic> denotes the number of HMP subjects contributing sequences to a given clade color coded by the body habitat indicated in the legend, <italic>P</italic> denotes the total number of disease or carriage associated human bacterial isolates within a &#x201c;P&#x201d; clade out of all isolates in the given clade. In the &#x201c;P&#x201d; clade of HB2, <italic>i</italic> denotes the total number of human bacterial isolates represented by the given OTU (shown for <italic>i &#x2265;</italic> 10). If unstated, bacterial isolates were obtained from humans. See Materials and Methods for precise inclusion criteria of sequences. Neighbor-Net networks were calculated with SplitsTree4 (<xref ref-type="bibr" rid="B26">Huson and Bryant, 2006</xref>). Phylogenetic analysis of HB1, HB2 and AB2 was based on 176, 139 and 57 sequences, respectively, using optimal models determined by the AIC criterion (WAG+I+G) with optimal &#x3b1; and Pinv parameters. BAL, bronchoalveolar lavage; STR, sterile body site.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frmbi-01-1097124-g003.tif"/>
</fig>
</sec>
<sec id="s3_8">
<title>Spatial distribution of phage family sublineages</title>
<p>Our phylogenetic analysis further revealed TerL sublineages that displayed remarkable specificity to certain oral habitats. For example, the oral clade of the HB1 phage family contained distinct sublineages uniquely associated with the tongue dorsum, and different sublineages that were uniquely associated with supragingival plaque (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S11A</bold>
</xref>). The HB2 phage family followed a similar oral/gut organization as HB1 (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>), and like HB1 also displayed sublineages uniquely associated with either the tongue dorsum or supragingival plaque. Similar site-specific sublineages were found for the AB2, HA, and PCA1 phage families (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Figures S11B</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>S12A</bold>
</xref>, respectively). Such exclusive associations between certain TerL phage sublineages and specific oral habitats suggest that proximal habitats within the oral cavity can comprise unique phage communities that remain localized despite constant contact between these habitats mediated by the tongue and saliva. These findings are in line with the site-specialist worldview of the oral cavity microbiome where most microbes in the mouth are found in specific oral habitats (<xref ref-type="bibr" rid="B62">Welch et&#xa0;al., 2019</xref>). However, most phage families also contained sublineages obtained from a mixture of oral habitats (denoted as &#x201c;M&#x201d; clades, highlighted in green in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Figures S11</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>S12</bold>
</xref>), possibly an indication that the bacterial hosts of these specific phage family members colonize multiple oral habitats, a hypothesis we further explore below.</p>
</sec>
<sec id="s3_9">
<title>Phage family sublineages potentially associated with pathogenicity</title>
<p>Interestingly, most phage families contained certain clades that were not found in the HMP study. These clades, denoted as &#x201c;P&#x201d; clades, are highlighted in yellow in the phylogenetic networks (<xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3B, C</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Figures S11B</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>S12</bold>
</xref>). The absence of HMP metagenomic sequences from &#x201c;P&#x201d; clades was statistically significant (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S14</bold>
</xref>), and confirmed by targeted sequencing in our own cohort of oral samples (see below). This observation can possibly be explained by the fact that the vast majority of human-associated bacterial isolates in &#x201c;P&#x201d; clades were either pathogens, were isolated from diseased body sites, were isolated from individuals with a diagnosed disease, or were carriage strains, as indicated in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S14</bold>
</xref>, whereas the subjects participating in the HMP study and in our cohort were healthy (all bacterial isolates belonging to &#x201c;P&#x201d; clades are highlighted in <xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>). Since &#x201c;P&#x201d; clades were absent in healthy individuals, &#x201c;P&#x201d; clades could possibly serve as specific biomarkers for detection of potential pathogens in humans. Another intriguing feature of &#x201c;P&#x201d; clades was the presence of bacteria isolated from animals (HB2, HA, PCA1), potentially revealing a phage signature of animal-to-human transmission. For example, the &#x201c;P&#x201d; clade of HB2 (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>) contains a mixture of human pathogens, carriage strains and sequences isolated from animals, including <italic>Streptococcus suis sv.</italic> JS14 and <italic>Streptococcus porcinus Jelinkova</italic> 176, two human pathogens originally isolated from pigs (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S10</bold>
</xref>).</p>
</sec>
<sec id="s3_10">
<title>Phylogenetic analysis of PCR-amplified sequences supports metagenomic results</title>
<p>To independently confirm phylogenies that were based on HMP metagenomic sequences, we also inferred phylogenies based on PCR-amplified TerL sequences together with HMP metagenomic sequences. In <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S9</bold>
</xref> we show that PCR-amplified alleles obtained from specific oral sites for HB1, HB2, HA, PCA1, PCA2 and AB2 were generally intermixed and indistinguishable from metagenomic alleles obtained from the same body sites. Our analysis also showed that none of the PCR-amplified TerL sequences mapped to &#x201c;P&#x201d; clades, further supporting our observation that healthy subjects did not contribute TerL alleles to &#x201c;P&#x201d; clades. These results show that our metagenomic-based phylogenetic inferences could be confirmed by targeted sequencing, indicating that the phylogenetic patterns we observed in metagenomic datasets were not a result of sequencing or assembly artifacts.</p>
</sec>
<sec id="s3_11">
<title>Temporal stability of phage families</title>
<p>Finally, to explore the temporal dynamics of phage families we estimated their persistence across specific body habitats in subjects sampled between two consecutive visits, separated on average by 219 &#xb1; 69 (s.d.) days (<xref ref-type="bibr" rid="B27">Huttenhower et&#xa0;al., 2012</xref>). We quantified this persistence by measuring the fraction of subjects for which a phage family was detected in the first visit but was absent in the second visit, or vice versa, denoted by <italic>f<sub>switch</sub>
</italic> (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). We found that presence of most families (HB1, HB2, PCA1, and AB1) was stable in the oral cavity (<italic>f<sub>switch</sub>
</italic>=0), with HB1 and HB2 also stable in the gut (<italic>f<sub>switch</sub>
</italic> &#x2264; 0.08). Indeed, members of a phage family that were present in both visits often had identical amino acid sequences (<xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S13</bold>
</xref>), consistent with previous studies that showed that salivary and fecal viromes are genetically stable (<xref ref-type="bibr" rid="B50">Reyes et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B39">Minot et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B43">Ogilvie and Jones, 2015</xref>; <xref ref-type="bibr" rid="B55">Shkoporov et&#xa0;al., 2019</xref>). However, when considering specific oral habitats, most families exhibited considerable temporal variability, with variability highest in the buccal mucosa (<italic>f<sub>switch</sub>
</italic> = 0.36 &#xb1; 0.06, omitting AB1). One possible explanation for habitat variability could be host migration within the oral cavity. For example, the fact that buccal mucosa-derived sequences typically mapped to &#x201c;M&#x201d; clades (clades containing a mixture of sequences from different oral habitats) may indicate that the buccal mucosa contains bacterial hosts that can colonize multiple oral habitats that possibly migrate between different compartments (see examples for potential host migration events in <xref ref-type="supplementary-material" rid="SM1">
<bold>Figure S13</bold>
</xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Temporal stability of phage families. Presence of a phage family was determined in two consecutive visits of the same subject considering one metagenome per habitat. In the case of the oral cavity, oral habitats were considered both separately and as a single ecosystem (top row). In the latter case, presence was required in any oral habitat, and absence was required for all oral habitats. A phage family was considered present if any alignment against the corresponding TerL marker sequence spanning at least 150 aa exceeded 70% identity at the amino acid level (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supporting Text S4</bold>
</xref> for optimal alignment length criteria for the HMP metagenomes). Reducing the percent identity threshold to 55% did not have a significant impact on results. To minimize potential coverage bias, a marker was determined to be absent if no alignment spanning a minimum of 75 aa exceeded 40% identity, allowing us to also rule out remote homologs and homologs on short contigs. Blue lines denote unchanged state (presence in both visits or absence in both visits). Red lines denote a change (presence in visit 1 and absence in visit 2, or vice versa). Line widths are proportional to the fraction of subjects that share the same transition. <italic>n</italic> denotes the total number of subjects. Diagrams for habitats for which a marker was found to be always absent were omitted.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frmbi-01-1097124-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<title>Conclusions</title>
<p>Much like classical SSU rRNA studies, we found that by focusing our analysis on TerL markers we were able to identify certain TerL phage families that were both conserved and widely shared across the human oral microbiome. This finding is intriguing in light of the tremendous genetic diversity of viruses in nature (<xref ref-type="bibr" rid="B18">Edwards and Rohwer, 2005</xref>; <xref ref-type="bibr" rid="B44">Paez-Espino et&#xa0;al., 2016a</xref>), the lack of conservation of the TerL gene (<xref ref-type="bibr" rid="B19">Eppler et&#xa0;al., 1991</xref>; <xref ref-type="bibr" rid="B11">Chai et&#xa0;al., 1992</xref>; <xref ref-type="bibr" rid="B40">Moore and Prevelige, 2002</xref>; <xref ref-type="bibr" rid="B49">Rao and Feiss, 2008</xref>), and the individualized nature of human viromes established by previous studies (<xref ref-type="bibr" rid="B50">Reyes et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B38">Minot et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B46">Pride et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B51">Reyes et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B55">Shkoporov et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B41">Moreno-Gallego et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B23">Gregory et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B69">Zuo et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B20">Garmaeva et&#xa0;al., 2021</xref>). Overall, the shared TerL lineages we identified accounted for, on average, about 25% of all nonredundant TerL gene families (<xref ref-type="supplementary-material" rid="SM1">
<bold>Table S15</bold>
</xref>), adding to the growing body of evidence of the existence of widely shared members of the human virome (<xref ref-type="bibr" rid="B56">Stern et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B34">Manrique et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B41">Moreno-Gallego et&#xa0;al., 2019</xref>).</p>
<p>Although our marker-based approach provided a relatively narrow genomic window into the core human virome, focusing on a single gene enabled us to perform a comparative analysis of this gene across different subjects, different habitats and different time points. Furthermore, our markers, through the use of primers that we developed, enable sequence diversity analysis that is independent of metagenome sequencing. It would therefore be interesting to complement this study with single cell sequencing and genome assembly approaches, which could help shed light on the covariation between different phage families and their bacterial hosts across different body habitats. Furthermore, our analysis focused only on shared phage families within the oral cavity; however, our approach can be extended to other sites to create a comprehensive atlas of shared TerL phage families across the entire human body. More broadly, the fact that we have identified to date phage families with shared TerL lineages in both humans and termites (<xref ref-type="bibr" rid="B58">Tadmor et&#xa0;al., 2011</xref>) suggests that phage families with shared TerL lineages across species of organisms may be a common theme in the animal kingdom. Consequently, a comprehensive catalog of ubiquitous TerL phage families could potentially be expanded to encompass other organisms, possibly serving as a useful means for classifying and cataloging recurrent viral diversity core to different organisms.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>Experimental sequences used in the current study are available at: <uri xlink:href="https://github.com/gitamahm/human_virome">https://github.com/gitamahm/human_virome</uri>.</p>
</sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The human samples collected in this study followed Caltech Institutional Review Board IRB protocol 14-0430 and Institutional Biosafety Committee IBC protocol 13-198 with subjects providing written consent. Additional human samples analyzed in this study were provided to us by Bik et&#xa0;al. [The ISME journal 4, 962 (2010)] and were collected in accordance with Stanford IRB protocols.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>AT and RP conceived the study, AT devised and performed the bioinformatic analysis as well as designed the degenerate primers for the markers, GM designed and executed experiments and performed the selection pressure analysis, HF and GM performed the experiments testing bacterial and viral fractions of oral samples, GM and AT performed data analysis related to experiments, and RP scientifically oversaw the project and advised. The paper was written by AT and critically reviewed and edited by all authors. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by the NIH Director&#x2019;s Pioneer Award, the NIH&#x2019;s Eureka grant no. R01-GM098465, and the National Science Foundation Graduate Research Fellowship Program (GRFP).</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We wish to thank S. R. Quake, D. A. Relman, and P. C. Blainey for their initial advice on the project and D. A. Relman for donating oral samples to us, E. M. Rubin, N. C. Kyrpides, V. M. Markowitz, T. B. K. Reddy and H. Huot-Creasy, A. Clum, and N. Ivanova for providing support with the IMG and HMP datasets, E. Allen-Vercoe, D. W. Verner-Jeffreys, C. Michel, N. J. Croucher, M. Kilian, M. J. Loessner, L. Ikryannikova, J. Izard, M. Hilty, M. Sizova, P. Glaser, M. R. Davies, O. L. Franco, M. J. Wolin, M. Gottschalk and S. Moineau for providing information regarding bacterial isolates, and D. H. Huson and D. Bryant for providing support for SplitsTree4. We further wish to thank J. Boedicker, F. Weinert, and K. Homyk for their involvement in the initial experimental verification of the markers, and A. Debnath for assisting with initial investigations. The IMG/VR sequence data were produced by the US Department of Energy Joint Genome Institute (<uri xlink:href="https://www.jgi.doe.gov/">https://www.jgi.doe.gov/</uri>) in collaboration with the user community and were authorized for use in this study.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frmbi.2022.1097124/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frmbi.2022.1097124/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.pdf" id="SM1" mimetype="application/pdf"/>
<supplementary-material xlink:href="DataSheet_2.zip" id="SM2" mimetype="application/zip"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aagaard</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Petrosino</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Keitel</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Watson</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Katancik</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Garcia</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>The human microbiome project strategy for comprehensive sampling of the human microbiome and why it matters</article-title>. <source>FASEB J.</source> <volume>27</volume>, <fpage>1012</fpage>&#x2013;<lpage>1022</lpage>. doi: <pub-id pub-id-type="doi">10.1096/fj.12-220806</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adachi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Hasegawa</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Model of amino acid substitution in proteins encoded by mitochondrial DNA</article-title>. <source>J. Mol. Evol.</source> <volume>42</volume>, <fpage>459</fpage>&#x2013;<lpage>468</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF02498640</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adachi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Waddell</surname> <given-names>P. J.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Hasegawa</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Plastid genome phylogeny and a model of amino acid substitution for proteins encoded by chloroplast DNA</article-title>. <source>J. Mol. Evol.</source> <volume>50</volume>, <fpage>348</fpage>&#x2013;<lpage>358</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s002399910038</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Belda-Ferre</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Alcaraz</surname> <given-names>L. D.</given-names>
</name>
<name>
<surname>Cabrera-Rubio</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Romero</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sim&#xf3;n-Soro</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Pignatelli</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>The oral metagenome in health and disease</article-title>. <source>ISME J.</source> <volume>6</volume>, <fpage>46</fpage>&#x2013;<lpage>56</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ismej.2011.85</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bik</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Long</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Armitage</surname> <given-names>G. C.</given-names>
</name>
<name>
<surname>Loomer</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Emerson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Mongodin</surname> <given-names>E. F.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>Bacterial diversity in the oral cavity of 10 healthy individuals</article-title>. <source>ISME J.</source> <volume>4</volume>, <fpage>962</fpage>. doi: <pub-id pub-id-type="doi">10.1038/ismej.2010.30</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Br&#xfc;ssow</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Canchaya</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Hardt</surname> <given-names>W.-D.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Phages and the evolution of bacterial pathogens: From genomic rearrangements to lysogenic conversion</article-title>. <source>Microbiol. Mol. Biol. Rev.</source> <volume>68</volume>, <fpage>560</fpage>&#x2013;<lpage>602</lpage>. doi: <pub-id pub-id-type="doi">10.1128/MMBR.68.3.560-602.2004</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bryant</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Moulton</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Neighbor-net: An agglomerative method for the construction of phylogenetic networks</article-title>. <source>Mol. Biol. Evol.</source> <volume>21</volume>, <fpage>255</fpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msh018</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Janke</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Waddell</surname> <given-names>P. J.</given-names>
</name>
<name>
<surname>Westerman</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Takenaka</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Murata</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>1998</year>). <article-title>Conflict among individual mitochondrial proteins in resolving the phylogeny of eutherian orders</article-title>. <source>J. Mol. Evol.</source> <volume>47</volume>, <fpage>307</fpage>&#x2013;<lpage>322</lpage>. doi: <pub-id pub-id-type="doi">10.1007/PL00006389</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caporaso</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Kuczynski</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Stombaugh</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bittinger</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Bushman</surname> <given-names>F. D.</given-names>
</name>
<name>
<surname>Costello</surname> <given-names>E. K.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>QIIME allows analysis of high-throughput community sequencing data</article-title>. <source>Nat. Methods</source> <volume>7</volume>, <fpage>335</fpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.f.303</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casjens</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Prophages and bacterial genomics: What have we learned so far?</article-title> <source>Mol. Microbiol.</source> <volume>49</volume>, <fpage>277</fpage>&#x2013;<lpage>300</lpage>. doi: <pub-id pub-id-type="doi">10.1046/j.1365-2958.2003.03580.x</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chai</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bravo</surname> <given-names>A.</given-names>
</name>
<name>
<surname>L&#xfc;der</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Nedlin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Trautner</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Alonso</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>1992</year>). <article-title>Molecular analysis of the <italic>Bacillus subtilis</italic> bacteriophage SPP1 region encompassing genes 1 to 6: The products of gene 1 and gene 2 are required for pac cleavage</article-title>. <source>J. Mol. Biol.</source> <volume>224</volume>, <fpage>87</fpage>&#x2013;<lpage>102</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0022-2836(92)90578-8</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>W.-H.</given-names>
</name>
<name>
<surname>Izard</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Baranova</surname> <given-names>O. V.</given-names>
</name>
<name>
<surname>Lakshmanan</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Dewhirst</surname> <given-names>F. E.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>The human oral microbiome database: A web accessible resource for investigating oral microbe taxonomic and genomic information</article-title>. <source>Database: J. Biol. Database Curation</source> <volume>2010</volume>, <elocation-id>baq013</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/database/baq013</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>I.-M. A.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Palaniappan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Pillay</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ratner</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>IMG/M v. 5.0: An integrated data management and comparative analysis system for microbial genomes and microbiomes</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>D666</fpage>&#x2013;<lpage>D677</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gky901</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Darriba</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Taboada</surname> <given-names>G. L.</given-names>
</name>
<name>
<surname>Doallo</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Posada</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>ProtTest 3: Fast selection of best-fit models of protein evolution</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>1164</fpage>&#x2013;<lpage>1165</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btr088</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dayhoff</surname> <given-names>M. O.</given-names>
</name>
<name>
<surname>Schwartz</surname> <given-names>R. M.</given-names>
</name>
</person-group> (<year>1978</year>). &#x201c;<article-title>A model of evolutionary change in proteins</article-title>,&#x201d; in <source>Atlas of protein sequence and structure</source> (<publisher-loc>Washington DC</publisher-loc>: <publisher-name>National Biomedical Research Foundation</publisher-name>), <volume>5</volume>(<issue>3</issue>), <page-range>345&#x2013;352</page-range>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Edgar</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>MUSCLE: A multiple sequence alignment method with reduced time and space complexity</article-title>. <source>BMC Bioinf.</source> <volume>5</volume>, <fpage>113</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-5-113</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Edgar</surname> <given-names>R. C.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Search and clustering orders of magnitude faster than BLAST</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>2460</fpage>&#x2013;<lpage>2461</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btq461</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Edwards</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Rohwer</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Viral metagenomics</article-title>. <source>Nat. Rev. Microbiol.</source> <volume>3</volume>, <fpage>504</fpage>&#x2013;<lpage>510</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrmicro1163</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eppler</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wyckoff</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Goates</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Parr</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Casjens</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Nucleotide sequence of the bacteriophage P22 genes required for DNA packaging</article-title>. <source>Virology</source> <volume>183</volume>, <fpage>519</fpage>&#x2013;<lpage>538</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0042-6822(91)90981-G</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garmaeva</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gulyaeva</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sinha</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Shkoporov</surname> <given-names>A. N.</given-names>
</name>
<name>
<surname>Clooney</surname> <given-names>A. G.</given-names>
</name>
<name>
<surname>Stockdale</surname> <given-names>S. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Stability of the human gut virome and effect of gluten-free diet</article-title>. <source>Cell Rep.</source> <volume>35</volume>, <fpage>109132</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.celrep.2021.109132</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Glass</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Wilkening</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wilke</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Antonopoulos</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Meyer</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Using the metagenomics RAST server (MG-RAST) for analyzing shotgun metagenomes</article-title>. <source>Cold Spring Harbor Protoc.</source> <volume>2010</volume>, <fpage>prot5368</fpage>. doi: <pub-id pub-id-type="doi">10.1101/pdb.prot5368</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gouy</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guindon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gascuel</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>SeaView version 4: A multiplatform graphical user interface for sequence alignment and phylogenetic tree building</article-title>. <source>Mol. Biol. Evol.</source> <volume>27</volume> (<issue>2</issue>), <fpage>221</fpage>&#x2013;<lpage>224</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/msp259</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gregory</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Zablocki</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Zayed</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Howell</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bolduc</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Sullivan</surname> <given-names>M. B.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The gut virome database reveals age-dependent patterns of virome diversity in the human gut</article-title>. <source>Cell Host Microbe</source> <volume>28</volume>, <fpage>724</fpage>&#x2013;<lpage>740.e8</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chom.2020.08.003</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guerin</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Shkoporov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Stockdale</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Clooney</surname> <given-names>A. G.</given-names>
</name>
<name>
<surname>Ryan</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Sutton</surname> <given-names>T. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Biology and taxonomy of crAss-like bacteriophages, the most abundant virus in the human gut</article-title>. <source>Cell Host Microbe</source> <volume>24</volume>, <fpage>653</fpage>&#x2013;<lpage>664.e6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chom.2018.10.002</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hamady</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Walker</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Harris</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Gold</surname> <given-names>N. J.</given-names>
</name>
<name>
<surname>Knight</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Error-correcting barcoded primers for pyrosequencing hundreds of samples in multiplex</article-title>. <source>Nat. Methods</source> <volume>5</volume>, <fpage>235</fpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.1184</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huson</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bryant</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Application of phylogenetic networks in evolutionary studies</article-title>. <source>Mol. Biol. Evol.</source> <volume>23</volume>, <fpage>254</fpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msj030</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huttenhower</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Gevers</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Knight</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Abubucker</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Badger</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Chinwalla</surname> <given-names>A. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Structure, function and diversity of the healthy human microbiome</article-title>. <source>Nature</source> <volume>486</volume>, <fpage>207</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature11234</pub-id>
</citation>
</ref>
<ref id="B28">
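<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jones</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Thornton</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>The rapid generation of mutation data matrices from protein sequences</article-title>. <source>Comput. Appl. Biosci.</source> <volume>8</volume>, <fpage>275</fpage>&#x2013;<lpage>282</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/8.3.275</pub-id>
</citation>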
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kearse</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Moir</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wilson</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Stones-Havas</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cheung</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sturrock</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Geneious basic: An integrated and extendable desktop software platform for the organization and analysis of sequence data</article-title>. <source>Bioinformatics</source> <volume>28</volume>, <fpage>1647</fpage>&#x2013;<lpage>1649</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bts199</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lemey</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Salemi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Vandamme</surname> <given-names>A. M.</given-names>
</name>
</person-group> (<year>2009</year>). <source>The phylogenetic handbook: A practical approach to phylogenetic analysis and hypothesis testing</source> (<publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>).</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Kolltveit</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Tronstad</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Olsen</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Systemic diseases caused by oral infection</article-title>. <source>Clin. Microbiol. Rev.</source> <volume>13</volume>, <fpage>547</fpage>&#x2013;<lpage>558</lpage>. doi: <pub-id pub-id-type="doi">10.1128/CMR.13.4.547</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#x141;usiak-Szelachowska</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Weber-D&#x105;browska</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Jo&#x144;czyk-Matysiak</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Wojciechowska</surname> <given-names>R.</given-names>
</name>
<name>
<surname>G&#xf3;rski</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Bacteriophages in the gastrointestinal tract and their implications</article-title>. <source>Gut Pathog.</source> <volume>9</volume>, <fpage>44</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13099-017-0196-7</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahmoudabadi</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Homyk</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Foley</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Catching</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mahmoudabadi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Cheung</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Human phageprints: A high-resolution exploration of oral phages reveals globally-distributed phage families with individual-specific and temporally-stable community compositions</article-title>. <source>BioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/516864</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manrique</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Bolduc</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Walk</surname> <given-names>S. T.</given-names>
</name>
<name>
<surname>van der Oost</surname> <given-names>J.</given-names>
</name>
<name>
<surname>de Vos</surname> <given-names>W. M.</given-names>
</name>
<name>
<surname>Young</surname> <given-names>M. J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Healthy human gut phageome</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>113</volume>, <fpage>10400</fpage>&#x2013;<lpage>10405</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1601060113</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marchler-Bauer</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>L.</given-names>
</name>
<name>
<surname>He</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lanczycki</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>CDD/SPARCLE: Functional classification of proteins <italic>via</italic> subfamily domain architectures</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>D200</fpage>&#x2013;<lpage>D203</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkw1129</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Markowitz</surname> <given-names>V. M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Szeto</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Palaniappan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Jacob</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>IMG/M-HMP: A metagenome comparative analysis system for the human microbiome project</article-title>. <source>PLoS One</source> <volume>7</volume>, <elocation-id>e40151</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0040151</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meth&#xe9;</surname> <given-names>B. A.</given-names>
</name>
<name>
<surname>Nelson</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Pop</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Creasy</surname> <given-names>H. H.</given-names>
</name>
<name>
<surname>Giglio</surname> <given-names>M. G.</given-names>
</name>
<name>
<surname>Huttenhower</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>A framework for human microbiome research</article-title>. <source>Nature</source> <volume>486</volume>, <fpage>215</fpage>&#x2013;<lpage>221</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature11209</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minot</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sinha</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H. S.</given-names>
</name>
<name>
<surname>Keilbaugh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>G. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>The human gut virome: Inter-individual variation and dynamic response to diet</article-title>. <source>Genome Res.</source> <volume>21</volume>, <fpage>1616</fpage>&#x2013;<lpage>1625</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.122705.111</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minot</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bryson</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chehoud</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>G. D.</given-names>
</name>
<name>
<surname>Lewis</surname> <given-names>J. D.</given-names>
</name>
<name>
<surname>Bushman</surname> <given-names>F. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Rapid evolution of the human gut virome</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>110</volume>, <fpage>12450</fpage>&#x2013;<lpage>12455</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1300833110</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moore</surname> <given-names>S. D.</given-names>
</name>
<name>
<surname>Prevelige</surname> <given-names>P. E.</given-names>
<suffix>Jr.</suffix>
</name>
</person-group> (<year>2002</year>). <article-title>DNA Packaging: A new class of molecular motors</article-title>. <source>Curr. Biol.</source> <volume>12</volume>, <fpage>R96</fpage>&#x2013;<lpage>R98</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0960-9822(02)00670-X</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moreno-Gallego</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Chou</surname> <given-names>S.-P.</given-names>
</name>
<name>
<surname>Di Rienzi</surname> <given-names>S. C.</given-names>
</name>
<name>
<surname>Goodrich</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Spector</surname> <given-names>T. D.</given-names>
</name>
<name>
<surname>Bell</surname> <given-names>J. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Virome diversity correlates with intestinal microbiome diversity in adult monozygotic twins</article-title>. <source>Cell Host Microbe</source> <volume>25</volume>, <fpage>261</fpage>&#x2013;<lpage>272.e5</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chom.2019.01.019</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Navarro</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Muniesa</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Phages in the human body</article-title>. <source>Front. Microbiol.</source> <volume>8</volume>, <elocation-id>566</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fmicb.2017.00566</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ogilvie</surname> <given-names>L. A.</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>B. V.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The human gut virome: A multifaceted majority</article-title>. <source>Front. Microbiol.</source> <volume>6</volume>, <elocation-id>918</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fmicb.2015.00918</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paez-Espino</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>I.-M. A.</given-names>
</name>
<name>
<surname>Palaniappan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Ratner</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Szeto</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>a). <article-title>Uncovering Earth&#x2019;s virome</article-title>. <source>Nature</source> <volume>536</volume>, <fpage>425</fpage>. doi: <pub-id pub-id-type="doi">10.1038/nature19094</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paez-Espino</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Pavlopoulos</surname> <given-names>G. A.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Huntemann</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Mikhailova</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2016</year>b). <article-title>IMG/VR: A database of cultured and uncultured DNA viruses and retroviruses</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume> (<issue>D1</issue>), <fpage>D457</fpage>&#x2013;<lpage>D465</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkw1030</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pride</surname> <given-names>D. T.</given-names>
</name>
<name>
<surname>Salzman</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Haynes</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rohwer</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Davis-Long</surname> <given-names>C.</given-names>
</name>
<name>
<surname>White</surname> <given-names>R. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Evidence of a robust resident bacteriophage population revealed through analysis of the human salivary virome</article-title>. <source>ISME J.</source> <volume>6</volume>, <fpage>915</fpage>&#x2013;<lpage>926</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ismej.2011.169</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pruitt</surname> <given-names>K. D.</given-names>
</name>
<name>
<surname>Tatusova</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Maglott</surname> <given-names>D. R.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>NCBI reference sequences (RefSeq): A curated non-redundant sequence database of genomes, transcripts and proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>35</volume>, <fpage>D61</fpage>&#x2013;<lpage>D65</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkl842</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quir&#xf3;s</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Colomer-Lluch</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Mart&#xed;nez-Castillo</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mir&#xf3;</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Argente</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jofre</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>Antibiotic resistance genes in the bacteriophage DNA fraction of human fecal samples</article-title>. <source>Antimicrob. Agents Chemother.</source> <volume>58</volume>, <fpage>606</fpage>&#x2013;<lpage>609</lpage>. doi: <pub-id pub-id-type="doi">10.1128/AAC.01684-13</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rao</surname> <given-names>V. B.</given-names>
</name>
<name>
<surname>Feiss</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The bacteriophage DNA packaging motor</article-title>. <source>Annu. Rev. Genet.</source> <volume>42</volume>, <fpage>647</fpage>&#x2013;<lpage>681</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev.genet.42.110807.091545</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reyes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Haynes</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hanson</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Angly</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Heath</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Rohwer</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>Viruses in the faecal microbiota of monozygotic twins and their mothers</article-title>. <source>Nature</source> <volume>466</volume>, <fpage>334</fpage>&#x2013;<lpage>338</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature09199</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reyes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Semenkovich</surname> <given-names>N. P.</given-names>
</name>
<name>
<surname>Whiteson</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Rohwer</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Gordon</surname> <given-names>J. I.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Going viral: Next-generation sequencing applied to phage populations in the human gut</article-title>. <source>Nat. Rev. Microbiol.</source> <volume>10</volume>, <fpage>607</fpage>&#x2013;<lpage>617</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrmicro2853</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rohwer</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Edwards</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>The phage proteomic tree: a genome-based taxonomy for phage</article-title>. <source>J. Bacteriol.</source> <volume>184</volume>, <fpage>4529</fpage>&#x2013;<lpage>4535</lpage>. doi: <pub-id pub-id-type="doi">10.1128/JB.184.16.4529-4535.2002</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rose</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Schultz</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Henikoff</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pietrokovski</surname> <given-names>S.</given-names>
</name>
<name>
<surname>McCallum</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Henikoff</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Consensus-degenerate hybrid oligonucleotide primers for amplification of distantly related sequences</article-title>. <source>Nucleic Acids Res.</source> <volume>26</volume>, <fpage>1628</fpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/26.7.1628</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Secor</surname> <given-names>P. R.</given-names>
</name>
<name>
<surname>Michaels</surname> <given-names>L. A.</given-names>
</name>
<name>
<surname>Smigiel</surname> <given-names>K. S.</given-names>
</name>
<name>
<surname>Rohani</surname> <given-names>M. G.</given-names>
</name>
<name>
<surname>Jennings</surname> <given-names>L. K.</given-names>
</name>
<name>
<surname>Hisert</surname> <given-names>K. B.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Filamentous bacteriophage produced by Pseudomonas aeruginosa alters the inflammatory response and promotes noninvasive infection in vivo</article-title>. <source>Infect. Immun.</source> <volume>85</volume>, <fpage>e00648-16</fpage>. doi: <pub-id pub-id-type="doi">10.1128/IAI.00648-16</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shkoporov</surname> <given-names>A. N.</given-names>
</name>
<name>
<surname>Clooney</surname> <given-names>A. G.</given-names>
</name>
<name>
<surname>Sutton</surname> <given-names>T. D.</given-names>
</name>
<name>
<surname>Ryan</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Daly</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Nolan</surname> <given-names>J. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>The human gut virome is highly diverse, stable, and individual specific</article-title>. <source>Cell Host Microbe</source> <volume>26</volume>, <fpage>527</fpage>&#x2013;<lpage>541.e5</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chom.2019.09.009</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stern</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mick</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Tirosh</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Sagy</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Sorek</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>CRISPR targeting reveals a reservoir of common phages associated with the human gut microbiome</article-title>. <source>Genome Res.</source> <volume>22</volume>, <fpage>1985</fpage>&#x2013;<lpage>1994</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.138297.112</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kondabagil</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Draper</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Bowman</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2008</year>). <article-title>The structure of the phage T4 DNA packaging motor suggests a mechanism dependent on electrostatic forces</article-title>. <source>Cell</source> <volume>135</volume>, <fpage>1251</fpage>&#x2013;<lpage>1262</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2008.11.015</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tadmor</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Ottesen</surname> <given-names>E. A.</given-names>
</name>
<name>
<surname>Leadbetter</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Phillips</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Probing individual environmental bacteria for viruses by using microfluidic digital PCR</article-title>. <source>Science</source> <volume>333</volume>, <fpage>58</fpage>&#x2013;<lpage>62</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1200758</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tadmor</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Phillips</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MCRL: Using a reference library to compress a metagenome into a non-redundant list of sequences, considering viruses as a case study</article-title>. <source>Bioinformatics</source> <volume>38</volume>, <fpage>631</fpage>&#x2013;<lpage>647</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btab703</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tamura</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Stecher</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Peterson</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Filipski</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>MEGA6: Molecular evolutionary genetics analysis version 6.0</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume>, <fpage>2725</fpage>&#x2013;<lpage>2729</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/mst197</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waldor</surname> <given-names>M. K.</given-names>
</name>
<name>
<surname>Mekalanos</surname> <given-names>J. J.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Lysogenic conversion by a filamentous phage encoding cholera toxin</article-title>. <source>Science</source> <volume>272</volume>, <fpage>1910</fpage>&#x2013;<lpage>1914</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.272.5270.1910</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Welch</surname> <given-names>J. L. M.</given-names>
</name>
<name>
<surname>Dewhirst</surname> <given-names>F. E.</given-names>
</name>
<name>
<surname>Borisy</surname> <given-names>G. G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Biogeography of the oral microbiome: The site-specialist hypothesis</article-title>. <source>Annu. Rev. Microbiol.</source> <volume>73</volume>, <fpage>335</fpage>&#x2013;<lpage>358</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev-micro-090817-062503</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Whelan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Goldman</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>A general empirical model of protein evolution derived from multiple protein families using a maximum-likelihood approach</article-title>. <source>Mol. Biol. Evol.</source> <volume>18</volume>, <fpage>691</fpage>&#x2013;<lpage>699</lpage>. doi: <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a003851</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Willner</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Furlan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Schmieder</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Grasis</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Pride</surname> <given-names>D. T.</given-names>
</name>
<name>
<surname>Relman</surname> <given-names>D. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Metagenomic detection of phage-encoded platelet-binding factors in the human oral cavity</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>108</volume>, <fpage>4547</fpage>&#x2013;<lpage>4553</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1000089107</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wommack</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Bhavsar</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Polson</surname> <given-names>S. W.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Dumas</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Srinivasiah</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>VIROME: A standard operating procedure for analysis of viral metagenome sequences</article-title>. <source>Stand. Genomic Sci.</source> <volume>6</volume>, <fpage>421</fpage>. doi: <pub-id pub-id-type="doi">10.4056/sigs.2945050</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chain</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Lo</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>K. L.</given-names>
</name>
<name>
<surname>Gans</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Merritt</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>Community and gene composition of a human dental plaque microbiota obtained by metagenomic sequencing</article-title>. <source>Mol. Oral Microbiol.</source> <volume>25</volume>, <fpage>391</fpage>&#x2013;<lpage>405</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.2041-1014.2010.00587.x</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>PAML 4: Phylogenetic analysis by maximum likelihood</article-title>. <source>Mol. Biol. Evol.</source> <volume>24</volume>, <fpage>1586</fpage>&#x2013;<lpage>1591</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msm088</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Vatanen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Droit</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kostic</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Poon</surname> <given-names>T. W.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Intestinal virome changes precede autoimmunity in type I diabetes-susceptible children</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>114</volume>, <fpage>E6166</fpage>&#x2013;<lpage>E6175</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1706359114</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zuo</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yeoh</surname> <given-names>Y. K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Cheung</surname> <given-names>C. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Human-Gut-DNA virome variations across geography, ethnicity, and urbanization</article-title>. <source>Cell Host Microbe</source> <volume>28</volume>, <fpage>741</fpage>&#x2013;<lpage>751.e4</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chom.2020.08.005</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>