<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell. Infect. Microbiol.</journal-id>
<journal-title>Frontiers in Cellular and Infection Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell. Infect. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">2235-2988</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcimb.2017.00088</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>The Bioinformatics Analysis of Comparative Genomics of <italic>Mycobacterium tuberculosis</italic> Complex (MTBC) Provides Insight into Dissimilarities between Intraspecific Groups Differing in Host Association, Virulence, and Epitope Diversity</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Jia</surname> <given-names>Xinmiao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname> <given-names>Li</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Dong</surname> <given-names>Mengxing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Suting</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Lv</surname> <given-names>Lingna</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/413700/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cao</surname> <given-names>Dandan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Fu</surname> <given-names>Jing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname> <given-names>Tingting</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Ju</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Xiangli</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Shang</surname> <given-names>Yuanyuan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Guirong</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Sheng</surname> <given-names>Yongjie</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Huang</surname> <given-names>Hairong</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x0002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname> <given-names>Fei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/394714/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>CAS Key Laboratory of Genome Sciences &#x00026; Information, Beijing Institute of Genomics, Chinese Academy of Sciences</institution> <country>Beijing, China</country></aff>
<aff id="aff2"><sup>2</sup><institution>College of Life Sciences, University of Chinese Academy of Sciences</institution> <country>Beijing, China</country></aff>
<aff id="aff3"><sup>3</sup><institution>National Clinical Laboratory on Tuberculosis, Beijing Key Laboratory on Drug-resistant Tuberculosis Research, Beijing Chest Hospital, Capital Medical University, Beijing Tuberculosis and Thoracic Tumor Institute</institution> <country>Beijing, China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Key Laboratory for Molecular Enzymology and Engineering of Ministry of Education, Jilin University</institution> <country>Changchun, China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Sino-Danish College, University of Chinese Academy of Sciences</institution> <country>Beijing, China</country></aff>
<aff id="aff6"><sup>6</sup><institution>Collaborative Innovation Center for Genetics and Development</institution> <country>Shanghai, China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Adel M. Talaat, University of Wisconsin-Madison, USA</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Srinand Sreevatsan, University of Minnesota, USA; Andrey P. Anisimov, State Research Center for Applied Microbiology and Biotechnology, Russia</p></fn>
<fn fn-type="corresp" id="fn001"><p>&#x0002A;Correspondence: Fei Chen <email>chenfei&#x00040;big.ac.cn</email></p></fn>
<fn fn-type="corresp" id="fn002"><p>Hairong Huang <email>huanghairong&#x00040;tb123.org</email></p></fn>
<fn fn-type="other" id="fn003"><p>&#x02020;These authors have contributed equally to this work.</p></fn></author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>03</month>
<year>2017</year>
</pub-date>
<pub-date pub-type="collection">
<year>2017</year>
</pub-date>
<volume>7</volume>
<elocation-id>88</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>11</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>03</month>
<year>2017</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2017 Jia, Yang, Dong, Chen, Lv, Cao, Fu, Yang, Zhang, Zhang, Shang, Wang, Sheng, Huang and Chen.</copyright-statement>
<copyright-year>2017</copyright-year>
<copyright-holder>Jia, Yang, Dong, Chen, Lv, Cao, Fu, Yang, Zhang, Zhang, Shang, Wang, Sheng, Huang and Chen</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract><p>Tuberculosis now exceeds HIV as the top infectious disease cause of mortality, and is caused by the <italic>Mycobacterium tuberculosis</italic> complex (MTBC). MTBC strains have highly conserved genome sequences (similarity &#x0003E;99%) but dramatically different phenotypes. To analyze the relationship between genotype and phenotype, we conducted the comparative genomic analysis on 12 MTBC strains representing different lineages (i.e., <italic>Mycobacterium bovis</italic>; <italic>M. bovis</italic> BCG; <italic>M. microti</italic>; <italic>M. africanum</italic>; <italic>M. tuberculosis</italic> H37Rv; <italic>M. tuberculosis</italic> H37Ra, and six <italic>M. tuberculosis</italic> clinical isolates). The analysis focused on the three aspects of pathogenicity: host association, virulence, and epitope variations. Host association analysis indicated that eight <italic>mce3</italic> genes, two enoyl-CoA hydratases, and five PE/PPE family genes were present only in human isolates; these may have roles in host-pathogen interactions. There were 15 SNPs found on virulence factors (including five SNPs in three ESX secretion proteins) only in the Beijing strains, which might be related to their more virulent phenotype. A comparison between the virulent H37Rv and non-virulent H37Ra strains revealed three SNPs that were likely associated with the virulence attenuation of H37Ra: S219L (PhoP), A219E (MazG) and a newly identified I228M (EspK). Additionally, a comparison of animal-associated MTBC strains showed that the deletion of the first four genes (i.e., <italic>pe35, ppe68, esxB, esxA</italic>), rather than all eight genes of RD1, might play a central role in the virulence attenuation of animal isolates. Finally, by comparing epitopes among MTBC strains, we found that four epitopes were lost only in the Beijing strains; this may render them better capable of evading the human immune system, leading to enhanced virulence. 
Overall, our comparative genomic analysis of MTBC strains reveals the relationship between the highly conserved genotypes and the diverse phenotypes of MTBC, provides insight into pathogenic mechanisms, and facilitates the development of potential molecular targets for the prevention and treatment of tuberculosis.</p></abstract>
<kwd-group>
<kwd><italic>Mycobacterium tuberculosis</italic> complex (MTBC)</kwd>
<kwd>tuberculosis (TB)</kwd>
<kwd>host association</kwd>
<kwd>virulence</kwd>
<kwd>epitope</kwd>
<kwd>comparative genomics</kwd>
<kwd>pathogenicity</kwd>
<kwd>PacBio</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="52"/>
<page-count count="14"/>
<word-count count="8911"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>According to a 2016 World Health Organization (WHO) report, tuberculosis (TB) has surpassed HIV as the infectious disease causing the highest number of mortalities, with an estimated 1.8 million deaths and 10.4 million new TB cases worldwide in 2015 (World Health Organization, <xref ref-type="bibr" rid="B25">2016</xref>). The primary cause of TB, <italic>Mycobacterium tuberculosis</italic> (Mtb), belongs to the <italic>M. tuberculosis</italic> complex (MTBC). MTBC is a genetically related group of <italic>Mycobacterium</italic> species that can cause TB in humans or other organisms (Galagan, <xref ref-type="bibr" rid="B21">2014</xref>). In general, MTBC can be classified into eight main lineages (Lineage 1-8, L1-8): L1 (The Philippines and Indian Ocean), L2 (East Asia), L3 (India and East Africa), L4 (Europe and Americas), L5 (West Africa 1), L6 (West Africa 2), L7 (Ethiopia), and L8 (animal-adapted isolates). Among these, L1, L2, L3, L4, and L7 comprise Mtb. MTBC can also be divided into ancient (L1, L5-8) and modern strains (L2-4) according to the presence or absence of an <italic>M. tuberculosis</italic> specific deletion (TbD1; Brosch et al., <xref ref-type="bibr" rid="B5">2002</xref>; Galagan, <xref ref-type="bibr" rid="B21">2014</xref>).</p>
<p>After the first published genome of <italic>M. tuberculosis</italic> in 1998 (Cole et al., <xref ref-type="bibr" rid="B9">1998</xref>), there were many large-scale genomic studies involving many MTBC strains (&#x0003E;3,500) (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/genome/genomes">http://www.ncbi.nlm.nih.gov/genome/genomes</ext-link>). Analysis of worldwide sequencing data shows that MTBC members share more than 99% identity at the nucleotide level (Brosch et al., <xref ref-type="bibr" rid="B6">2000</xref>; Rodriguez-Campos et al., <xref ref-type="bibr" rid="B38">2014</xref>). Also, large genomic rearrangements are infrequent in MTBC strains (Galagan, <xref ref-type="bibr" rid="B21">2014</xref>).</p>
<p>Although MTBC members have almost identical genome sequences/genotypes (similarity &#x0003E;99%) (Brosch et al., <xref ref-type="bibr" rid="B6">2000</xref>), they exhibit diverse pathogenic phenotypes, which are thought to be the result of long-term co-evolution of MTBC strains with a diverse group of humans and animals (Comas et al., <xref ref-type="bibr" rid="B11">2013</xref>; Galagan, <xref ref-type="bibr" rid="B21">2014</xref>). Long-term geographical isolation gave rise to the accumulation of &#x0201C;genotype isolation,&#x0201D; thus resulting in different phenotypes (Ghebremichael et al., <xref ref-type="bibr" rid="B22">2010</xref>).</p>
<p>Firstly, the MTBC members have diverse host associations. The L1 to L7 lineages mainly infect humans and rarely infect animals. Among these, the L5 and L6 lineages (<italic>M. africanum</italic> I and II) only cause human TB in West Africa (Bentley et al., <xref ref-type="bibr" rid="B4">2012</xref>; Winglee et al., <xref ref-type="bibr" rid="B50">2016</xref>), and the L7 lineage has only been reported in Ethiopia or in Ethiopian emigrants located in Djibouti (Firdessa et al., <xref ref-type="bibr" rid="B17">2013</xref>). The other human-associated MTBC lineages (L1-4) also exhibit relatively high levels of distinct geographic distributions (Galagan, <xref ref-type="bibr" rid="B21">2014</xref>). These findings suggest that the MTBC lineages have coevolved with diverse hominid ancestors since ancient times, which has led to their diverse adaptations to specific human populations (Gagneux et al., <xref ref-type="bibr" rid="B20">2006</xref>; Comas et al., <xref ref-type="bibr" rid="B11">2013</xref>; Galagan, <xref ref-type="bibr" rid="B21">2014</xref>). In addition, the L8 lineage (consisting of different animal-adapted MTBC isolates: <italic>M. bovis, M. bovis</italic> BCG, and <italic>M. microti</italic>, among others) has diverse animal-host adaptations, likely for the same reason (Brosch et al., <xref ref-type="bibr" rid="B5">2002</xref>; Rodriguez-Campos et al., <xref ref-type="bibr" rid="B38">2014</xref>). <italic>M. bovis</italic> rarely causes TB in humans but it is highly virulent for cattle (Brosch et al., <xref ref-type="bibr" rid="B6">2000</xref>). Similarly, <italic>M. microti</italic> has been reported to mainly infect rodents, such as voles, and rarely infects humans (Brosch et al., <xref ref-type="bibr" rid="B6">2000</xref>).</p>
<p>Secondly, MTBC strains have different levels of virulence [i.e., the degree of pathogenicity or the ability of the organism to invade the tissues of the host (Pirofski and Casadevall, <xref ref-type="bibr" rid="B35">2012</xref>)]. The modern strains (L2-L4) are more virulent to humans than the ancient strains (L1; L5-8), and are responsible for the vast majority of today&#x00027;s TB cases (Brosch et al., <xref ref-type="bibr" rid="B5">2002</xref>; Gagneux, <xref ref-type="bibr" rid="B19">2012</xref>). Among these, the Beijing sub-lineage of the L2 lineage appears to be more virulent than the other modern isolates due to its enhanced resistance and adaptation (Ida et al., <xref ref-type="bibr" rid="B26">2010</xref>).</p>
<p>The virulence of MTBC strains is not set in stone. For example, <italic>M. tuberculosis</italic> H37Ra, isolated by William Steenken in 1935 (Steenken and Gardner, <xref ref-type="bibr" rid="B44">1946</xref>), is usually regarded as the non-virulent counterpart of the virulent H37Rv strain. Historically, both <italic>M. tuberculosis</italic> H37Ra and H37Rv are derived from the same parent strain (H37). H37Ra and H37Rv are the most common standard strains of <italic>M. tuberculosis</italic> in laboratories. Another famous example is <italic>M. bovis</italic> BCG, which was isolated from virulent <italic>M. bovis</italic> after 239 passages (Liu et al., <xref ref-type="bibr" rid="B32">2009</xref>); this is the most widely used TB vaccine in the world.</p>
<p>Thirdly, some studies have shown a different immune response to different MTBC strains (Gagneux, <xref ref-type="bibr" rid="B19">2012</xref>). It is reported that hypervirulent strains induce lower immune responses than low-pathogenic strains (Portevin et al., <xref ref-type="bibr" rid="B36">2011</xref>). The distinct capacity of virulent strains to remain sequestered in host macrophages seems to be a reason for their ability to evade immune responses (Chen et al., <xref ref-type="bibr" rid="B8">2006</xref>; Grace and Ernst, <xref ref-type="bibr" rid="B23">2016</xref>).</p>
<p>In recent years, some genomic studies concerning the relationship between the genotype and phenotype of MTBC strains have been reported. A number of mycobacterial virulence genes (i.e., virulence factors) have been identified; most of these encode for cell surface proteins or lipid and fatty acid metabolism proteins (Forrellad et al., <xref ref-type="bibr" rid="B18">2013</xref>). Some comparative genomic analyses between the virulent H37Rv and the non-virulent H37Ra strains revealed that the mutation of Ser219 into leucine of the PhoP virulence factor might result in the attenuation of virulence in H37Ra (Ryndak et al., <xref ref-type="bibr" rid="B40">2008</xref>; Zheng et al., <xref ref-type="bibr" rid="B51">2008</xref>). In addition, it is also known that the deletion of region of difference 1 (RD1) in the genome of <italic>M. bovis</italic> BCG led to its attenuation (based on a comparison with <italic>M. bovis</italic>) (Pym et al., <xref ref-type="bibr" rid="B37">2002</xref>; Lewis et al., <xref ref-type="bibr" rid="B27">2003</xref>).</p>
<p>To date, many of the comparative genomic studies have focused only on the comparison between virulent and non-virulent MTBC strains; thus, comparative genomic studies covering the host association, virulence, and epitope variations are still lacking. This is mainly due to the fact that, on one hand, precise and complete MTBC genome sequences are needed for precise comparisons and analyses of almost identical MTBC genomes (similarity &#x0003E;99%); meanwhile, on the other hand, it is very difficult to obtain complete MTBC genome sequences using second generation sequencing technologies, such as the Illumina HiSeq platform, due to the high GC content and repetitive sequences of the PE/PPE multi-gene family.</p>
<p>Herein, the 12 MTBC genomes that we previously finished using PacBio single-molecule real-time (SMRT) technology (Zhu et al., <xref ref-type="bibr" rid="B52">2016</xref>) provided the groundwork for analyzing the relationship between the genotype and phenotype of MTBC. To obtain a precise analysis, we re-sequenced the genomes of the 12 MTBC strains using an Illumina HiSeq. Based on the 12 precise and complete MTBC genomes, we performed a comparative genomic analysis looking at three aspects of pathogenicity: host association, virulence, and epitope diversity.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and methods</title>
<sec>
<title>Isolates and genotyping</title>
<p>The 12 MTBC isolates, which included six reference strains and six clinical isolates, have been previously described (Table <xref ref-type="supplementary-material" rid="SM1">S1</xref>; Zhu et al., <xref ref-type="bibr" rid="B52">2016</xref>). Six standard reference strains were obtained from the American Type Culture Collection (ATCC) specifically for the purpose of genome sequencing. The strains were grown in either Lowenstein&#x02013;Jensen media or Middlebrook 7H10 media supplemented with 10% OADC (Oleic Albumin Dextrose Catalase, Becton Dickinson), glycerol, and 0.05% Tween 80. We used the VNTR-15 scheme as described in MIRU-VNTRplus (<ext-link ext-link-type="uri" xlink:href="http://www.miru-vntrplus.org/">http://www.miru-vntrplus.org/</ext-link>) for genotyping, which uses the following markers: Mtub04; ETRC; MIRU04; MIRU40; MIRU10; MIRU16; Mtub21; QUB11b; ETRA; Mtub30; MIRU26; MIRU31; Mtub39; QUB26, and QUB4156. Each MIRU-VNTR locus was individually amplified, and electrophoresis of products on agarose gels was conducted as previously described (Fabre et al., <xref ref-type="bibr" rid="B15">2004</xref>). The copy number at each locus was calculated in BioNumerics. A large sequence polymorphism (LSP) (pks15/1) was used to differentiate between the clinical TB strains using the primers shown in Table <xref ref-type="supplementary-material" rid="SM1">S2</xref>. A deletion of a 7-bp region in the polyketide synthase gene pks15/1 is present in the Euro-American lineage of <italic>M. tuberculosis</italic>. Whole genome SNP typing was also done using MEGA 6.06 (maximum likelihood method) as previously described (Tamura et al., <xref ref-type="bibr" rid="B46">2013</xref>; Zhu et al., <xref ref-type="bibr" rid="B52">2016</xref>). The results of this analysis are consistent with those of the VNTR analysis (Figure <xref ref-type="supplementary-material" rid="SM1">S1</xref>).</p>
<p>The 12 MTBC strains covered five lineages. There were two clinical L2 strains (Mtb 2242 and 2279), one clinical L3 strain (Mtb 26105), five L4 strains (three clinical strains-Mtb 22115, Mtb 22103, and Mtb 37004 and two reference strains-Mtb F1 and Mtb F28), one L6 reference strain (<italic>M. africanum</italic> 25), and three L8 reference strains (<italic>M. microti</italic> 12, <italic>M. bovis</italic> 30, and <italic>M. bovis</italic> BCG 26). Of the 12 strains, Mtb 2242, Mtb 2279, Mtb 26105, Mtb 22115, Mtb 22103, Mtb 37004, Mtb F1, Mtb F28, and <italic>M. africanum</italic> 25 are human isolates; meanwhile, <italic>M. microti</italic> 12, <italic>M. bovis</italic> 30, and <italic>M. bovis</italic> BCG 26 are animal isolates. In addition, the 12 MTBC isolates contained two homologous pairs of virulent/non-virulent strains: virulent <italic>M. tuberculosis</italic> H37Rv (Mtb F1: ATCC27294) and its non-virulent counterpart H37Ra (Mtb F28: ATCC25177), and virulent <italic>M. bovis</italic> (<italic>M. bovis</italic> 30: ATCC19210) and its non-virulent counterpart <italic>M. bovis</italic> BCG (<italic>M. bovis</italic> BCG 26: ATCC35735).</p>
</sec>
<sec>
<title>Genomic DNA extraction, sequencing, correction and re-annotation</title>
<p>Genomic DNA from the 12 MTBC strains was extracted using a TIANamp Bacteria Genomic DNA Kit (Tiangen Biotech Co. Ltd., Beijing, China), and was sequenced using PacBio Single-Molecule Real-Time (SMRT) Technology as previously described (Zhu et al., <xref ref-type="bibr" rid="B52">2016</xref>).</p>
<p>To correct the homopolymer errors produced during PacBio sequencing, we re-sequenced the 12 isolates using next-generation sequencing. The genomes of the 12 isolates were shotgun sequenced using an Illumina Genome Analyzer 2X. Paired-end libraries were prepared from 5 &#x003BC;g of isolated genomic DNA using the TruSeq DNA sample prep kit A (Illumina Inc., San Diego) according to the manufacturer&#x00027;s instructions. Genomic paired-end libraries were sequenced with a read length of 2 &#x000D7; 150 nucleotides using an Illumina GAIIx instrument according to the manufacturer&#x00027;s instructions. Image analysis and base calling were done in the standard Illumina pipeline. The raw Illumina sequencing reads were trimmed at a threshold of 0.01 (Phred score of 20). Filtered reads were mapped using BWA version 0.5.9 (Li and Durbin, <xref ref-type="bibr" rid="B28">2010</xref>) onto the genome sequences that had been assembled by the Hierarchical Genome Assembly Process (HGAP.3) algorithm in SMRT Portal (version 2.2.0), and the alignments were converted to sorted BAM format using SAMtools v0.1.9 (Li et al., <xref ref-type="bibr" rid="B29">2009</xref>). The coverage ranged between 157&#x000D7; and 394&#x000D7; with an average of 255&#x000D7;. Pilon v1.13 (Walker et al., <xref ref-type="bibr" rid="B49">2014</xref>) was then used to polish the genome sequences using these alignments, which resulted in a total of 9,493 insertions and 133 deletions. All the raw Illumina sequencing reads have also been deposited in the National Center for Biotechnology Information (NCBI) Sequence Read Archive (SRA) database (SRP064893) and the Genome Sequence Archive (GSA) of the BIG Data Center (BIGD) (PRJCA000307).</p>
<p>All 12 genome sequences were re-annotated with the Rapid Annotation using Subsystem Technology (RAST) pipeline, which is a fully automated annotation engine for complete or draft archaeal and bacterial genomes (Aziz et al., <xref ref-type="bibr" rid="B3">2008</xref>).</p>
</sec>
<sec>
<title>Genome structure and identification of specific genes and single-nucleotide polymorphisms (SNPs)</title>
<p>The average nucleotide identity (ANI) was calculated using the ANI tool on EzGenome (<ext-link ext-link-type="uri" xlink:href="http://www.ezbiocloud.net/tools/ani">http://www.ezbiocloud.net/tools/ani</ext-link>). Multiple alignments of genomic sequences were performed using the Mauve multiple alignment software with the progressive alignment option (Darling et al., <xref ref-type="bibr" rid="B13">2010</xref>). The output file produced by Mauve was parsed using a custom Perl script (Supplementary Data) to retrieve multiple aligned sequences for SNP loci. SNPs called in repetitive regions of the genome (defined as exact repetitive sequences &#x02265;25 bp in length and identified using RepeatMasker; Tarailo-Graovac and Chen, <xref ref-type="bibr" rid="B47">2009</xref>) were excluded.</p>
<p>Gene sequences were downloaded from the RAST server after annotation. To identify specific genes, ortholog clustering was performed using orthoMCL (Li et al., <xref ref-type="bibr" rid="B30">2003</xref>). Each cluster is a homology group. If the genes in a homology group covered all species in a group but no strains in other groups, we considered the genes in this group to be group specific genes.</p>
</sec>
<sec>
<title>Epitopes, virulence factors and PE/PPE gene family distribution</title>
<p>Epitopes were obtained from the Immune Epitope Database (IEDB; Vita et al., <xref ref-type="bibr" rid="B48">2015</xref>), and those that had been positively experimentally identified were selected and renamed. Only epitopes with 100% identical BLAT matches were considered to be the same epitope. The epitope was classified as an absence epitope if it was located in an absence gene or in a deleted/mutated region of a non-absence gene. Epitopes with the same distribution pattern were clustered into one epitope_cluster. Furthermore, BLASTn was used to search for differences between 12 genomes in terms of single nucleotide variations, insertions, and deletions in corresponding antigen genes.</p>
<p>Virulence genes were downloaded from the Virulence Factor Database (VFDB; Chen et al., <xref ref-type="bibr" rid="B7">2005</xref>; Table <xref ref-type="supplementary-material" rid="SM1">S3</xref>). All of the PE/PPE genes analyzed in this study (Table <xref ref-type="supplementary-material" rid="SM1">S4</xref>) were based on a search of the <italic>M. tuberculosis</italic> H37Rv genes (<ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="NC_000962">NC_000962</ext-link>) from the NCBI gene database (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/gene">www.ncbi.nlm.nih.gov/gene</ext-link>). FASTA sequences of these genes were used to search for corresponding genes in the 12 genomes using BLAST (50% coverage and 90% identity thresholds).</p>
</sec>
</sec>
<sec id="s3">
<title>Results and discussion</title>
<sec>
<title>Conserved genomes (genotypes) and diverse phenotypes of 12 MTBC strains</title>
<p>The MTBC genomes obtained by SMRT sequencing (Zhu et al., <xref ref-type="bibr" rid="B52">2016</xref>) were re-sequenced using Illumina sequencing to correct the homopolymer errors. Based on an average of 5,394,126 paired-end reads (255&#x000D7; coverage) (SRP064893), we resolved a total of 9,493 insertions and 133 deletions compared with the original genomes (Table <xref ref-type="supplementary-material" rid="SM1">S5</xref>). The precise genomes provided general information (Table <xref ref-type="table" rid="T1">1</xref>), including the size of the genomes (4.34&#x02013;4.43 Mb), the number of predicted protein-coding genes (&#x0007E;4,400), and the gene length (&#x0007E;900 bp). Importantly, the ANI and SNP analyses showed that the MTBC genomes were highly conserved, as the maximum number of SNPs and the minimum ANI were 2,356 and 99.75%, respectively (Table <xref ref-type="table" rid="T1">1</xref>). Pan-genomic analysis was further implemented and used to identify 3,761 core gene clusters (Figure <xref ref-type="supplementary-material" rid="SM1">S2A</xref>), which accounted for &#x0007E;87% of all genes. This also demonstrates that MTBC genomes are highly conserved.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p><bold>Conserved genomes (genotypes) and diverse phenotypes of 12 MTBC strains</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="center"><bold>ATCC No./Lineage</bold></th>
<th valign="top" align="center"><bold>Strain No</bold>.</th>
<th valign="top" align="left"><bold>Species</bold></th>
<th valign="top" align="left"><bold>Host</bold></th>
<th valign="top" align="center"><bold>Completed genome size (bp)</bold></th>
<th valign="top" align="center"><bold>ANI<xref ref-type="table-fn" rid="TN1"><sup>a</sup></xref> (%)</bold></th>
<th valign="top" align="center"><bold>Coding gene number</bold></th>
<th valign="top" align="center"><bold>Core gene number</bold></th>
<th valign="top" align="center"><bold>SNPs<xref ref-type="table-fn" rid="TN2"><sup>b</sup></xref></bold></th>
<th valign="top" align="center"><bold>Virulence factor</bold></th>
<th valign="top" align="center"><bold>Epitope</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Modern strains</td>
<td valign="top" align="center">27294/L4</td>
<td valign="top" align="center">F1</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic> H37Rv</td>
<td valign="top" align="left">Human</td>
<td valign="top" align="center">4,429,062</td>
<td valign="top" align="center">99.9845</td>
<td valign="top" align="center">4,400</td>
<td valign="top" align="center">3,834</td>
<td valign="top" align="center">110</td>
<td valign="top" align="center">253</td>
<td valign="top" align="center">2,208</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">25177/L4</td>
<td valign="top" align="center">F28</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic> H37Ra</td>
<td/>
<td valign="top" align="center">4,421,992</td>
<td valign="top" align="center">99.9893</td>
<td valign="top" align="center">4,366</td>
<td valign="top" align="center">3,837</td>
<td valign="top" align="center">92</td>
<td valign="top" align="center">252</td>
<td valign="top" align="center">2,205</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">L4</td>
<td valign="top" align="center">22115</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,402,103</td>
<td valign="top" align="center">99.9112</td>
<td valign="top" align="center">4,356</td>
<td valign="top" align="center">3,827</td>
<td valign="top" align="center">840</td>
<td valign="top" align="center">253</td>
<td valign="top" align="center">2,201</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">L4</td>
<td valign="top" align="center">37004</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,417,474</td>
<td valign="top" align="center">99.8955</td>
<td valign="top" align="center">4,375</td>
<td valign="top" align="center">3,820</td>
<td valign="top" align="center">855</td>
<td valign="top" align="center">252</td>
<td valign="top" align="center">2,189</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">L4</td>
<td valign="top" align="center">22103</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,399,638</td>
<td valign="top" align="center">99.8498</td>
<td valign="top" align="center">4,345</td>
<td valign="top" align="center">3,809</td>
<td valign="top" align="center">1,033</td>
<td valign="top" align="center">253</td>
<td valign="top" align="center">2,175</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">L3</td>
<td valign="top" align="center">26105</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,426,728</td>
<td valign="top" align="center">99.8796</td>
<td valign="top" align="center">4,393</td>
<td valign="top" align="center">3,833</td>
<td valign="top" align="center">1,504</td>
<td valign="top" align="center">251</td>
<td valign="top" align="center">2,261</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">L2</td>
<td valign="top" align="center">2242</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,420,756</td>
<td valign="top" align="center">99.8322</td>
<td valign="top" align="center">4,428</td>
<td valign="top" align="center">3,839</td>
<td valign="top" align="center">1,434</td>
<td valign="top" align="center">251</td>
<td valign="top" align="center">2,128</td>
</tr>
<tr style="border-bottom: thin solid #000000;">
<td/>
<td valign="top" align="center">L2</td>
<td valign="top" align="center">2279</td>
<td valign="top" align="left"><italic>M. tuberculosis</italic></td>
<td/>
<td valign="top" align="center">4,406,429</td>
<td valign="top" align="center">99.8436</td>
<td valign="top" align="center">4,400</td>
<td valign="top" align="center">3,839</td>
<td valign="top" align="center">1,514</td>
<td valign="top" align="center">252</td>
<td valign="top" align="center">2,181</td>
</tr> <tr>
<td valign="top" align="left">Ancient strains</td>
<td valign="top" align="center">35711/L6</td>
<td valign="top" align="center">25</td>
<td valign="top" align="left"><italic>M. africanum</italic></td>
<td valign="top" align="left">Human (tropical Africa)</td>
<td valign="top" align="center">4,388,515</td>
<td valign="top" align="center">99.7731</td>
<td valign="top" align="center">4,385</td>
<td valign="top" align="center">3,819</td>
<td valign="top" align="center">2,336</td>
<td valign="top" align="center">246</td>
<td valign="top" align="center">2,135</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">19422/L8</td>
<td valign="top" align="center">12</td>
<td valign="top" align="left"><italic>M. microti</italic></td>
<td valign="top" align="left">Voles and rodents</td>
<td valign="top" align="center">4,370,890</td>
<td valign="top" align="center">99.7712</td>
<td valign="top" align="center">4,360</td>
<td valign="top" align="center">3,815</td>
<td valign="top" align="center">2,158</td>
<td valign="top" align="center">232</td>
<td valign="top" align="center">1,756</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">19210/L8</td>
<td valign="top" align="center">30</td>
<td valign="top" align="left"><italic>M. bovis</italic></td>
<td valign="top" align="left">Wide range of mammals especially cattle</td>
<td valign="top" align="center">4,336,684</td>
<td valign="top" align="center">99.7680</td>
<td valign="top" align="center">4,312</td>
<td valign="top" align="center">3,804</td>
<td valign="top" align="center">2,345</td>
<td valign="top" align="center">238</td>
<td valign="top" align="center">2,137</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">35735/L8</td>
<td valign="top" align="center">26</td>
<td valign="top" align="left"><italic>M. bovis</italic> BCG</td>
<td/>
<td valign="top" align="center">4,353,641</td>
<td valign="top" align="center">99.7488</td>
<td valign="top" align="center">4,348</td>
<td valign="top" align="center">3,832</td>
<td valign="top" align="center">2,356</td>
<td valign="top" align="center">236</td>
<td valign="top" align="center">1,706</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN1"><label>a</label>
<p><italic>ANI: Average Nucleotide Identity;</italic></p></fn>
<fn id="TN2"><label>b</label>
<p><italic>Reference genome: H37Rv (<ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="NC_000962">NC_000962</ext-link>)</italic>.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The comparative genomic analysis also showed some subtle differences amongst the 12 MTBC genomes: the five L4 strains had fewer SNPs (&#x0003C;1,000) because the reference genome (<ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="NC_000962">NC_000962</ext-link>) belonged to the L4 lineage; the three animal-associated MTBC strains possessed more SNPs (&#x0003E;2,000), and the two L2 and one L3 strains all had &#x0007E;1,500 SNPs (Table <xref ref-type="table" rid="T1">1</xref>). The number of SNPs reflected the genetic distance from the reference L4 strain (Takezaki and Nei, <xref ref-type="bibr" rid="B45">1996</xref>). To lower the influence of the reference, pair-wise comparisons of SNPs were conducted (Figure <xref ref-type="fig" rid="F1">1</xref>). This comparison proved that the differences in SNPs between the animal- and human-associated strains were much greater than those between intra-lineage strains. On the other hand, the analyses for the pan-genomic and pair-wise comparisons of orthologous genes (Figure <xref ref-type="supplementary-material" rid="SM1">S2</xref> and Figure <xref ref-type="fig" rid="F1">1</xref>, respectively) indicated some strain-specific and lineage-specific genes.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>Pair-wise comparisons of SNPs and orthologous genes in 12 MTBC strains</bold>. The plum red region represents the SNP number, and the blue region indicates the orthologous gene number. The colors darken with increases in number.</p></caption>
<graphic xlink:href="fcimb-07-00088-g0001.tif"/>
</fig>
<p>It is well-known that genotype is a major factor influencing phenotype. Thus, the pathogenic phenotype differences amongst MTBC lineages can also be attributed to the small number of genomic variations, including the specific genes and SNPs described above. In the following sections, we investigate the correlation between these genetic variations and three essential phenotypic features (host association, virulence, and epitope variation).</p>
</sec>
<sec>
<title>Host association analysis</title>
<p>In an attempt to reveal the genetic basis for host association, we performed a comparative genomic study between the strains isolated from humans (i.e., Mtb 2242, Mtb 2279, Mtb 26105, Mtb 22115, Mtb 22103, Mtb 37004, Mtb F1, and Mtb F28) and animals (i.e., <italic>M. microti</italic> 12, <italic>M. bovis</italic> 30, and <italic>M. bovis</italic> BCG 26). The average ANI was 99.76% (99.71&#x02013;99.80%; Table <xref ref-type="supplementary-material" rid="SM1">S6</xref>), and no large genome structure variation was observed. Minor differences in genome sequences between these two groups of strains might explain the differences in host association. We found 29 genes specific to human isolates (Table <xref ref-type="table" rid="T2">2</xref>), 16 genes specific to animal isolates (Table <xref ref-type="supplementary-material" rid="SM1">S7</xref>), and 579 SNPs (Table <xref ref-type="supplementary-material" rid="SM1">S8</xref>).</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p><bold>Human-associated strain specific genes<xref ref-type="table-fn" rid="TN3"><sup>a</sup></xref> [Reference genome: H37Rv (<ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="NC_000962">NC_000962</ext-link>)]</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Gene</bold></th>
<th valign="top" align="left"><bold>Synonym</bold></th>
<th valign="top" align="center"><bold>Product length</bold></th>
<th valign="top" align="center"><bold>COG</bold></th>
<th valign="top" align="left"><bold>Annotation</bold></th>
<th valign="top" align="center"><bold>RD No</bold>.</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv0221</td>
<td valign="top" align="center">469</td>
<td valign="top" align="center">COG4908R</td>
<td valign="top" align="left">Diacylglycerol O-acyltransferase</td>
<td valign="top" align="center">RD10</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#00a54f">echA1</td>
<td valign="top" align="left" style="color:#00a54f">Rv0222</td>
<td valign="top" align="center" style="color:#00a54f">262</td>
<td valign="top" align="center" style="color:#00a54f">COG1024I</td>
<td valign="top" align="left" style="color:#00a54f">Enoyl-CoA hydratase EchA1</td>
<td valign="top" align="center" style="color:#00a54f">RD10</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#2e3092">PE_PGRS5</td>
<td valign="top" align="left" style="color:#2e3092">Rv0297</td>
<td valign="top" align="center" style="color:#2e3092">591</td>
<td valign="top" align="center" style="color:#2e3092">&#x02013;</td>
<td valign="top" align="left" style="color:#2e3092">PE-PGRS family protein PE_PGRS5</td>
<td valign="top" align="center" style="color:#2e3092">/</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#2e3092">PPE7</td>
<td valign="top" align="left" style="color:#2e3092">Rv0354c</td>
<td valign="top" align="center" style="color:#2e3092">141</td>
<td valign="top" align="center" style="color:#2e3092">&#x02013;</td>
<td valign="top" align="left" style="color:#2e3092">PPE family protein PPE7</td>
<td valign="top" align="center" style="color:#2e3092">/</td>
</tr>
<tr>
<td valign="top" align="left">galTb</td>
<td valign="top" align="left">Rv0619</td>
<td valign="top" align="center">181</td>
<td valign="top" align="center">COG1085G</td>
<td valign="top" align="left">Galactose-1-phosphate uridylyltransferase GalTb</td>
<td valign="top" align="center">/</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv1503c</td>
<td valign="top" align="center">182</td>
<td valign="top" align="center">COG0399M</td>
<td valign="top" align="left">TDP-4-oxo-6-deoxy-D-glucose aminotransferase</td>
<td valign="top" align="center">/</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#2e3092">PE_PGRS31</td>
<td valign="top" align="left" style="color:#2e3092">Rv1768</td>
<td valign="top" align="center" style="color:#2e3092">618</td>
<td valign="top" align="center" style="color:#2e3092">COG5164</td>
<td valign="top" align="left" style="color:#2e3092">PE-PGRS family protein PE_PGRS31</td>
<td valign="top" align="center" style="color:#2e3092">RD14</td>
</tr>
<tr>
<td valign="top" align="left">yrbE3A</td>
<td valign="top" align="left">Rv1964</td>
<td valign="top" align="center">265</td>
<td valign="top" align="center">COG0767Q</td>
<td valign="top" align="left">Integral membrane protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left">yrbE3B</td>
<td valign="top" align="left">Rv1965</td>
<td valign="top" align="center">271</td>
<td valign="top" align="center">COG0767Q</td>
<td valign="top" align="left">Integral membrane protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mce3A</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1966</td>
<td valign="top" align="center" style="color:#ed1c24">425</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family protein Mce3A</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mce3B</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1967</td>
<td valign="top" align="center" style="color:#ed1c24">342</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family protein Mce3B</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mce3C</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1968</td>
<td valign="top" align="center" style="color:#ed1c24">410</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family protein Mce3C</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mce3D</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1969</td>
<td valign="top" align="center" style="color:#ed1c24">423</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family protein Mce3D</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">lprM</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1970</td>
<td valign="top" align="center" style="color:#ed1c24">377</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family lipoprotein LprM</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mce3F</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1971</td>
<td valign="top" align="center" style="color:#ed1c24">437</td>
<td valign="top" align="center" style="color:#ed1c24">COG1463Q</td>
<td valign="top" align="left" style="color:#ed1c24">Mce family protein Mce3F</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1972</td>
<td valign="top" align="center" style="color:#ed1c24">191</td>
<td valign="top" align="center" style="color:#ed1c24">&#x02013;</td>
<td valign="top" align="left" style="color:#ed1c24">Mce associated membrane protein</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1973</td>
<td valign="top" align="center" style="color:#ed1c24">160</td>
<td valign="top" align="center" style="color:#ed1c24">&#x02013;</td>
<td valign="top" align="left" style="color:#ed1c24">Mce associated membrane protein</td>
<td valign="top" align="center" style="color:#ed1c24">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv1974</td>
<td valign="top" align="center">125</td>
<td valign="top" align="center">&#x02013;</td>
<td valign="top" align="left">Membrane protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv1975</td>
<td valign="top" align="center">221</td>
<td valign="top" align="center">COG2340S</td>
<td valign="top" align="left">Hypothetical protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv1976c</td>
<td valign="top" align="center">135</td>
<td valign="top" align="center">&#x02013;</td>
<td valign="top" align="left">Hypothetical protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv1977</td>
<td valign="top" align="center">348</td>
<td valign="top" align="center">COG0501O</td>
<td valign="top" align="left">Hypothetical protein</td>
<td valign="top" align="center">RD7</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv2073c</td>
<td valign="top" align="center">249</td>
<td valign="top" align="center">COG0300R</td>
<td valign="top" align="left">Oxidoreductase</td>
<td valign="top" align="center">RD9</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv2074</td>
<td valign="top" align="center">137</td>
<td valign="top" align="center">&#x02013;</td>
<td valign="top" align="left">Pyridoxamine 5&#x00027;-phosphate oxidase</td>
<td valign="top" align="center">RD9</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv2227</td>
<td valign="top" align="center">233</td>
<td valign="top" align="center">COG3826S</td>
<td valign="top" align="left">Hypothetical protein</td>
<td valign="top" align="center">/</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#00a54f">echA18</td>
<td valign="top" align="left" style="color:#00a54f">Rv3374</td>
<td valign="top" align="center" style="color:#00a54f">82</td>
<td valign="top" align="center" style="color:#00a54f">COG1024I</td>
<td valign="top" align="left" style="color:#00a54f">Enoyl-CoA hydratase</td>
<td valign="top" align="center" style="color:#00a54f">/</td>
</tr>
<tr>
<td valign="top" align="left">ephA</td>
<td valign="top" align="left">Rv3617</td>
<td valign="top" align="center">322</td>
<td valign="top" align="center">COG0596R</td>
<td valign="top" align="left">Epoxide hydrolase EphA</td>
<td valign="top" align="center">RD8</td>
</tr>
<tr>
<td valign="top" align="left">&#x02013;</td>
<td valign="top" align="left">Rv3618</td>
<td valign="top" align="center">395</td>
<td valign="top" align="center">COG2141C</td>
<td valign="top" align="left">Monooxygenase</td>
<td valign="top" align="center">RD8</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#2e3092">PPE65</td>
<td valign="top" align="left" style="color:#2e3092">Rv3621c</td>
<td valign="top" align="center" style="color:#2e3092">413</td>
<td valign="top" align="center" style="color:#2e3092">COG5651N</td>
<td valign="top" align="left" style="color:#2e3092">PPE family protein PPE65</td>
<td valign="top" align="center" style="color:#2e3092">RD8</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#2e3092">PE32</td>
<td valign="top" align="left" style="color:#2e3092">Rv3622c</td>
<td valign="top" align="center" style="color:#2e3092">99</td>
<td valign="top" align="center" style="color:#2e3092">&#x02013;</td>
<td valign="top" align="left" style="color:#2e3092">PE family protein PE32</td>
<td valign="top" align="center" style="color:#2e3092">RD8</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN3"><label>a</label><p><italic>Eight mce3 family genes, two enoyl-CoA hydratases, and five PE/PPE family genes are highlighted in red, green, and blue, respectively</italic>.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Interestingly, of the 29 genes specific to human isolates, 14 in region of difference 7 (RD7; Table <xref ref-type="table" rid="T2">2</xref>) were clustered together (Figure <xref ref-type="supplementary-material" rid="SM1">S3</xref>). Among these, eight mammalian cell entry 3 (mce3) family genes appear to play roles in human host association of MTBC strains, since the Mce proteins are an important protein family for entry and survival of bacteria in the host (Harboe et al., <xref ref-type="bibr" rid="B24">2002</xref>; Ahmad et al., <xref ref-type="bibr" rid="B1">2004</xref>). They are located on the cell surface and reside in the same <italic>mce3</italic> operon of <italic>M. tuberculosis</italic> (Harboe et al., <xref ref-type="bibr" rid="B24">2002</xref>). Additionally, the two genes upstream (<italic>yrbE3A</italic> and <italic>yrbE3B</italic>) from the eight <italic>mce3</italic> genes and the four genes downstream (<italic>Rv1974, Rv1975, Rv1976c</italic>, and <italic>Rv1977</italic>) from the eight <italic>mce3</italic> genes encode integral membrane proteins and signal sequences, respectively (Harboe et al., <xref ref-type="bibr" rid="B24">2002</xref>). Their absence in animal isolates might be related to host association.</p>
<p>It is also worth noting that two enoyl-coenzyme A (CoA) hydratases (EchA1 and EchA18) were only present in human isolates (Table <xref ref-type="table" rid="T2">2</xref>); these hydratases are key enzymes in the fatty acid &#x003B2;-oxidation pathway. Through &#x003B2;-oxidative reactions, host-cell lipids can be degraded and provide precursors for many metabolic processes, such as cell-wall synthesis (Cole et al., <xref ref-type="bibr" rid="B9">1998</xref>). Therefore, the absence of enoyl-CoA hydratase in animal-associated MTBC strains might impact fatty acid metabolism and cell-wall synthesis in host cells.</p>
<p>In addition, five PE/PPE family genes (i.e., <italic>ppe7, ppe65, pe32, pe_pgrs5</italic>, and <italic>pe_pgrs31</italic>) were found to exist only in the human isolates (Table <xref ref-type="table" rid="T2">2</xref>). Most of the PE/PPE proteins were localized or secreted to the cell surface; these proteins have been implicated in mycobacterial antigenic variation and host immune evasion (Akhter et al., <xref ref-type="bibr" rid="B2">2012</xref>). Thus, the absence of these five PE/PPE proteins in animal isolates might lead to differences in host association.</p>
<p>Of the 29 genes specific to human isolates, <italic>ppe65, echA1, ephA</italic>, and <italic>Rv1977</italic> are known antigen genes (Vita et al., <xref ref-type="bibr" rid="B48">2015</xref>). The loss of these four genes might lead to changes in the immune response of animal isolates (discussed in the epitope section).</p>
<p>Five hundred and seventy-nine SNPs were identified between the human and animal isolates; 315 non-synonymous SNPs were distributed on 287 genes (Table <xref ref-type="supplementary-material" rid="SM1">S8A</xref>). Among them, 26 SNPs were on virulence factors and seven SNPs were located on antigen genes (Table <xref ref-type="supplementary-material" rid="SM1">S8B</xref>). It is likely that they influence protein function and further affect mycobacterial host association.</p>
</sec>
<sec>
<title>Genomic analysis of virulence differences</title>
<p>Diverse MTBC lineages have different degrees of virulence (Brosch et al., <xref ref-type="bibr" rid="B5">2002</xref>; Ida et al., <xref ref-type="bibr" rid="B26">2010</xref>; Gagneux, <xref ref-type="bibr" rid="B19">2012</xref>) even though their genomes are highly conserved. In this study, we searched for virulence factors in the 12 MTBC genomes by performing a BLAST search against 257 virulence factors collected from the VFDB (Chen et al., <xref ref-type="bibr" rid="B7">2005</xref>; Table <xref ref-type="supplementary-material" rid="SM1">S3</xref>). Among these, 86 are experimentally validated (Table <xref ref-type="supplementary-material" rid="SM1">S3A</xref>), while the others are putative ones (Table <xref ref-type="supplementary-material" rid="SM1">S3B</xref>). The results showed that more than 80% (216: 56 experimentally validated and 160 putative) of the virulence factors were present and conserved in all 12 strains (Figure <xref ref-type="supplementary-material" rid="SM1">S2B</xref>). Also, we found that the number of virulence factors in the four ancient strains was lower than in the eight modern strains, which is in agreement with the more virulent phenotype of modern strains (Gagneux, <xref ref-type="bibr" rid="B19">2012</xref>).</p>
<p>It is well known that Beijing sub-lineage strains in the L2 lineage are more virulent than the other modern isolates (Ida et al., <xref ref-type="bibr" rid="B26">2010</xref>). There was no appreciable difference between Beijing sub-lineage strains (Mtb 2242 and Mtb 2279) and other modern strains (Mtb F1, Mtb F28, Mtb 22115, Mtb 37004, Mtb 22103, and Mtb 26105) with regard to the number and types of virulence factors (Table <xref ref-type="table" rid="T1">1</xref>). We further investigated the SNPs between the Beijing and other modern strains, and 18 non-synonymous SNPs were identified (Table <xref ref-type="table" rid="T3">3</xref>). They were then checked by performing a BLAST search against ten other Beijing strains with complete genomes from NCBI GenBank (Table <xref ref-type="supplementary-material" rid="SM1">S9A</xref>). Fifteen SNPs on 13 virulence factors (five validated and eight putative) were confirmed to exist in the 12 Beijing strains. Most noteworthy were the five SNPs located on the ESX secretion systems, which are very important for <italic>M. tuberculosis</italic> pathogenesis (Simeone et al., <xref ref-type="bibr" rid="B42">2015</xref>).</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p><bold>The non-synonymous mutations between Beijing strains and other MTBC strains on virulence factors</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Gene</bold></th>
<th valign="top" align="left"><bold>Synonym</bold></th>
<th valign="top" align="center"><bold>Position</bold></th>
<th valign="top" align="center"><bold>SNPs</bold></th>
<th valign="top" align="left"><bold>Annotation</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">mce1D</td>
<td valign="top" align="left">Rv0172</td>
<td valign="top" align="center">563 (188)</td>
<td valign="top" align="center">T-&#x0003E;C (I-&#x0003E;T)</td>
<td valign="top" align="left">Mce family protein Mce1D</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">eccD3<xref ref-type="table-fn" rid="TN4"><sup>a</sup></xref></td>
<td valign="top" align="left" style="color:#ed1c24">Rv0290</td>
<td valign="top" align="center" style="color:#ed1c24">227 (76)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;A (S-&#x0003E;N)</td>
<td valign="top" align="left" style="color:#ed1c24">ESX-3 secretion system protein EccD</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">eccD3</td>
<td valign="top" align="left" style="color:#ed1c24">Rv0290</td>
<td valign="top" align="center" style="color:#ed1c24">283 (95)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;A (A-&#x0003E;T)</td>
<td valign="top" align="left" style="color:#ed1c24">ESX-3 secretion system protein EccD</td>
</tr>
<tr>
<td valign="top" align="left">mce2F</td>
<td valign="top" align="left">Rv0594</td>
<td valign="top" align="center">1,295 (432)</td>
<td valign="top" align="center">A-&#x0003E;G (N-&#x0003E;S)</td>
<td valign="top" align="left">Mce family protein Mce2F</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mmpL10</td>
<td valign="top" align="left" style="color:#ed1c24">Rv1183</td>
<td valign="top" align="center" style="color:#ed1c24">1,222 (408)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;G (T-&#x0003E;A)</td>
<td valign="top" align="left" style="color:#ed1c24">Transmembrane transport protein MmpL10</td>
</tr>
<tr>
<td valign="top" align="left">plcC</td>
<td valign="top" align="left">Rv2349c</td>
<td valign="top" align="center">1,081 (361)</td>
<td valign="top" align="center">G-&#x0003E;T (G-&#x0003E;C)</td>
<td valign="top" align="left">Phospholipase C</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">plcA</td>
<td valign="top" align="left" style="color:#ed1c24">Rv2351c</td>
<td valign="top" align="center" style="color:#ed1c24">1,336 (446)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;G (T-&#x0003E;A)</td>
<td valign="top" align="left" style="color:#ed1c24">Membrane-associated phospholipase A</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mbtB</td>
<td valign="top" align="left" style="color:#ed1c24">Rv2383c</td>
<td valign="top" align="center" style="color:#ed1c24">2,020 (674)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;C (V-&#x0003E;L)</td>
<td valign="top" align="left" style="color:#ed1c24">Phenyloxazoline synthase</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">ppsA</td>
<td valign="top" align="left" style="color:#ed1c24">Rv2931</td>
<td valign="top" align="center" style="color:#ed1c24">3,581 (1194)</td>
<td valign="top" align="center" style="color:#ed1c24">T-&#x0003E;G (L-&#x0003E;R)</td>
<td valign="top" align="left" style="color:#ed1c24">Phthiocerol synthesis polyketide synthase type I PpsA</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">mas</td>
<td valign="top" align="left" style="color:#ed1c24">Rv2940c</td>
<td valign="top" align="center" style="color:#ed1c24">6,013 (2005)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;C (T-&#x0003E;P)</td>
<td valign="top" align="left" style="color:#ed1c24">Multifunctional mycocerosic acid synthase</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">&#x02013;</td>
<td valign="top" align="left" style="color:#ed1c24">Rv2952</td>
<td valign="top" align="center" style="color:#ed1c24">526 (176)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;A (G-&#x0003E;R)</td>
<td valign="top" align="left" style="color:#ed1c24">Phthiotriol/phenolphthiotriol dimycocerosates methyltransferase</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">kefB</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3236c</td>
<td valign="top" align="center" style="color:#ed1c24">304 (102)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;G (T-&#x0003E;A)</td>
<td valign="top" align="left" style="color:#ed1c24">Integral membrane transport protein</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">lipF</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3487c</td>
<td valign="top" align="center" style="color:#ed1c24">697 (233)</td>
<td valign="top" align="center" style="color:#ed1c24">C-&#x0003E;T (R-&#x0003E;C)</td>
<td valign="top" align="left" style="color:#ed1c24">Carboxylesterase LipF</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">papA2</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3820c</td>
<td valign="top" align="center" style="color:#ed1c24">1,397 (466)</td>
<td valign="top" align="center" style="color:#ed1c24">C-&#x0003E;T (P-&#x0003E;L)</td>
<td valign="top" align="left" style="color:#ed1c24">Trehalose-2-sulfate acyltransferase</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">fadD23</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3826</td>
<td valign="top" align="center" style="color:#ed1c24">1,264 (422)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;C (E-&#x0003E;Q)</td>
<td valign="top" align="left" style="color:#ed1c24">Long-chain-fatty-acid&#x02013;CoA ligase FadD23</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">espK</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3879c</td>
<td valign="top" align="center" style="color:#ed1c24">130 (44)</td>
<td valign="top" align="center" style="color:#ed1c24">G-&#x0003E;A (D-&#x0003E;N)</td>
<td valign="top" align="left" style="color:#ed1c24">ESX-1 secretion-associated protein EspK</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">espK</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3879c</td>
<td valign="top" align="center" style="color:#ed1c24">1,979 (660)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;C (E-&#x0003E;A)</td>
<td valign="top" align="left" style="color:#ed1c24">ESX-1 secretion-associated protein EspK</td>
</tr>
<tr>
<td valign="top" align="left" style="color:#ed1c24">eccC2</td>
<td valign="top" align="left" style="color:#ed1c24">Rv3894c</td>
<td valign="top" align="center" style="color:#ed1c24">1,949 (650)</td>
<td valign="top" align="center" style="color:#ed1c24">A-&#x0003E;G (D-&#x0003E;G)</td>
<td valign="top" align="left" style="color:#ed1c24">ESX-2 type VII secretion system protein EccC</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN4"><label>a</label><p><italic>SNPs in red are the ones that were further validated in ten other Beijing strains</italic>.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The two homologous pairs of virulent/non-virulent strains (i.e., H37Rv vs. H37Ra and <italic>M. bovis</italic> vs. <italic>M. bovis</italic> BCG) are well suited for virulence genetic analysis because they have the same origins (Steenken and Gardner, <xref ref-type="bibr" rid="B44">1946</xref>; Liu et al., <xref ref-type="bibr" rid="B32">2009</xref>). Thus, we conducted a comparative genomic analysis between virulent and non-virulent strains at the whole genome level (not limited to virulence factors).</p>
<sec>
<title>H37Rv vs. H37Ra</title>
<p>H37Ra and H37Rv show very different virulence even though they share an origin. To investigate the relationship between the phenotypic differences and genetic variations, we implemented a comparative genomic study between the five H37Rv (including four online complete genomes from NCBI GenBank and Mtb H37Rv F1 genome) and two H37Ra genomes (including one online complete genome from NCBI GenBank and Mtb H37Ra F28 genome; Table <xref ref-type="supplementary-material" rid="SM1">S9B</xref>). ANI analysis revealed 99.98% identity between the H37Rv and H37Ra strains. No large genome structure variation was found. Four specific genes in all H37Ra strains and five missense SNPs were identified.</p>
<p>The four H37Ra specific genes were clustered together and located in RvD2 (H37Rv-related deletion 2) with two IS6110 transposase genes on both ends (Figure <xref ref-type="supplementary-material" rid="SM1">S4</xref>). Further, comparison with the other three L4 strains showed that the four genes also existed in L4 virulent strains except H37Rv; this indicates that they are not relevant to the virulence of H37Rv (Figure <xref ref-type="supplementary-material" rid="SM1">S4</xref>). We inferred that the four genes might have been lost through IS6110 in the long-term passage of H37Rv.</p>
<p>Five missense SNPs were identified between the H37Rv and H37Ra strains (Table <xref ref-type="table" rid="T4">4</xref>). Through comparison with our six clinical virulent strains, two SNPs were eliminated. Of the other three SNPs, two of them (on PhoP and MazG) have been reported and implicated in H37Rv virulence (Ryndak et al., <xref ref-type="bibr" rid="B40">2008</xref>; Lu et al., <xref ref-type="bibr" rid="B33">2010</xref>). We identified a new mutation, C684G (I228M), on the validated virulence factor gene <italic>espK</italic> in H37Ra strains (Table <xref ref-type="table" rid="T4">4</xref>). EspK is an ESX-1 secretion-associated protein that is required for virulence, growth in macrophages, and suppression of macrophage inflammatory and immune response (McLaughlin et al., <xref ref-type="bibr" rid="B34">2007</xref>). Thus, this SNP might be involved in the attenuation of virulence in H37Ra.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p><bold>The SNPs between non-virulent H37Ra and virulent H37Rv</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Gene<xref ref-type="table-fn" rid="TN5"><sup>a</sup></xref></bold></th>
<th valign="top" align="center"><bold>Position<xref ref-type="table-fn" rid="TN6"><sup>b</sup></xref></bold></th>
<th valign="top" align="center"><bold>SNPs<xref ref-type="table-fn" rid="TN7"><sup>c</sup></xref></bold></th>
<th valign="top" align="left"><bold>Annotation</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Rv0966c</td>
<td valign="top" align="center">524 (175)</td>
<td valign="top" align="center">A-&#x0003E;G(V-&#x0003E;A)</td>
<td valign="top" align="left">Hypothetical protein</td>
</tr>
<tr>
<td valign="top" align="left">Rv0757(phoP)</td>
<td valign="top" align="center">656 (219)</td>
<td valign="top" align="center">C-&#x0003E;T(S-&#x0003E;L)</td>
<td valign="top" align="left">Two component system response transcriptional positive regulator PhoP</td>
</tr>
<tr>
<td valign="top" align="left">Rv0658c</td>
<td valign="top" align="center">224 (75)</td>
<td valign="top" align="center">A-&#x0003E;G(L-&#x0003E;P)</td>
<td valign="top" align="left">Integral membrane protein</td>
</tr>
<tr>
<td valign="top" align="left">Rv1021(mazG)</td>
<td valign="top" align="center">656 (219)</td>
<td valign="top" align="center">C-&#x0003E;A(A-&#x0003E;E)</td>
<td valign="top" align="left">Nucleoside triphosphate pyrophosphohydrolase</td>
</tr>
<tr>
<td valign="top" align="left">Rv3879c(espK)</td>
<td valign="top" align="center">684 (228)</td>
<td valign="top" align="center">G-&#x0003E;C(I-&#x0003E;M)</td>
<td valign="top" align="left">ESX-1 secretion-associated protein EspK</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN5"><label>a</label><p><italic>For comparison, the H37Rv genome is used as reference genome;</italic></p></fn>
<fn id="TN6"><label>b</label><p><italic>The number in the parentheses indicates the mutant position in protein;</italic></p></fn>
<fn id="TN7"><label>c</label><p><italic>The letters in the parentheses indicate the amino acid substitutions</italic>.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title><italic>M. bovis</italic> vs. <italic>M. bovis</italic> BCG</title>
<p>To explore the genetic differences between <italic>M. bovis</italic> and <italic>M. bovis</italic> BCG, we performed a comparative genomic analysis between three <italic>M. bovis</italic> (including two online complete genomes from NCBI GenBank and <italic>M. bovis</italic> 30 genome) and nine <italic>M. bovis</italic> BCG genomes (including eight online complete genomes from NCBI GenBank and <italic>M. bovis</italic> BCG 26 genome; Table <xref ref-type="supplementary-material" rid="SM1">S9C</xref>). ANI analysis showed a 99.90% identity between <italic>M. bovis</italic> and <italic>M. bovis</italic> BCG strains. Here, we identified 28 <italic>M. bovis</italic> specific genes (Table <xref ref-type="supplementary-material" rid="SM1">S10A</xref>), six <italic>M. bovis</italic> BCG specific genes (Table <xref ref-type="supplementary-material" rid="SM1">S10B</xref>), and 308 missense SNPs (Table <xref ref-type="supplementary-material" rid="SM1">S11</xref>).</p>
<p>Eight of 28 <italic>M. bovis</italic> specific genes (i.e., <italic>pe35, ppe68, esxB, esxA, espI, eccD1, espJ</italic>, and <italic>espK</italic>) were clustered together and composed RD1 (Sreejit et al., <xref ref-type="bibr" rid="B43">2014</xref>). Absence of RD1 in <italic>M. bovis</italic> BCG strains was reported to lead to attenuation of virulence (Pym et al., <xref ref-type="bibr" rid="B37">2002</xref>). We further analyzed the distribution of these 8 genes in our 12 MTBC strains and found that only the first four genes (i.e., <italic>pe35, ppe68, esxB</italic>, and <italic>esxA</italic>) were absent in <italic>M. microti</italic> 12 (Figure <xref ref-type="fig" rid="F2">2</xref>). Since <italic>M. microti</italic> is a non-virulent strain, which seldom causes disease in immunocompetent individuals (Rodriguez-Campos et al., <xref ref-type="bibr" rid="B38">2014</xref>), we deduced that the first four genes of RD1 (rather than RD1) might play a key role in attenuating the virulence of <italic>M. bovis</italic> BCG and <italic>M. microti</italic> strains. In addition, 13 phiRV1 phage proteins were also identified to be absent in <italic>M. bovis</italic> BCG strains. They were clustered together and composed RD3 (Table <xref ref-type="supplementary-material" rid="SM1">S10A</xref>). It was reported that they could sense oxygen status within the host; thus, their presence could help <italic>M. bovis</italic> adapt to environmental stress (Fan et al., <xref ref-type="bibr" rid="B16">2016</xref>).</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>A schematic diagram showing the RD1 distribution in 12 MTBC strains</bold>. Different colors represent different genes on RD1 (<italic>PE35</italic>, pink; <italic>PPE68</italic>, purple; <italic>esxB</italic>, blue; <italic>esxA</italic>, slate blue; <italic>espI</italic>, green; <italic>eccD1</italic>, yellow; <italic>espJ</italic>, orange; <italic>espK</italic>, red). Genes in the red dashed box indicate the four lost genes in the attenuated <italic>M. bovis</italic> BCG 26 and <italic>M. microti</italic> 12 strains. Genes upstream (<italic>eccCb1</italic>) and downstream (<italic>espJ</italic>) of RD1 are shown in gray.</p></caption>
<graphic xlink:href="fcimb-07-00088-g0002.tif"/>
</fig>
<p>Three hundred and eight missense SNPs (on 251 genes) were identified between <italic>M. bovis</italic> and <italic>M. bovis</italic> BCG (Table <xref ref-type="supplementary-material" rid="SM1">S11A</xref>). Further Clusters of Orthologous Group (COG) functional analysis indicated that 19 genes (19 missense SNPs) displayed a relative enrichment in COG category I (Lipid transport and metabolism; Figure <xref ref-type="supplementary-material" rid="SM1">S5</xref>). MTBC strains exhibited lipid enrichment on their cell surfaces, which is reported to play a role in pathogenesis (Cole et al., <xref ref-type="bibr" rid="B9">1998</xref>). Importantly, 21 of the nonsynonymous SNPs were located on 21 virulence factors (four validated and 17 putative; Table <xref ref-type="supplementary-material" rid="SM1">S11B</xref>). These SNPs appear to be associated with the attenuation of virulence in <italic>M. bovis</italic> BCG strains. Finally, we emphasize that we were particularly careful about the conservation of the standard strains (obtained from ATCC): they had not been subjected to repeated <italic>in vitro</italic> cultivation and belong to the first generations of the original strains. Even so, because animal studies were not carried out owing to practical limitations, all of the arguments regarding virulence are speculative and need further validation.</p>
</sec>
</sec>
<sec>
<title>Antigen epitope variations</title>
<p>An epitope is the part of an antigen that is recognized by the immune system, and plays a core role in immune response (<ext-link ext-link-type="uri" xlink:href="https://en.wikipedia.org/wiki/Epitope">https://en.wikipedia.org/wiki/Epitope</ext-link>). To date, epitope studies are confined to T-cells in <italic>M. tuberculosis</italic> (Comas et al., <xref ref-type="bibr" rid="B10">2010</xref>; Copin et al., <xref ref-type="bibr" rid="B12">2014</xref>; Lindestam Arlehamn et al., <xref ref-type="bibr" rid="B31">2015</xref>). However, comparative analysis of epitopes in different lineages of MTBC strains, including T-cell and B-cell epitopes, is still lacking. In this study, we analyzed the diversity of T-cell and B-cell epitopes in 12 MTBC strains, which serves to enhance our understanding of immune response differences in diverse MTBC strains.</p>
<p>A total of 2,245 MTBC epitopes were downloaded from the IEDB (Vita et al., <xref ref-type="bibr" rid="B48">2015</xref>), including 1,755 T-cell epitopes and 490 B-cell epitopes (Table <xref ref-type="supplementary-material" rid="SM1">S12</xref>). Pan-genomic analysis indicated that 1,522 epitopes were conserved in all 12 MTBC strains (Figure <xref ref-type="supplementary-material" rid="SM1">S2C</xref>). Among these, 1,428 epitopes possessed the same copy number.</p>
<p>Despite sharing the majority of both T-cell and B-cell epitopes, the 12 MTBC strains still had some differences. Two non-virulent animal-origin MTBC strains, <italic>M. bovis BCG</italic> and <italic>M. microti</italic>, had significantly fewer T-cell and B-cell epitopes, suggesting that these lost epitopes might be related to the non-virulence, since more than 50% of the lost epitopes (164 of 326) were located in the virulence factors (Figure <xref ref-type="fig" rid="F3">3</xref>, Table <xref ref-type="supplementary-material" rid="SM1">S13</xref>).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p><bold>Comparison of (A)</bold> T cell and <bold>(B)</bold> B cell epitopes in 12 MTBC strains. Duplicate epitopes were removed, and only epitopes with 100% identical matches were considered present in the strain. The x-axis refers to the strain name, and the vertical axis indicates the number of epitopes in the corresponding strain.</p></caption>
<graphic xlink:href="fcimb-07-00088-g0003.tif"/>
</fig>
<sec>
<title>Antigen epitope analysis between human- and animal-isolates</title>
<p>The epitopes with the same distribution pattern in the 12 MTBC strains were clustered to further clarify the results. We then analyzed the epitope-clusters for the 12 MTBC strains (Figure <xref ref-type="fig" rid="F4">4</xref>). For T-cell epitope-clusters, we first investigated the relationship between epitopes and host association. Four epitope-clusters (Tcell_cluster_0002, 0017, 0078, and 0083: 25 epitopes) and one epitope-cluster (Tcell_cluster_0002: two epitopes) were identified to exist only in the human- and animal-associated MTBC strains, respectively (Figure <xref ref-type="fig" rid="F4">4A</xref>). In addition, the copy number of two epitope-clusters (Tcell_cluster_0015 and 0079: five epitopes) in the human isolates was higher than in the animal isolates. For B cell epitope-clusters, two epitope-clusters (Bcell_cluster_0002 and 0012: eight epitopes) only existed in human isolates, and the copy number of one epitope-cluster (Bcell_cluster_0031: one epitope) in human isolates was higher than in animal-associated ones (Figure <xref ref-type="fig" rid="F4">4B</xref>).</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><bold>Distribution of differential T cell (A)</bold> and B cell <bold>(B)</bold> epitope-clusters in 12 MTBC strains. The epitope-clusters present in all 12 MTBC strains with the same copy number were excluded. Each row represents an epitope-cluster, and each column indicates a strain. The color intensity shows the copy number of each epitope-cluster. Some epitope-clusters were enlarged to highlight the differences amongst diverse MTBC lineages on the right section of figure.</p></caption>
<graphic xlink:href="fcimb-07-00088-g0004.tif"/>
</fig>
<p>To further explore the immune-genetic mechanisms, we studied the corresponding antigen genes of the previously described epitopes. The 25 human-associated strain-specific epitopes (belonging to four T-cell epitope-clusters) were located on 13 antigen genes (Figure <xref ref-type="fig" rid="F5">5</xref>). Among these, the loss of ten epitopes in animal-associated strains resulted from the deletion of the corresponding antigen genes; the loss of the other 15 epitopes was due to SNPs or Insertions or Deletions (Indels) on the relevant antigen genes (Table <xref ref-type="supplementary-material" rid="SM1">S14</xref>).</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p><bold>Distribution of some differential T-cell (A)</bold> and B-cell <bold>(B)</bold> epitopes and the corresponding antigen genes for 12 MTBC strains. The distribution of epitopes and corresponding antigens are shown at the top and bottom sections of the figure. Epitope copy number and antigen are indicated by the intensity of the color.</p></caption>
<graphic xlink:href="fcimb-07-00088-g0005.tif"/>
</fig>
<p>The deletion of antigen genes <italic>ephA</italic> (epoxide hydrolase) and <italic>ppe65</italic> (PPE family protein), which are located in RD8, led to the loss of four epitopes (Tcell_1633 and 18566&#x0002B;6; Tcell_1738 and 2145) in animal-associated strains. <italic>EphA</italic> has been reported to regulate integrin-mediated T lymphocyte interactions, and its activation might lead to the down-regulation of T-cell interactions (Sharfe et al., <xref ref-type="bibr" rid="B41">2008</xref>). In addition, three human-associated strain-specific epitopes (Tcell_0813, 0814, and 0815) were located on the antigen gene <italic>echA1</italic> (described in &#x0201C;<bold>Host association analysis</bold>&#x0201D;). <italic>EchA1</italic> was previously reported to be lost in <italic>M. bovis</italic> and <italic>M. bovis BCG</italic> strains (Brosch et al., <xref ref-type="bibr" rid="B5">2002</xref>; Rosenkrands et al., <xref ref-type="bibr" rid="B39">2008</xref>). Our analysis shows that <italic>echA1</italic> is also absent from <italic>M. microti</italic> strains. In addition, our analysis indicates that the animal-associated strains lacked the antigen gene <italic>Rv1977</italic>, which resulted in the absence of epitopes (Tcell_0819-0821).</p>
<p>In addition to the deletion of antigens, 15 human-associated strain-specific epitopes were attributed to SNPs and Indels in nine T cell antigen genes of animal-associated strains (Table <xref ref-type="supplementary-material" rid="SM1">S14</xref>). One of these was the antigen gene <italic>PstS-1</italic>, which is a lipoprotein phosphate transport receptor on the cell surface (de Araujo et al., <xref ref-type="bibr" rid="B14">2014</xref>); this possessed five mutant epitopes in animal-associated strains. In our study, the five mutant epitopes were all derived from one SNP (V352A; Figure <xref ref-type="supplementary-material" rid="SM1">S6</xref>), which might lead to the animal-specific immune reaction.</p>
<p>The 25 human-associated strain-specific epitopes were further tested on ten animal-associated strains (two <italic>M. bovis</italic> and eight <italic>M. bovis BCG</italic> strains) with complete genome sequences from the NCBI (Table <xref ref-type="supplementary-material" rid="SM1">S9C</xref>). BLAST results confirmed our findings.</p>
</sec>
<sec>
<title>Antigen epitope analysis of Beijing strains</title>
<p>Based on different animal experiments showing the Beijing strains to be more virulent, and to cause more histopathological changes, higher outgrowth, and increased mortality (Ida et al., <xref ref-type="bibr" rid="B26">2010</xref>), the Beijing strains are considered the most pathogenic of the MTBC strains. Epitope-cluster analysis showed that Tcell_cluster_0009 (4 epitopes: Tcell_0579, 0591, 1737, and 1829) was lost only in the Beijing strains (Mtb 2242 and 2279; Figures <xref ref-type="fig" rid="F4">4</xref>, <xref ref-type="fig" rid="F5">5</xref>). Sequence alignment results revealed that the nucleotide sequence &#x0201C;AAACATT&#x0201D; from 71 to 77 of the antigen gene <italic>vapB17</italic> mutated into &#x0201C;AC&#x0201D; in these two strains; this resulted in the absence of epitope Tcell_0579 and 0591. The &#x0201C;I245M&#x0201D; mutation on antigen CcsA led to the loss of epitope Tcell_1737. Finally, the deletion of the antigen gene <italic>Rv2819c</italic> (a CRISPR type III-associated RAMP protein Csm5) led to the loss of epitope Tcell_1829 in the Beijing lineage strains. Further verification was carried out on ten other Beijing lineage strains with complete genome sequences obtained from NCBI (Table <xref ref-type="supplementary-material" rid="SM1">S9A</xref>). The results confirmed the finding that these four epitopes were lost in all Beijing lineage strains. The absence of the four epitopes in all of these Beijing lineage strains might enable them to better evade the human immune system and promote growth in the hostile environment of the host cell. This could be a reason for their more virulent phenotype.</p>
</sec>
<sec>
<title>Antigen epitope analysis between <italic>M. bovis</italic> and <italic>M. bovis</italic> BCG strains</title>
<p>To investigate the relationship between epitope variation and virulence, we compared the difference in epitopes of three virulent <italic>M. bovis</italic> strains and nine non-virulent <italic>M. bovis</italic> BCG strains (Figure <xref ref-type="supplementary-material" rid="SM1">S7</xref>). We determined that Tcell_cluster_A02 (149 epitopes) and Bcell_cluster_A01 (19 epitopes) were lost in all non-virulent BCG strains; meanwhile, Tcell_cluster_A06 (six epitopes) and Bcell_cluster_A04 (one epitope) were absent from all <italic>M. bovis</italic> strains. Tcell_cluster_A23 (five epitopes) had two copies in <italic>M. bovis</italic>, but only one copy in <italic>M. bovis</italic> BCG. We further examined the 13 corresponding antigen genes carrying the sequences of the above 180 epitopes (Figure <xref ref-type="supplementary-material" rid="SM1">S8</xref> and Table <xref ref-type="supplementary-material" rid="SM1">S15</xref>). Among these, the loss of 146 epitopes (including 130 T-cell epitopes and 16 B-cell epitopes) in BCG strains resulted from the deletion of the corresponding three antigen genes (<italic>esxA, esxB</italic>, and <italic>ppe68</italic>), which were located in RD1 and are reported to be associated with the attenuation of virulence in <italic>M. bovis</italic> BCG (Lewis et al., <xref ref-type="bibr" rid="B27">2003</xref>).</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusions" id="s4">
<title>Conclusion</title>
<p>Tuberculosis now exceeds HIV as the top infectious disease killer, and is caused by the <italic>Mycobacterium tuberculosis</italic> complex (MTBC). MTBC strains possess highly similar genomes (99%) but large variations in phenotype, including host-association, virulence, and immunoreactivity. This might be the result of long-term co-evolution with diverse populations of humans and animals. Long-term geographical isolation gave rise to the accumulation of &#x0201C;genotype isolation,&#x0201D; thus resulting in different phenotypes. To analyze the relationship between genotype and phenotype, we selected 12 MTBC strains representing different lineages, re-sequenced their genomes to correct homo-polymer errors, and performed a comparative genomic analysis of the strains.</p>
<p>Host-association analysis indicated that eight <italic>mce3</italic> family genes, two enoyl-CoA hydratases, and five PE/PPE family genes were present in human-associated strains but not in animal-associated strains.</p>
<p>Fifteen SNPs were found on virulence factors (including five SNPs in three ESX secretion-proteins) only in the Beijing strains. Comparisons between the virulent H37Rv strain and the non-virulent H37Ra strain revealed that three SNPs, S219L (PhoP), A219E (MazG), and a newly identified I228M (EspK), might lead to the attenuation of virulence in H37Ra. On the other hand, a comparison of the animal-associated MTBC strains showed that the deletion of the first four genes (i.e., <italic>pe35, ppe68, esxB</italic>, and <italic>esxA</italic>), rather than all eight RD1 genes, might play a central role in the virulence of animal-associated MTBC strains.</p>
<p>Finally, by comparing the epitopes of MTBC strains, we found that four epitopes were lost only in the Beijing strains. In addition, 32 T-cell epitopes and eight B-cell epitopes showed different distribution patterns in human- and animal-associated strains.</p>
<p>Overall, our research reveals the genetic variations leading to the differences in MTBC genotypes, and enhances our understanding of the relationship between highly conserved genome sequences/genotypes (similarity &#x0003E;99%) and highly different phenotypes in MTBC strains.</p>
</sec>
<sec id="s5">
<title>Accession numbers</title>
<p>The SRA accession number for the sequencing data reported in this paper is <ext-link ext-link-type="NCBI:sra" xlink:href="SRP064893">SRP064893</ext-link>. The GSA (Genome Sequence Archive in BIG Data Center) accession number is <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJCA000307">PRJCA000307</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>FC, XJ, and HH designed the project. XJ, MD, and FC wrote the paper. XJ, LY, SC, LL, DC, JF, TY, JZ, XZ, YS, GW, and YS performed the experiments.</p>
<sec>
<title>Conflict of interest statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
</sec>
</body>
<back>
<ack>
<p>This work was supported by National Natural Science Foundation of China (31601047, 31570133, and 31600107); the &#x0201C;100-Talent Program&#x0201D; of Chinese Academy of Sciences (Y3CAS81554); Collaborative Innovation Center of Infectious diseases (PXM2015_014226_000058).</p>
</ack>
<sec sec-type="supplementary-material" id="s7">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="http://journal.frontiersin.org/article/10.3389/fcimb.2017.00088/full#supplementary-material">http://journal.frontiersin.org/article/10.3389/fcimb.2017.00088/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="DataSheet1.zip" id="SM1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmad</surname> <given-names>S.</given-names></name> <name><surname>El-Shazly</surname> <given-names>S.</given-names></name> <name><surname>Mustafa</surname> <given-names>A. S.</given-names></name> <name><surname>Al-Attiyah</surname> <given-names>R.</given-names></name></person-group> (<year>2004</year>). <article-title>Mammalian cell-entry proteins encoded by mce3 operon of <italic>Mycobacterium tuberculosis</italic> are expressed during natural infection in humans</article-title>. <source>Scand. J. Immunol.</source> <volume>60</volume>, <fpage>382</fpage>&#x02013;<lpage>391</lpage>. <pub-id pub-id-type="doi">10.1111/j.0300-9475.2004.01490.x</pub-id><pub-id pub-id-type="pmid">15379863</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Akhter</surname> <given-names>Y.</given-names></name> <name><surname>Ehebauer</surname> <given-names>M. T.</given-names></name> <name><surname>Mukhopadhyay</surname> <given-names>S.</given-names></name> <name><surname>Hasnain</surname> <given-names>S. E.</given-names></name></person-group> (<year>2012</year>). <article-title>The PE/PPE multigene family codes for virulence factors and is a possible source of mycobacterial antigenic variation: perhaps more?</article-title> <source>Biochimie</source> <volume>94</volume>, <fpage>110</fpage>&#x02013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1016/j.biochi.2011.09.026</pub-id><pub-id pub-id-type="pmid">22005451</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aziz</surname> <given-names>R. K.</given-names></name> <name><surname>Bartels</surname> <given-names>D.</given-names></name> <name><surname>Best</surname> <given-names>A. A.</given-names></name> <name><surname>DeJongh</surname> <given-names>M.</given-names></name> <name><surname>Disz</surname> <given-names>T.</given-names></name> <name><surname>Edwards</surname> <given-names>R. A.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>The RAST server: rapid annotations using subsystems technology</article-title>. <source>BMC Genomics</source> <volume>9</volume>:<fpage>75</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-9-75</pub-id><pub-id pub-id-type="pmid">18261238</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bentley</surname> <given-names>S. D.</given-names></name> <name><surname>Comas</surname> <given-names>I.</given-names></name> <name><surname>Bryant</surname> <given-names>J. M.</given-names></name> <name><surname>Walker</surname> <given-names>D.</given-names></name> <name><surname>Smith</surname> <given-names>N. H.</given-names></name> <name><surname>Harris</surname> <given-names>S. R.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>The Genome of <italic>Mycobacterium africanum</italic> West African 2 reveals a lineage-specific locus and genome erosion common to the <italic>M. tuberculosis</italic> complex</article-title>. <source>PLoS Negl. Trop. Dis.</source> <volume>6</volume>:<fpage>e1552</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pntd.0001552</pub-id><pub-id pub-id-type="pmid">22389744</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brosch</surname> <given-names>R.</given-names></name> <name><surname>Gordon</surname> <given-names>S. V.</given-names></name> <name><surname>Marmiesse</surname> <given-names>M.</given-names></name> <name><surname>Brodin</surname> <given-names>P.</given-names></name> <name><surname>Buchrieser</surname> <given-names>C.</given-names></name> <name><surname>Eiglmeier</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2002</year>). <article-title>A new evolutionary scenario for the <italic>Mycobacterium tuberculosis</italic> complex</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>99</volume>, <fpage>3684</fpage>&#x02013;<lpage>3689</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.052548299</pub-id><pub-id pub-id-type="pmid">11891304</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brosch</surname> <given-names>R.</given-names></name> <name><surname>Gordon</surname> <given-names>S. V.</given-names></name> <name><surname>Pym</surname> <given-names>A.</given-names></name> <name><surname>Eiglmeier</surname> <given-names>K.</given-names></name> <name><surname>Garnier</surname> <given-names>T.</given-names></name> <name><surname>Cole</surname> <given-names>S. T.</given-names></name></person-group> (<year>2000</year>). <article-title>Comparative genomics of the mycobacteria</article-title>. <source>Int. J. Med. Microbiol.</source> <volume>290</volume>, <fpage>143</fpage>&#x02013;<lpage>152</lpage>. <pub-id pub-id-type="doi">10.1016/S1438-4221(00)80083-1</pub-id><pub-id pub-id-type="pmid">11045919</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>L. H.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>J.</given-names></name> <name><surname>Ya</surname> <given-names>Z. J.</given-names></name> <name><surname>Sun</surname> <given-names>L. L.</given-names></name> <name><surname>Shen</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2005</year>). <article-title>VFDB: a reference database for bacterial virulence factors</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>D325</fpage>&#x02013;<lpage>D328</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki008</pub-id><pub-id pub-id-type="pmid">15608208</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>M. J.</given-names></name> <name><surname>Gan</surname> <given-names>H. X.</given-names></name> <name><surname>Remold</surname> <given-names>H. G.</given-names></name></person-group> (<year>2006</year>). <article-title>A mechanism of virulence: virulent <italic>Mycobacterium tuberculosis</italic> strain H37Rv, but not attenuated H37Ra, causes significant mitochondrial inner membrane disruption in macrophages leading to necrosis</article-title>. <source>J. Immunol.</source> <volume>176</volume>, <fpage>3707</fpage>&#x02013;<lpage>3716</lpage>. <pub-id pub-id-type="doi">10.4049/jimmunol.176.6.3707</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cole</surname> <given-names>S. T.</given-names></name> <name><surname>Brosch</surname> <given-names>R.</given-names></name> <name><surname>Parkhill</surname> <given-names>J.</given-names></name> <name><surname>Garnier</surname> <given-names>T.</given-names></name> <name><surname>Churcher</surname> <given-names>C.</given-names></name> <name><surname>Harris</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>1998</year>). <article-title>Deciphering the biology of <italic>Mycobacterium tuberculosis</italic> from the complete genome sequence</article-title>. <source>Nature</source> <volume>393</volume>, <fpage>537</fpage>&#x02013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.1038/31159</pub-id><pub-id pub-id-type="pmid">9634230</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Comas</surname> <given-names>I.</given-names></name> <name><surname>Chakravartti</surname> <given-names>J.</given-names></name> <name><surname>Small</surname> <given-names>P. M.</given-names></name> <name><surname>Galagan</surname> <given-names>J.</given-names></name> <name><surname>Niemann</surname> <given-names>S.</given-names></name> <name><surname>Kremer</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Human T cell epitopes of <italic>Mycobacterium tuberculosis</italic> are evolutionarily hyperconserved</article-title>. <source>Nat. Genet.</source> <volume>42</volume>, <fpage>498</fpage>&#x02013;<lpage>503</lpage>. <pub-id pub-id-type="doi">10.1038/ng.590</pub-id><pub-id pub-id-type="pmid">20495566</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Comas</surname> <given-names>I.</given-names></name> <name><surname>Coscolla</surname> <given-names>M.</given-names></name> <name><surname>Luo</surname> <given-names>T.</given-names></name> <name><surname>Borrell</surname> <given-names>S.</given-names></name> <name><surname>Holt</surname> <given-names>K. E.</given-names></name> <name><surname>Kato-Maeda</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Out-of-Africa migration and Neolithic coexpansion of <italic>Mycobacterium tuberculosis</italic> with modern humans</article-title>. <source>Nat. Genet.</source> <volume>45</volume>, <fpage>1176</fpage>&#x02013;<lpage>1182</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2744</pub-id><pub-id pub-id-type="pmid">23995134</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Copin</surname> <given-names>R.</given-names></name> <name><surname>Coscolla</surname> <given-names>M.</given-names></name> <name><surname>Efstathiadis</surname> <given-names>E.</given-names></name> <name><surname>Gagneux</surname> <given-names>S.</given-names></name> <name><surname>Ernst</surname> <given-names>J. D.</given-names></name></person-group> (<year>2014</year>). <article-title>Impact of <italic>in vitro</italic> evolution on antigenic diversity of <italic>Mycobacterium bovis</italic> bacillus Calmette-Guerin (BCG)</article-title>. <source>Vaccine</source> <volume>32</volume>, <fpage>5998</fpage>&#x02013;<lpage>6004</lpage>. <pub-id pub-id-type="doi">10.1016/j.vaccine.2014.07.113</pub-id><pub-id pub-id-type="pmid">25211768</pub-id></citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Darling</surname> <given-names>A. E.</given-names></name> <name><surname>Mau</surname> <given-names>B.</given-names></name> <name><surname>Perna</surname> <given-names>N. T.</given-names></name></person-group> (<year>2010</year>). <article-title>progressiveMauve: multiple genome alignment with gene gain, loss and rearrangement</article-title>. <source>PLoS ONE</source> <volume>5</volume>:<fpage>e11147</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0011147</pub-id><pub-id pub-id-type="pmid">20593022</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Araujo</surname> <given-names>L. S.</given-names></name> <name><surname>Mello</surname> <given-names>F. C. D.</given-names></name> <name><surname>da Silva</surname> <given-names>N. D. M.</given-names></name> <name><surname>Leung</surname> <given-names>J. A. M.</given-names></name> <name><surname>Machado</surname> <given-names>S. M. A.</given-names></name> <name><surname>Sardella</surname> <given-names>I. G.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Evaluation of gamma interferon immune response elicited by the newly constructed PstS-1(285-374): CFP10 fusion protein to detect <italic>Mycobacterium tuberculosis</italic> infection</article-title>. <source>Clin. Vaccine Immunol.</source> <volume>21</volume>, <fpage>552</fpage>&#x02013;<lpage>560</lpage>. <pub-id pub-id-type="doi">10.1128/CVI.00726-13</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fabre</surname> <given-names>M.</given-names></name> <name><surname>Koeck</surname> <given-names>J. L.</given-names></name> <name><surname>Le Fleche</surname> <given-names>P.</given-names></name> <name><surname>Simon</surname> <given-names>F.</given-names></name> <name><surname>Herve</surname> <given-names>V.</given-names></name> <name><surname>Vergnaud</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2004</year>). <article-title>High genetic diversity revealed by variable-number tandem repeat genotyping and analysis of <italic>hsp65</italic> gene polymorphism in a large collection of &#x0201C;<italic>Mycobacterium canettii</italic>&#x0201D; strains indicates that the <italic>M. tuberculosis</italic> complex is a recently emerged clone of &#x0201C;<italic>M. canettii</italic>.&#x0201D;</article-title> <source>J. Clin. Microbiol.</source> <volume>42</volume>, <fpage>3248</fpage>&#x02013;<lpage>3255</lpage>. <pub-id pub-id-type="doi">10.1128/JCM.42.7.3248-3255.2004</pub-id><pub-id pub-id-type="pmid">15243089</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname> <given-names>X.</given-names></name> <name><surname>Abd Alla</surname> <given-names>A. A.</given-names></name> <name><surname>Xie</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Distribution and function of prophage phiRv1 and phiRv2 among <italic>Mycobacterium tuberculosis</italic> complex</article-title>. <source>J. Biomol. Struct. Dyn.</source> <volume>34</volume>, <fpage>233</fpage>&#x02013;<lpage>238</lpage>. <pub-id pub-id-type="doi">10.1080/07391102.2015.1022602</pub-id><pub-id pub-id-type="pmid">25855385</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Firdessa</surname> <given-names>R.</given-names></name> <name><surname>Berg</surname> <given-names>S.</given-names></name> <name><surname>Hailu</surname> <given-names>E.</given-names></name> <name><surname>Schelling</surname> <given-names>E.</given-names></name> <name><surname>Gumi</surname> <given-names>B.</given-names></name> <name><surname>Erenso</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Mycobacterial lineages causing pulmonary and extrapulmonary tuberculosis, Ethiopia</article-title>. <source>Emerg. Infect. Dis.</source> <volume>19</volume>, <fpage>460</fpage>&#x02013;<lpage>463</lpage>. <pub-id pub-id-type="doi">10.3201/eid1903.120256</pub-id><pub-id pub-id-type="pmid">23622814</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Forrellad</surname> <given-names>M. A.</given-names></name> <name><surname>Klepp</surname> <given-names>L. I.</given-names></name> <name><surname>Gioffre</surname> <given-names>A.</given-names></name> <name><surname>Sabio y Garcia</surname> <given-names>J.</given-names></name> <name><surname>Morbidoni</surname> <given-names>H. R.</given-names></name> <name><surname>de la Paz Santangelo</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Virulence factors of the <italic>Mycobacterium tuberculosis</italic> complex</article-title>. <source>Virulence</source> <volume>4</volume>, <fpage>3</fpage>&#x02013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.4161/viru.22329</pub-id><pub-id pub-id-type="pmid">23076359</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gagneux</surname> <given-names>S.</given-names></name></person-group> (<year>2012</year>). <article-title>Host-pathogen coevolution in human tuberculosis</article-title>. <source>Phil. Trans. R. Soc. B</source> <volume>367</volume>, <fpage>850</fpage>&#x02013;<lpage>859</lpage>. <pub-id pub-id-type="doi">10.1098/rstb.2011.0316</pub-id><pub-id pub-id-type="pmid">22312052</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gagneux</surname> <given-names>S.</given-names></name> <name><surname>DeRiemer</surname> <given-names>K.</given-names></name> <name><surname>Van</surname> <given-names>T.</given-names></name> <name><surname>Kato-Maeda</surname> <given-names>M.</given-names></name> <name><surname>de Jong</surname> <given-names>B. C.</given-names></name> <name><surname>Narayanan</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2006</year>). <article-title>Variable host-pathogen compatibility in <italic>Mycobacterium tuberculosis</italic></article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>103</volume>, <fpage>2869</fpage>&#x02013;<lpage>2873</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0511240103</pub-id><pub-id pub-id-type="pmid">16477032</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Galagan</surname> <given-names>J. E.</given-names></name></person-group> (<year>2014</year>). <article-title>Genomic insights into tuberculosis</article-title>. <source>Nat. Rev. Genet.</source> <volume>15</volume>, <fpage>307</fpage>&#x02013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3664</pub-id><pub-id pub-id-type="pmid">24662221</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghebremichael</surname> <given-names>S.</given-names></name> <name><surname>Groenheit</surname> <given-names>R.</given-names></name> <name><surname>Pennhag</surname> <given-names>A.</given-names></name> <name><surname>Koivula</surname> <given-names>T.</given-names></name> <name><surname>Andersson</surname> <given-names>E.</given-names></name> <name><surname>Bruchfeld</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Drug resistant <italic>Mycobacterium tuberculosis</italic> of the Beijing genotype does not spread in Sweden</article-title>. <source>PLoS ONE</source> <volume>5</volume>:<fpage>e10893</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0010893</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grace</surname> <given-names>P. S.</given-names></name> <name><surname>Ernst</surname> <given-names>J. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Suboptimal antigen presentation contributes to virulence of <italic>Mycobacterium tuberculosis in vivo</italic></article-title>. <source>J. Immunol.</source> <volume>196</volume>, <fpage>357</fpage>&#x02013;<lpage>364</lpage>. <pub-id pub-id-type="doi">10.4049/jimmunol.1501494</pub-id><pub-id pub-id-type="pmid">26573837</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harboe</surname> <given-names>M.</given-names></name> <name><surname>Christensen</surname> <given-names>A.</given-names></name> <name><surname>Ahmad</surname> <given-names>S.</given-names></name> <name><surname>Ulvund</surname> <given-names>G.</given-names></name> <name><surname>Harkness</surname> <given-names>R. E.</given-names></name> <name><surname>Mustafa</surname> <given-names>A. S.</given-names></name> <etal/></person-group>. (<year>2002</year>). <article-title>Cross-reaction between mammalian cell entry (Mce) proteins of <italic>Mycobacterium tuberculosis</italic></article-title>. <source>Scand. J. Immunol.</source> <volume>56</volume>, <fpage>580</fpage>&#x02013;<lpage>587</lpage>. <pub-id pub-id-type="doi">10.1046/j.1365-3083.2002.01172.x</pub-id><pub-id pub-id-type="pmid">12472669</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="other"><person-group person-group-type="author"><collab>World Health Organization</collab></person-group> (<year>2016</year>). <source>Global Tuberculosis Report 2016</source>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Parwati</surname> <given-names>I.</given-names></name> <name><surname>van Crevel</surname> <given-names>R.</given-names></name> <name><surname>van Soolingen</surname> <given-names>D.</given-names></name></person-group> (<year>2010</year>). <article-title>Possible underlying mechanisms for successful emergence of the <italic>Mycobacterium tuberculosis</italic> Beijing genotype strains</article-title>. <source>Lancet Infect. Dis.</source> <volume>10</volume>, <fpage>103</fpage>&#x02013;<lpage>111</lpage>. <pub-id pub-id-type="doi">10.1016/S1473-3099(09)70330-5</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lewis</surname> <given-names>K. N.</given-names></name> <name><surname>Liao</surname> <given-names>R. L.</given-names></name> <name><surname>Guinn</surname> <given-names>K. M.</given-names></name> <name><surname>Hickey</surname> <given-names>M. J.</given-names></name> <name><surname>Smith</surname> <given-names>S.</given-names></name> <name><surname>Behr</surname> <given-names>M. A.</given-names></name> <etal/></person-group>. (<year>2003</year>). <article-title>Deletion of RD1 from <italic>Mycobacterium tuberculosis</italic> mimics bacille Calmette-Guerin attenuation</article-title>. <source>J. Infect. Dis.</source> <volume>187</volume>, <fpage>117</fpage>&#x02013;<lpage>123</lpage>. <pub-id pub-id-type="doi">10.1086/345862</pub-id><pub-id pub-id-type="pmid">12508154</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2010</year>). <article-title>Fast and accurate long-read alignment with Burrows-Wheeler transform</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>589</fpage>&#x02013;<lpage>595</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp698</pub-id><pub-id pub-id-type="pmid">20080505</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x02013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id><pub-id pub-id-type="pmid">19505943</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Stoeckert</surname> <given-names>C. J.</given-names></name> <name><surname>Roos</surname> <given-names>D. S.</given-names></name></person-group> (<year>2003</year>). <article-title>OrthoMCL: identification of ortholog groups for eukaryotic genomes</article-title>. <source>Genome Res.</source> <volume>13</volume>, <fpage>2178</fpage>&#x02013;<lpage>2189</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1224503</pub-id><pub-id pub-id-type="pmid">12952885</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lindestam Arlehamn</surname> <given-names>C. S.</given-names></name> <name><surname>Paul</surname> <given-names>S.</given-names></name> <name><surname>Mele</surname> <given-names>F.</given-names></name> <name><surname>Huang</surname> <given-names>C.</given-names></name> <name><surname>Greenbaum</surname> <given-names>J. A.</given-names></name> <name><surname>Vita</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Immunological consequences of intragenus conservation of <italic>Mycobacterium tuberculosis</italic> T-cell epitopes</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>112</volume>, <fpage>E147</fpage>&#x02013;<lpage>E155</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1416537112</pub-id><pub-id pub-id-type="pmid">25548174</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Tran</surname> <given-names>V.</given-names></name> <name><surname>Leung</surname> <given-names>A. S.</given-names></name> <name><surname>Alexander</surname> <given-names>D. C.</given-names></name> <name><surname>Zhu</surname> <given-names>B. L.</given-names></name></person-group> (<year>2009</year>). <article-title>BCG vaccines: their mechanisms of attenuation and impact on safety and protective efficacy</article-title>. <source>Hum. Vaccin.</source> <volume>5</volume>, <fpage>70</fpage>&#x02013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.4161/hv.5.2.7210</pub-id><pub-id pub-id-type="pmid">19164935</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>L. D.</given-names></name> <name><surname>Sun</surname> <given-names>Q.</given-names></name> <name><surname>Fan</surname> <given-names>X. Y.</given-names></name> <name><surname>Zhong</surname> <given-names>Y.</given-names></name> <name><surname>Yao</surname> <given-names>Y. F.</given-names></name> <name><surname>Zhao</surname> <given-names>G. P.</given-names></name></person-group> (<year>2010</year>). <article-title>Mycobacterial MazG is a novel NTP pyrophosphohydrolase involved in oxidative stress response</article-title>. <source>J. Biol. Chem.</source> <volume>285</volume>, <fpage>28076</fpage>&#x02013;<lpage>28085</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.M109.088872</pub-id><pub-id pub-id-type="pmid">20529853</pub-id></citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McLaughlin</surname> <given-names>B.</given-names></name> <name><surname>Chon</surname> <given-names>J. S.</given-names></name> <name><surname>MacGurn</surname> <given-names>J. A.</given-names></name> <name><surname>Carlsson</surname> <given-names>F.</given-names></name> <name><surname>Cheng</surname> <given-names>T. L.</given-names></name> <name><surname>Cox</surname> <given-names>J. S.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>A <italic>Mycobacterium</italic> ESX-1-secreted virulence factor with unique requirements for export</article-title>. <source>PLoS Pathog.</source> <volume>3</volume>:<fpage>e105</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.0030105</pub-id><pub-id pub-id-type="pmid">17676952</pub-id></citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pirofski</surname> <given-names>L. A.</given-names></name> <name><surname>Casadevall</surname> <given-names>A.</given-names></name></person-group> (<year>2012</year>). <article-title>Q&#x00026;A: What is a pathogen? a question that begs the point</article-title>. <source>BMC Biol.</source> <volume>10</volume>:<fpage>6</fpage>. <pub-id pub-id-type="doi">10.1186/1741-7007-10-6</pub-id><pub-id pub-id-type="pmid">22293325</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Portevin</surname> <given-names>D.</given-names></name> <name><surname>Gagneux</surname> <given-names>S.</given-names></name> <name><surname>Comas</surname> <given-names>I.</given-names></name> <name><surname>Young</surname> <given-names>D.</given-names></name></person-group> (<year>2011</year>). <article-title>Human macrophage responses to clinical isolates from the <italic>Mycobacterium tuberculosis</italic> complex discriminate between ancient and modern lineages</article-title>. <source>PLoS Pathog.</source> <volume>7</volume>:<fpage>e1001307</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1001307</pub-id><pub-id pub-id-type="pmid">21408618</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pym</surname> <given-names>A. S.</given-names></name> <name><surname>Brodin</surname> <given-names>P.</given-names></name> <name><surname>Brosch</surname> <given-names>R.</given-names></name> <name><surname>Huerre</surname> <given-names>M.</given-names></name> <name><surname>Cole</surname> <given-names>S. T.</given-names></name></person-group> (<year>2002</year>). <article-title>Loss of RD1 contributed to the attenuation of the live tuberculosis vaccines <italic>Mycobacterium bovis</italic> BCG and <italic>Mycobacterium microti</italic></article-title>. <source>Mol. Microbiol.</source> <volume>46</volume>, <fpage>709</fpage>&#x02013;<lpage>717</lpage>. <pub-id pub-id-type="doi">10.1046/j.1365-2958.2002.03237.x</pub-id><pub-id pub-id-type="pmid">12410828</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rodriguez-Campos</surname> <given-names>S.</given-names></name> <name><surname>Smith</surname> <given-names>N. H.</given-names></name> <name><surname>Boniotti</surname> <given-names>M. B.</given-names></name> <name><surname>Aranaz</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>Overview and phylogeny of <italic>Mycobacterium tuberculosis</italic> complex organisms: implications for diagnostics and legislation of bovine tuberculosis</article-title>. <source>Res. Vet. Sci.</source> <volume>97</volume>, <fpage>S5</fpage>&#x02013;<lpage>S19</lpage>. <pub-id pub-id-type="doi">10.1016/j.rvsc.2014.02.009</pub-id><pub-id pub-id-type="pmid">24630673</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rosenkrands</surname> <given-names>I.</given-names></name> <name><surname>Aagaard</surname> <given-names>C.</given-names></name> <name><surname>Weldingh</surname> <given-names>K.</given-names></name> <name><surname>Brock</surname> <given-names>I.</given-names></name> <name><surname>Dziegiel</surname> <given-names>M. H.</given-names></name> <name><surname>Singh</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>Identification of Rv0222 from RD4 as a novel serodiagnostic target for tuberculosis</article-title>. <source>Tuberculosis</source> <volume>88</volume>, <fpage>335</fpage>&#x02013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1016/j.tube.2007.12.001</pub-id><pub-id pub-id-type="pmid">18243798</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ryndak</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>S. S.</given-names></name> <name><surname>Smith</surname> <given-names>I.</given-names></name></person-group> (<year>2008</year>). <article-title>PhoP, a key player in <italic>Mycobacterium tuberculosis</italic> virulence</article-title>. <source>Trends Microbiol.</source> <volume>16</volume>, <fpage>528</fpage>&#x02013;<lpage>534</lpage>. <pub-id pub-id-type="doi">10.1016/j.tim.2008.08.006</pub-id><pub-id pub-id-type="pmid">18835713</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sharfe</surname> <given-names>N.</given-names></name> <name><surname>Nikolic</surname> <given-names>M.</given-names></name> <name><surname>Cimpeon</surname> <given-names>L.</given-names></name> <name><surname>Van De Kratts</surname> <given-names>A.</given-names></name> <name><surname>Freywald</surname> <given-names>A.</given-names></name> <name><surname>Roifman</surname> <given-names>C. M.</given-names></name></person-group> (<year>2008</year>). <article-title>EphA and ephrin-A proteins regulate integrin-mediated T lymphocyte interactions</article-title>. <source>Mol. Immunol.</source> <volume>45</volume>, <fpage>1208</fpage>&#x02013;<lpage>1220</lpage>. <pub-id pub-id-type="doi">10.1016/j.molimm.2007.09.019</pub-id><pub-id pub-id-type="pmid">17980912</pub-id></citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simeone</surname> <given-names>R.</given-names></name> <name><surname>Bottai</surname> <given-names>D.</given-names></name> <name><surname>Frigui</surname> <given-names>W.</given-names></name> <name><surname>Majlessi</surname> <given-names>L.</given-names></name> <name><surname>Brosch</surname> <given-names>R.</given-names></name></person-group> (<year>2015</year>). <article-title>ESX/type VII secretion systems of mycobacteria: insights into evolution, pathogenicity and protection</article-title>. <source>Tuberculosis</source> <volume>95</volume>, <fpage>S150</fpage>&#x02013;<lpage>S154</lpage>. <pub-id pub-id-type="doi">10.1016/j.tube.2015.02.019</pub-id><pub-id pub-id-type="pmid">25732627</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sreejit</surname> <given-names>G.</given-names></name> <name><surname>Ahmed</surname> <given-names>A.</given-names></name> <name><surname>Parveen</surname> <given-names>N.</given-names></name> <name><surname>Jha</surname> <given-names>V.</given-names></name> <name><surname>Valluri</surname> <given-names>V. L.</given-names></name> <name><surname>Ghosh</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>The ESAT-6 protein of <italic>Mycobacterium tuberculosis</italic> interacts with beta-2-microglobulin (&#x003B2;2M) affecting antigen presentation function of macrophage</article-title>. <source>PLoS Pathog.</source> <volume>10</volume>:<fpage>e1004446</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1004446</pub-id><pub-id pub-id-type="pmid">25356553</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steenken</surname> <given-names>W.</given-names></name> <name><surname>Gardner</surname> <given-names>L. U.</given-names></name></person-group> (<year>1946</year>). <article-title>History of H37 strain of tubercle bacillus</article-title>. <source>Am. Rev. Tuberc.</source> <volume>54</volume>, <fpage>62</fpage>&#x02013;<lpage>66</lpage>. <pub-id pub-id-type="pmid">20995860</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takezaki</surname> <given-names>N.</given-names></name> <name><surname>Nei</surname> <given-names>M.</given-names></name></person-group> (<year>1996</year>). <article-title>Genetic distances and reconstruction of phylogenetic trees from microsatellite DNA</article-title>. <source>Genetics</source> <volume>144</volume>, <fpage>389</fpage>&#x02013;<lpage>399</lpage>. <pub-id pub-id-type="pmid">8878702</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Peterson</surname> <given-names>D.</given-names></name> <name><surname>Filipski</surname> <given-names>A.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name></person-group> (<year>2013</year>). <article-title>MEGA6: molecular evolutionary genetics analysis version 6.0</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume>, <fpage>2725</fpage>&#x02013;<lpage>2729</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst197</pub-id><pub-id pub-id-type="pmid">24132122</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tarailo-Graovac</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>N.</given-names></name></person-group> (<year>2009</year>). <article-title>Using RepeatMasker to identify repetitive elements in genomic sequences</article-title>. <source>Curr. Protoc. Bioinform.</source> Chapter 4, Unit 4.10. <pub-id pub-id-type="doi">10.1002/0471250953.bi0410s25</pub-id><pub-id pub-id-type="pmid">19274634</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vita</surname> <given-names>R.</given-names></name> <name><surname>Overton</surname> <given-names>J. A.</given-names></name> <name><surname>Greenbaum</surname> <given-names>J. A.</given-names></name> <name><surname>Ponomarenko</surname> <given-names>J.</given-names></name> <name><surname>Clark</surname> <given-names>J. D.</given-names></name> <name><surname>Cantrell</surname> <given-names>J. R.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>The immune epitope database (IEDB) 3.0</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D405</fpage>&#x02013;<lpage>D412</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku938</pub-id><pub-id pub-id-type="pmid">25300482</pub-id></citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walker</surname> <given-names>B. J.</given-names></name> <name><surname>Abeel</surname> <given-names>T.</given-names></name> <name><surname>Shea</surname> <given-names>T.</given-names></name> <name><surname>Priest</surname> <given-names>M.</given-names></name> <name><surname>Abouelliel</surname> <given-names>A.</given-names></name> <name><surname>Sakthikumar</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement</article-title>. <source>PLoS ONE</source> <volume>9</volume>:<fpage>e112963</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0112963</pub-id><pub-id pub-id-type="pmid">25409509</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Winglee</surname> <given-names>K.</given-names></name> <name><surname>Manson McGuire</surname> <given-names>A.</given-names></name> <name><surname>Maiga</surname> <given-names>M.</given-names></name> <name><surname>Abeel</surname> <given-names>T.</given-names></name> <name><surname>Shea</surname> <given-names>T.</given-names></name> <name><surname>Desjardins</surname> <given-names>C. A.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Whole genome sequencing of <italic>Mycobacterium africanum</italic> strains from Mali provides insights into the mechanisms of geographic restriction</article-title>. <source>PLoS Negl. Trop. Dis.</source> <volume>10</volume>:<fpage>e0004332</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pntd.0004332</pub-id><pub-id pub-id-type="pmid">26751217</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zheng</surname> <given-names>H. J.</given-names></name> <name><surname>Lu</surname> <given-names>L. D.</given-names></name> <name><surname>Wang</surname> <given-names>B. F.</given-names></name> <name><surname>Pu</surname> <given-names>S. Y.</given-names></name> <name><surname>Zhang</surname> <given-names>X. L.</given-names></name> <name><surname>Zhu</surname> <given-names>G. F.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>Genetic basis of virulence attenuation revealed by comparative genomic analysis of <italic>Mycobacterium tuberculosis</italic> strain H37Ra vs. H37Rv</article-title>. <source>PLoS ONE</source> <volume>3</volume>:<fpage>e2375</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0002375</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>L.</given-names></name> <name><surname>Zhong</surname> <given-names>J.</given-names></name> <name><surname>Jia</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>G.</given-names></name> <name><surname>Kang</surname> <given-names>Y.</given-names></name> <name><surname>Dong</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Precision methylome characterization of <italic>Mycobacterium tuberculosis</italic> complex (MTBC) using PacBio single-molecule real-time (SMRT) technology</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>730</fpage>&#x02013;<lpage>743</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1498</pub-id><pub-id pub-id-type="pmid">26704977</pub-id></citation>
</ref>
</ref-list>
</back>
</article>
