<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2026.1729846</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Genomic diversity and host-specificity in <italic>Corynebacterium pseudotuberculosis</italic> using comparative population genomics</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Profeta</surname> <given-names>Rodrigo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/3298897/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Schlesener</surname> <given-names>Cory L.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Shaw</surname> <given-names>Claire A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/918807/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Busch</surname> <given-names>Roselle C.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Heller</surname> <given-names>Meera C.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/138857/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Spier</surname> <given-names>Sharon</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wu</surname> <given-names>Jing</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Welch</surname> <given-names>Shannara</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/3311208/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Viana</surname> <given-names>Marcus Vinicius C.</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/576700/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Barroso</surname> <given-names>Fernanda A. L.</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/858963/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Brenig</surname> <given-names>Bertram</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/757671/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Azevedo</surname> <given-names>Vasco</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/34672/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Weimer</surname> <given-names>Bart C.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/427409/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Population Health and Reproduction, School of Veterinary Medicine, UC Davis</institution>, <city>Davis, CA</city>, <country country="us">United States</country></aff>
<aff id="aff2"><label>2</label><institution>100K Pathogen Genome Project, School of Veterinary Medicine, UC Davis</institution>, <city>Davis, CA</city>, <country country="us">United States</country></aff>
<aff id="aff3"><label>3</label><institution>Veterinary Medicine and Epidemiology, School of Veterinary Medicine, UC Davis</institution>, <city>Davis, CA</city>, <country country="us">United States</country></aff>
<aff id="aff4"><label>4</label><institution>Texas A&#x0026;M Veterinary Medical Teaching Hospital</institution>, <city>College Station, TX</city>, <country country="us">United States</country></aff>
<aff id="aff5"><label>5</label><institution>Department of Genetics, Ecology and Evolution, Federal University of Minas Gerais</institution>, <city>Belo Horizonte</city>, <country country="br">Brazil</country></aff>
<aff id="aff6"><label>6</label><institution>Institute of Veterinary Medicine, University of G&#x00F6;ttingen</institution>, <city>G&#x00F6;ttingen</city>, <country country="de">Germany</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Roselle C. Busch, <email xlink:href="mailto:rcbusch@ucdavis.edu">rcbusch@ucdavis.edu</email></corresp>
<corresp id="c002">Bart C. Weimer, <email xlink:href="mailto:bcweimer@ucdavis.edu">bcweimer@ucdavis.edu</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-17">
<day>17</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1729846</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>12</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Profeta, Schlesener, Shaw, Busch, Heller, Spier, Wu, Welch, Viana, Barroso, Brenig, Azevedo and Weimer.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Profeta, Schlesener, Shaw, Busch, Heller, Spier, Wu, Welch, Viana, Barroso, Brenig, Azevedo and Weimer</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-17">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p><italic>Corynebacterium pseudotuberculosis</italic> is a facultative intracellular pathogen responsible for chronic infections in livestock, primarily small ruminants and horses, with occasional zoonotic transmission. To investigate the genomic diversity, evolutionary stability, and host adaptation of this species, we analyzed 788 high-quality genomes representing isolates from diverse hosts, geographic regions, and time periods. Comparative population genomics revealed remarkably conserved genome architecture, supporting a closed pangenome with minimal accessory gene variation. Virulence and antimicrobial resistance (AMR) screening across multiple databases confirmed the universal presence of phospholipase D (<italic>pld</italic>) and the absence of major horizontally acquired AMR determinants, except for APH(3&#x2032;)-IIa, TEM-116, and APH(3&#x2032;)-IIIa in a few goat isolates from Brazil. Distinct metabolic features between biovars were conserved, notably nitrate reduction and molybdenum cofactor biosynthesis in biovar equi. However, gene presence/absence alone did not explain host specificity. Instead, machine learning applied to 8,028 core-genome SNPs identified allelic variants associated with host origin, particularly in genes linked to amino-acid biosynthesis and peptide transport (Opp system). These findings demonstrate that host adaptation in this species is driven by fine-scale SNP variation within core metabolic pathways, rather than acquisition of classical virulence or resistance genes, highlighting the species&#x2019; exceptional genomic stability and narrow evolutionary flexibility.</p>
</abstract>
<kwd-group>
<kwd>host/microbe association</kwd>
<kwd>machine learning</kwd>
<kwd>pangenome</kwd>
<kwd>point mutation</kwd>
<kwd>SNP</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the CEH &#x2013; Weimer et al., RB PHR post-COVID Seed Grant Program, and Conselho Nacional de Desenvolvimento Cient&#x00ED;fico e Tecnol&#x00F3;gico (CNPq) under the call Chamada CNPq N&#x00B0; 26/2021 (process no. 403067/2022-7).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="2"/>
<equation-count count="1"/>
<ref-count count="53"/>
<page-count count="14"/>
<word-count count="9520"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Infectious Agents and Disease</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p><italic>Corynebacterium pseudotuberculosis</italic> is a facultative intracellular bacterial pathogen affecting livestock, primarily small ruminants and equids. Infections caused by this organism are an increasing concern, especially in animal production where outbreaks lead to severe economic losses. These occur due to the need for culling, condemnation of whole carcasses at slaughter, and deterioration in body condition that reduces the value of other animal products such as pelts and leather (<xref ref-type="bibr" rid="B15">Dorella et al., 2006</xref>; <xref ref-type="bibr" rid="B30">O&#x2019;Hara et al., 2021</xref>; <xref ref-type="bibr" rid="B17">Esmaeili et al., 2025</xref>). Transmission occurs through skin abrasions or mucous membranes, with soil and insect vectors acting as major reservoirs and vehicles for infection (<xref ref-type="bibr" rid="B3">Baird and Fontaine, 2007</xref>). In addition, sheep-to-sheep transmission via respiratory lesions has been recognized as a significant factor in the spread of disease within flocks (<xref ref-type="bibr" rid="B16">Ellis et al., 1987</xref>). While infections have historically been associated primarily with summer months in arid regions of the western United States, reports suggest this pathogen has undergone a geographical expansion over the past two decades (<xref ref-type="bibr" rid="B47">Spier, 2008</xref>). Environmental persistence and potential climate-driven changes in vector populations are believed to contribute to this rising prevalence and extended geographic reach of the disease (<xref ref-type="bibr" rid="B48">Spier et al., 2012</xref>). Although the disease is a pertinent concern in veterinary medicine, knowledge of the epidemiology and pathogenesis of <italic>C. pseudotuberculosis</italic> is incomplete, allowing the species to become endemic in some regions (<xref ref-type="bibr" rid="B15">Dorella et al., 2006</xref>).
This limited understanding is partly due to the lack of specific diagnostic tools and the historically low-resolution methods used to differentiate strains. Although genomic studies have revealed relatively low diversity within <italic>C. pseudotuberculosis</italic> populations (<xref ref-type="bibr" rid="B45">Soares et al., 2013</xref>), the scale and depth of these analyses remain limited. As a result, important questions continue regarding the genomic basis of the organism&#x2019;s resilience, persistence, adaptability, and control in multiple hosts and environments.</p>
<p>This organism has two major biovars that are genetically and metabolically distinct: biovar ovis, which typically infects small ruminants such as sheep and goats, and biovar equi, which primarily infects horses and occasionally cattle (<xref ref-type="bibr" rid="B5">Biberstein et al., 1971</xref>). Although cross-species transmission between horses and small ruminants is rare, cattle have been documented to harbor strains from either biovar (<xref ref-type="bibr" rid="B22">Hiller et al., 2024</xref>). In conjunction with host specificity, the major diagnostic differentiation between biovars ovis and equi is limited to their ability to reduce nitrate, with equi strains being nitrate-positive and ovis strains being nitrate-negative (<xref ref-type="bibr" rid="B15">Dorella et al., 2006</xref>).</p>
<p>In horses, <italic>C. pseudotuberculosis</italic> biovar equi infection has three clinical presentations: external abscesses, internal organ infection, or ulcerative lymphangitis. The most common manifestation involves subcutaneous abscesses, especially in the pectoral region or ventral abdomen, often referred to as &#x201C;pigeon fever&#x201D; due to the characteristic swelling. While external infections are generally self-limiting, internal abscesses and lymphangitis are more severe and can require prolonged antibiotic treatment (<xref ref-type="bibr" rid="B34">Pratt et al., 2005</xref>). Small ruminant infections share a similar presentation, with <italic>C. pseudotuberculosis</italic> biovar ovis infection resulting in notable abscess development both externally and internally (<xref ref-type="bibr" rid="B3">Baird and Fontaine, 2007</xref>).</p>
<p>Despite the agricultural and economic importance of <italic>C. pseudotuberculosis</italic>, in-depth, comprehensive genomic analyses have been limited, hindering the ability to make informed decisions about how best to control infections based on strain variation and presentation symptoms. The advent of whole genome sequencing (WGS) and population-scale comparative genomics has revolutionized our ability to characterize understudied pathogens. When integrated with machine learning, these data-rich methods enable the identification of subtle genomic signatures associated with host specificity, virulence, and antimicrobial resistance, offering new insights that traditional analyses may overlook (<xref ref-type="bibr" rid="B4">Bandoy and Weimer, 2020</xref>). However, most studies of <italic>C. pseudotuberculosis</italic> have relied on small isolate collections or targeted traits, resulting in a fragmented understanding of the species&#x2019; genomic landscape and what role genome variation plays in disease manifestation (<xref ref-type="bibr" rid="B45">Soares et al., 2013</xref>). Consequently, the full extent of its genomic diversity, including genes associated with host adaptation, virulence, and resistance, remains poorly characterized.</p>
<p>In this study, we sequenced 571 genomes and leveraged publicly available <italic>C. pseudotuberculosis</italic> genomes to conduct a population genome comparison. This involved integrating detailed metadata, including host species, geographic origin, isolation date, and antimicrobial treatment history, into the analyses of genetic variance across isolates. While this comparison confirmed the known differences between biovars ovis and equi, it found that gene presence/absence did not play a role in disease characteristics or animal host. Rather, we discovered that single nucleotide polymorphisms (SNPs) in many core genes contributed to the observed epidemiological variations. This study uncovered the role of allelic variants in genes important for protein metabolism in the variation in host association and disease presentation for both biovars.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="S2.SS1">
<title>Genomic data collection</title>
<p>A total of 788 <italic>Corynebacterium pseudotuberculosis</italic> genome sequences were analyzed in this study. Genomes were obtained from multiple sources, including publicly available sequences from GenBank (<italic>n</italic> = 233), a research isolate collection at UC Davis dating back to 1996 (<xref ref-type="bibr" rid="B37">Rhodes et al., 2015</xref>), and clinical isolates recovered at the UC Davis Veterinary Medical Teaching Hospital up to 2021. Additional isolates were collected at Texas A&#x0026;M University between 2010 and 2017, bringing the combined total from UC Davis and Texas A&#x0026;M to 380. The remaining genomes were obtained from collections in Brazil. Comprehensive metadata for all isolates is provided in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>.</p>
</sec>
<sec id="S2.SS2">
<title>gDNA extraction</title>
<p>For the UC Davis isolate collection, isolate frozen stocks (&#x2013;80&#x00B0;C) were used to inoculate vented culture tubes with 5 mL of tryptic soy broth, or brain heart infusion broth (BD Difco, Franklin Lakes, NJ, United States) supplemented with 0.2% (v/v) polysorbate 80. Cultures were incubated aerobically with shaking at 37&#x00B0;C for approximately 48 h. Bacteria were pelleted in microcentrifuge tubes by centrifugation at 16,000&#x00D7;g for 5 min, resuspended in phosphate buffered saline to wash, and pelleted again. The supernatant was removed, and pellets were stored at &#x2013;80&#x00B0;C until DNA was extracted. DNA extraction was done with the Wizard Genomic DNA Purification kit (Promega, Madison, WI, United States). Cell pellets were lysed by suspension in Nuclei Lysis Solution from the kit, addition of 0.1 mm Mini-BeadBeater glass beads (BioSpec Products, Bartlesville, OK, United States), and bead beating for 15 s at 16,000 RPM on a FastPrep-96 platform (MP Biomedicals, Santa Ana, CA, United States). Cell lysates were further processed following the kit procedure. The DNA pellets were rehydrated in 100 &#x03BC;L of 10 mM Tris-HCl (pH 8.0). Genomic DNA quality was evaluated as described previously (<xref ref-type="bibr" rid="B23">Jeannotte et al., 2014</xref>; <xref ref-type="bibr" rid="B27">Kong et al., 2014</xref>). Briefly, purity for protein and organic contamination was assessed by Nanodrop One UV-Vis Spectrophotometer (ThermoScientific, Waltham, MA, United States), using the thresholds A260/280 &#x2265;1.5 and A260/230 &#x2265;1.5 for accepted samples. Genomic DNA integrity was evaluated by genomic DNA TapeStation (Agilent 4200, Santa Clara, CA, United States). The DNA was stored at &#x2013;20&#x00B0;C until processed for whole genome sequencing.</p>
<p>For the Brazilian isolate collection, genomic DNA was extracted using the Wizard<sup>&#x00AE;</sup> Genomic DNA Purification Kit (Promega), according to the manufacturer&#x2019;s instructions, as previously described (<xref ref-type="bibr" rid="B46">Sousa et al., 2025</xref>). DNA quantity and purity were assessed spectrophotometrically (260/280 ratio; NanoDrop 2000, Thermo Fisher Scientific) and verified by agarose gel electrophoresis. DNA quality was routinely assessed by spectrophotometric purity (260/280 ratio) and agarose gel electrophoresis. Concentrations were standardized across samples, and approximately 5 &#x03BC;g of high-quality DNA per isolate was prepared and submitted for sequencing.</p>
</sec>
<sec id="S2.SS3">
<title>Library prep and sequencing</title>
<p>Whole genome sequencing followed methods previously described for studies under the 100K Pathogen Genome Project (<xref ref-type="bibr" rid="B53">Weis et al., 2016</xref>; <xref ref-type="bibr" rid="B9">Chen et al., 2017</xref>; <xref ref-type="bibr" rid="B52">Weimer, 2017</xref>). Briefly, high-quality genomic DNA was used to construct sequencing libraries with 400&#x2013;550 bp inserts by enzymatic shearing, followed by size selection to an average of 450 bp and sequenced to target 50x depth of coverage per genome. Sequencing was done using paired-end 150 short read method of the Illumina HiSeq X platform (San Diego, CA, United States). Raw sequence information is available in the NCBI database under the 100K Pathogen Genome Bioproject (<ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA203445">PRJNA203445</ext-link>).</p>
</sec>
<sec id="S2.SS4">
<title>Genome assembly</title>
<p>Raw paired-end reads were trimmed to remove adapters and low-quality bases using Trimmomatic (v0.39) (<xref ref-type="bibr" rid="B6">Bolger et al., 2014</xref>) with the following parameters: ILLUMINACLIP:&#x003C;adapters&#x003E;:2:40:15, LEADING:2, TRAILING:2, SLIDINGWINDOW:4:15, and MINLEN:50. Read quality was assessed before and after trimming using FastQC (v0.11.9) (<xref ref-type="bibr" rid="B2">Andrews, 2010</xref>), and summary reports were compiled using MultiQC (v1.21) (<xref ref-type="bibr" rid="B18">Ewels et al., 2016</xref>). Potential PhiX or other sequencing contaminants were removed by aligning reads to a PhiX reference genome using Bowtie2 (v2.5.1) (<xref ref-type="bibr" rid="B28">Langmead and Salzberg, 2012</xref>), retaining only unmapped read pairs. <italic>De novo</italic> genome assemblies were generated using Shovill (v1.0.4) (<xref ref-type="bibr" rid="B42">Seemann, 2022</xref>) with default parameters and the SPAdes assembler. Assembly quality was assessed with CheckM2 (v1.0.1) (<xref ref-type="bibr" rid="B11">Chklovski et al., 2023</xref>), and sequencing depth was estimated using Mosdepth (v0.3.8) (<xref ref-type="bibr" rid="B33">Pedersen and Quinlan, 2018</xref>). Assemblies were considered high quality if they met the following criteria: sequencing coverage &#x003E; 10&#x00D7;, estimated genome completeness above 95%, and contamination below 5%.</p>
</sec>
<sec id="S2.SS5">
<title>Genomic similarity comparison</title>
<p>Genomic comparisons were conducted using Sourmash (v4.8.5) (<xref ref-type="bibr" rid="B8">Brown and Irber, 2016</xref>). For each genome, MinHash signatures were computed using scaled sketches (--scaled 10), and pairwise Jaccard distances (<italic>k</italic> = 31) were calculated and exported to a distance matrix. To visualize genome-wide similarity patterns, we used ComplexHeatmap (v2.21.1) (<xref ref-type="bibr" rid="B20">Gu et al., 2016</xref>) in R to generate a heatmap from the distance matrix, including sample annotations indicating the host and geographical locations of the genomes.</p>
</sec>
<sec id="S2.SS6">
<title>Pangenome analysis</title>
<p>All genome assemblies were annotated using Prokka (v1.14.6) (<xref ref-type="bibr" rid="B39">Seemann, 2014</xref>) with default parameters. The annotated GFF files were then used as input for pangenome construction. Pangenome analysis was conducted using Roary (v3.13.0) (<xref ref-type="bibr" rid="B31">Page et al., 2015</xref>), which clusters orthologous genes across annotated genomes based on amino acid similarity (&#x2265;95% similarity). Separate analyses were performed for <italic>C. pseudotuberculosis</italic> biovars ovis and equi, followed by a combined analysis of all genomes. Roary was executed with the --mafft option to generate multiple sequence alignments of core genes.</p>
</sec>
<sec id="S2.SS7">
<title>Gene discovery</title>
<p>The gene presence/absence matrix generated by Roary was used to investigate the gene discovery dynamics of the pangenome. We performed rarefaction analysis in R (v4.2.3) using the micropan (<xref ref-type="bibr" rid="B44">Snipen and Liland, 2015</xref>) package to evaluate how the number of unique genes increases as more genomes are sampled. For each of 100 bootstrap replicates, genomes were incrementally subsampled from one up to the total number of genomes (N). At each sampling step, the cumulative count of unique, non-redundant genes was recorded. The average number of genes across all replicates was then plotted against the number of genomes on a log&#x2013;log scale to generate rarefaction curves according to Heaps&#x2019; law, expressed as:</p>
<disp-formula id="S2.Ex1">
<mml:math id="M1">
<mml:mrow>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mpadded width="+5pt">
<mml:mi>k</mml:mi>
</mml:mpadded>
<mml:mo rspace="7.5pt">&#x22C5;</mml:mo>
<mml:msup>
<mml:mi>n</mml:mi>
<mml:mi>b</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>G(n)</italic> represents the number of unique genes observed in <italic>n</italic> genomes, <italic>k</italic> is a constant, and <italic>b</italic> is the gene discovery rate exponent. The model provided estimates of the exponent (<italic>b</italic>), its standard error (<italic>SE</italic>), and the coefficient of determination (<italic>R</italic><sup>2</sup>), which were used to evaluate model fit and gene discovery dynamics. For comparative purposes, we applied the same rarefaction analysis and Heaps&#x2019; law modeling pipeline to estimate the exponent <italic>b</italic> for additional species, including <italic>Corynebacterium glutamicum</italic>, <italic>Mycobacterium tuberculosis</italic>, and <italic>Helicobacter pylori</italic>. Genomes were downloaded from GenBank and subjected to the same analysis. Results from other species (<italic>Lactococcus lactis</italic>, <italic>Pasteurella multocida</italic>, <italic>Mannheimia haemolytica</italic>, <italic>Escherichia coli</italic>, <italic>Salmonella</italic> spp., and <italic>Campylobacter</italic> spp.) were included from previous studies (<xref ref-type="bibr" rid="B26">Kaufman et al., 2020</xref>; <xref ref-type="bibr" rid="B19">Garzon et al., 2025</xref>), and are summarized in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Genomic characteristics and gene discovery dynamics for <italic>Corynebacterium pseudotuberculosis</italic> and selected bacterial species.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Organism</th>
<th valign="top" align="center">Gram status</th>
<th valign="top" align="center">Number of genomes compared</th>
<th valign="top" align="center">Genome size (Mb)</th>
<th valign="top" align="center">GC content (%)</th>
<th valign="top" align="center">Number of CDSs</th>
<th valign="top" align="center">Gene discovery rate (b &#x00B1; SE)</th>
<th valign="top" align="center">Genomes per new gene (1/b)</th>
<th valign="top" align="center">References</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>Corynebacterium pseudotuberculosis</italic> (camelid)</td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">22</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x223C;52%</td>
<td valign="top" align="center">2,035</td>
<td valign="top" align="center">0.037 &#x00B1; 0.00166</td>
<td valign="top" align="center">26.70</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Corynebacterium pseudotuberculosis</italic> (ovis)</td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">380</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x223C;52%</td>
<td valign="top" align="center">2,035</td>
<td valign="top" align="center">0.065 &#x00B1; 0.00077</td>
<td valign="top" align="center">15.29</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Corynebacterium pseudotuberculosis</italic></td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">788</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x223C;52%</td>
<td valign="top" align="center">2,035</td>
<td valign="top" align="center">0.1005 &#x00B1; 0.0006</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Corynebacterium pseudotuberculosis</italic> (equi)</td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">385</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x223C;52%</td>
<td valign="top" align="center">2,035</td>
<td valign="top" align="center">0.101 &#x00B1; 0.00132</td>
<td valign="top" align="center">9.90</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Corynebacterium diphtheriae</italic></td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">435</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">&#x223C;54%</td>
<td valign="top" align="center">2,267</td>
<td valign="top" align="center">0.225 &#x00B1; 0.00016</td>
<td valign="top" align="center">4.44</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Corynebacterium glutamicum</italic></td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">76</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">&#x223C;54%</td>
<td valign="top" align="center">3,000</td>
<td valign="top" align="center">0.235 &#x00B1; 0.00144</td>
<td valign="top" align="center">4.26</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Mycobacterium tuberculosis</italic></td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">4,900</td>
<td valign="top" align="center">4.4</td>
<td valign="top" align="center">&#x223C;65%</td>
<td valign="top" align="center">4,024</td>
<td valign="top" align="center">0.237 &#x00B1; 0.00032</td>
<td valign="top" align="center">4.22</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Pasteurella multocida</italic></td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">1,194</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">&#x223C;40%</td>
<td valign="top" align="center">2,029</td>
<td valign="top" align="center">0.321 &#x00B1; 0.00042</td>
<td valign="top" align="center">3.12</td>
<td valign="top" align="center">(<xref ref-type="bibr" rid="B19">Garzon et al., 2025</xref>)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Mannheimia haemolytica</italic></td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">2,418</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">&#x223C;41%</td>
<td valign="top" align="center">2,589</td>
<td valign="top" align="center">0.326 &#x00B1; 0.001</td>
<td valign="top" align="center">3.07</td>
<td valign="top" align="center">(<xref ref-type="bibr" rid="B19">Garzon et al., 2025</xref>)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Helicobacter pylori</italic></td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">3,741</td>
<td valign="top" align="center">1.6</td>
<td valign="top" align="center">&#x223C;39%</td>
<td valign="top" align="center">1,520</td>
<td valign="top" align="center">0.454 &#x00B1; 0.00052</td>
<td valign="top" align="center">2.20</td>
<td valign="top" align="center">This work</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Escherichia coli</italic></td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">3,300</td>
<td valign="top" align="center">4.6</td>
<td valign="top" align="center">&#x223C;51%</td>
<td valign="top" align="center">4,377</td>
<td valign="top" align="center">0.462 &#x00B1; 0.002</td>
<td valign="top" align="center">2.16</td>
<td valign="top" align="center">(<xref ref-type="bibr" rid="B26">Kaufman et al., 2020</xref>)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Salmonella</italic> spp.</td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">792</td>
<td valign="top" align="center">4.8</td>
<td valign="top" align="center">&#x223C;52%</td>
<td valign="top" align="center">4,600</td>
<td valign="top" align="center">0.468 &#x00B1; 0.001</td>
<td valign="top" align="center">2.14</td>
<td valign="top" align="center">(<xref ref-type="bibr" rid="B26">Kaufman et al., 2020</xref>)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Lactococcus lactis</italic></td>
<td valign="top" align="center">Positive</td>
<td valign="top" align="center">702</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">&#x223C;32%</td>
<td valign="top" align="center">2,310</td>
<td valign="top" align="center">0.479 &#x00B1; 0.000063</td>
<td valign="top" align="center">2.09</td>
<td valign="top" align="center">Unpublished data</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Campylobacter</italic> spp.</td>
<td valign="top" align="center">Negative</td>
<td valign="top" align="center">17,000</td>
<td valign="top" align="center">1.6</td>
<td valign="top" align="center">&#x223C;30%</td>
<td valign="top" align="center">1,654</td>
<td valign="top" align="center">0.645 &#x00B1; 0.001</td>
<td valign="top" align="center">1.55</td>
<td valign="top" align="center">(<xref ref-type="bibr" rid="B26">Kaufman et al., 2020</xref>)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>Genomic characteristics of the organisms being compared are shown, such as Gram status, genome size, GC content, and number of CDSs. &#x201C;Gene discovery rate (b &#x00B1; SE)&#x201D; reflects the slope of the log-log regression between the number of genomes and the cumulative number of genes. &#x201C;Genomes per new gene (1/b)&#x201D; estimates the number of additional genomes needed to discover one new gene.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="S2.SS8">
<title>Allelic variant extraction and sequence comparison of the gene <italic>fatD</italic></title>
<p>To investigate allelic variants of the biovar-specific petrobactin import system permease protein (<italic>fatD</italic>) from two distinct gene clusters in the Roary pangenome analysis&#x2014;one exclusive to biovar equi and the other to ovis&#x2014;we performed a sequence comparison. First, we extracted the corresponding locus tags for each genome from the gene_presence_absence.csv file generated by Roary (<xref ref-type="bibr" rid="B31">Page et al., 2015</xref>). Using the annotated gene coordinates from Prokka-generated GFF files (<xref ref-type="bibr" rid="B39">Seemann, 2014</xref>), we determined the contig, start, and end positions of each <italic>fatD</italic> homolog. These coordinates were formatted into a BED file and used to extract nucleotide sequences from the genome assemblies (.fna files) with SeqKit (<xref ref-type="bibr" rid="B43">Shen et al., 2016</xref>) via the subseq command: seqkit subseq --bed &#x201C;&#x0024;BED_FILE&#x201D; &#x201C;&#x0024;FNA&#x201D; &#x003E; &#x201C;&#x0024;OUT_FASTA&#x201D;. For each biovar, we created multiple sequence alignments of the extracted <italic>fatD</italic> sequences using MAFFT (<xref ref-type="bibr" rid="B25">Katoh and Standley, 2013</xref>), followed by consensus sequence generation with the cons command from the EMBOSS suite (<xref ref-type="bibr" rid="B38">Rice et al., 2000</xref>): cons -sequence &#x0024;{BIOVAR}_fatD_aligned.fna -outseq &#x0024;{BIOVAR}_consensus.fna.</p>
<p>Consensus sequences were then aligned using BLASTn to assess allelic variation. Although six nucleotide differences were identified between the ovis and equi <italic>fatD</italic> alleles, both translated into identical amino acid sequences, suggesting functional conservation across biovars.</p>
</sec>
<sec id="S2.SS9">
<title>Virulence and resistance screening</title>
<p>To identify <italic>in silico</italic> virulence and antimicrobial resistance (AMR) genes, all genome assemblies were screened using ABRicate (v1.0.1) (<xref ref-type="bibr" rid="B41">Seemann, 2020</xref>). For virulence profiling, the VFDB was queried. For resistance screening, assemblies were compared against the CARD, NCBI AMRFinderPlus, ResFinder, ARG-ANNOT, and MEGARes databases. ABRicate was executed with default parameters for each database, and gene presence was summarized across all genomes using the --summary function.</p>
</sec>
<sec id="S2.SS10">
<title>Variant calling and core genome alignment</title>
<p>Variant calling was performed using Snippy (v4.6.0) (<xref ref-type="bibr" rid="B40">Seemann, 2015</xref>). For biovar ovis isolates, sequencing reads were aligned to the <italic>C. pseudotuberculosis</italic> strain 1002B genome (Accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="GCF_001433475.1">GCF_001433475.1</ext-link>), whereas biovar equi isolates were aligned to strain 258 (Accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="GCF_000263755.5">GCF_000263755.5</ext-link>). Each sample was processed individually. Resulting outputs were merged with snippy-core to obtain a full alignment. Low-quality and gappy alignment regions were removed using snippy-clean_full_aln, and recombination events were detected and masked using Gubbins (v2.3.4) (<xref ref-type="bibr" rid="B13">Croucher et al., 2015</xref>). The recombination-filtered alignment was then reduced to core SNPs using SNP-sites (v2.5.1) (<xref ref-type="bibr" rid="B32">Page et al., 2016</xref>). A phylogenetic tree was constructed using FastTree (v2.1.11) (<xref ref-type="bibr" rid="B35">Price et al., 2009</xref>) under the GTR model with nucleotide input. The final tree was visualized and annotated using the Interactive Tree of Life (iTOL) (<xref ref-type="bibr" rid="B29">Letunic and Bork, 2021</xref>).</p>
</sec>
<sec id="S2.SS11">
<title>SNP-based host prediction using machine learning</title>
<p>To identify single-nucleotide polymorphisms (SNPs) associated with host specificity in <italic>C. pseudotuberculosis</italic>, machine learning models were developed using the XGBoost algorithm (v1.7.8.1) (<xref ref-type="bibr" rid="B10">Chen and Guestrin, 2016</xref>). SNP presence/absence data were formatted as binary matrices, with each dataset annotated according to host classification. Models were implemented in R using the XGBoost, caret, and dplyr packages. Multi-class classification was performed using a softmax objective (multi:softprob), with model parameters set to eta = 0.3, max_depth = 6, and nrounds = 100, incorporating early stopping to prevent overfitting. Each dataset was partitioned into training (80%) and testing (20%) subsets using stratified sampling to preserve class proportions. Model performance was evaluated using confusion matrices and classification accuracy on the held-out test data. Feature importance scores were extracted from the trained models using built-in XGBoost methods. SNPs with high importance scores were prioritized as potential markers of host specificity. The resulting ranked SNPs were compared to findings from pangenome and core-genome SNP analyses to identify convergent genomic signatures linked to ecological adaptation.</p>
</sec>
<sec id="S2.SS12">
<title>Functional enrichment and network analysis</title>
<p>To investigate the biological relevance of top-ranked SNPs associated with host species classification, functional annotation and pathway analysis were performed. Genes linked to high-importance SNPs identified by XGBoost were mapped to metabolic and regulatory pathways using BioCyc Pathway Tools (<xref ref-type="bibr" rid="B24">Karp et al., 2018</xref>). In parallel, protein-protein interaction networks were reconstructed using the genes of the top-ranked SNPs in the STRING database (v11.5) (<xref ref-type="bibr" rid="B49">Szklarczyk et al., 2021</xref>), enabling the identification of enriched functional modules.</p>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Population structure analysis</title>
<p>A total of 788 good-quality genome assemblies of <italic>C. pseudotuberculosis</italic> were assessed in this study, comprising 408 isolates classified as biovar equi and 380 isolates classified as biovar ovis. K-mer&#x2013;based genomic similarity profiling, using a sketch size of 10, enabled fine-scale resolution of whole-genome comparisons and revealed patterns consistent with the underlying population structure of the species. The genomes clustered into three distinct genomic groups: two corresponding to the classical ovis and equi biovars, and a third novel equi cluster composed predominantly of camelid-derived isolates (<xref ref-type="fig" rid="F1">Figure 1</xref>). Sequence conservation within each genomic cluster was remarkably high, as reflected by pairwise Jaccard similarity values: 0.868&#x2013;1 for the equi cluster, 0.888&#x2013;1 for the ovis cluster, and 0.970&#x2013;1 among camelid-associated strains. The narrow range of pairwise Jaccard similarity values indicates high genomic homogeneity within each cluster. The majority of biovar ovis isolates were associated with small ruminants, while most biovar equi isolates originated from equine samples, consistent with historical reports of host specificity for these biovars (<xref ref-type="bibr" rid="B12">Costa et al., 1998</xref>; <xref ref-type="bibr" rid="B21">Haas et al., 2017</xref>; <xref ref-type="bibr" rid="B14">do Nascimento Sousa et al., 2024</xref>). A total of 25 bovine-derived isolates were identified in the dataset. Of these, 22 were assigned to biovar equi and 3 to biovar ovis; one of the biovar equi isolates grouped with the camelid cluster. This supports previous observations that cattle can act as incidental hosts for both biovars and can carry strains similar to all genome types (<xref ref-type="bibr" rid="B1">Almeida et al., 2017</xref>). 
While most genomes were consistent with expected biovar&#x2013;host associations based on epidemiological reports, several exceptions indicated instances of cross-host infection. Specifically, seven ovis biovar genomes were isolated from equine hosts, and eight equi biovar genomes were obtained from small ruminants (four from caprine and four from ovine sources). Also notable in this comparison was the inclusion of six human-derived isolates: two equi strains from Romania and four ovis strains, including three from New Zealand and one from Oslo.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Pairwise genomic similarity among 788 <italic>Corynebacterium pseudotuberculosis</italic> isolates. The heatmap shows Jaccard similarity indices across genomes from 788 <italic>C. pseudotuberculosis</italic> strains, grouped into biovars &#x201C;Equi&#x201D; (408 isolates) and &#x201C;Ovis&#x201D; (380 isolates). Darker shades indicate higher genetic similarity. Clustering shows two large, biovar-specific blocks corresponding to the canonical biovars equi (385 isolates) and ovis (380 isolates). A third, smaller cluster&#x2014;composed primarily of camelid-derived equi strains (17/22) &#x2014; highlights a distinct gene-content profile within this group. Gray tick marks to the left annotate host species: Eq (equine, 298 isolates), Cp (caprine, 172), Ov (ovine, 84), Bv (bovine, 25), Un (unknown, 181), Hm (human, 6), and Cm (camelid, 19). Three singletons (one feline, one cervid and one wildebeest isolate) are omitted from this left annotation panel for clarity. Colored vertical bars to the right provide (i) biovar classification (blue, equi; orange, ovis), (ii) country of origin (23 categories plus &#x201C;unknown&#x201D;), and (iii) U.S. state for American isolates (California, Colorado, Kentucky, Nevada, New Mexico, Oregon, Texas, Utah, and Washington; gray marks represent non-U.S. isolates).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-17-1729846-g001.tif">
<alt-text content-type="machine-generated">Pairwise genomic similarity among 788 Corynebacterium pseudotuberculosis isolates. The heatmap shows Jaccard similarity indices across genomes from 788 C. pseudotuberculosis strains, grouped into biovars &#x201C;Equi&#x201D; (408 isolates) and &#x201C;Ovis&#x201D; (380 isolates). Darker shades indicate higher genetic similarity. Clustering shows two large, biovar-specific blocks corresponding to the canonical biovars equi (385 isolates) and ovis (380 isolates). A third, smaller cluster&#x2014;composed primarily of camelid-derived equi strains (17/22) &#x2014; highlights a distinct gene-content profile within this group. Gray tick marks to the left annotate host species: Eq (equine, 298 isolates), Cp (caprine, 172), Ov (ovine, 84), Bv (bovine, 25), Un (unknown, 181), Hm (human, 6), and Cm (camelid, 19). Three singletons (one feline, one cervid and one wildebeest isolate) are omitted from this left annotation panel for clarity. Colored vertical bars to the right provide (i) biovar classification (blue, equi; orange, ovis), (ii) country of origin (23 categories plus &#x201C;unknown&#x201D;), and (iii) U.S. state for American isolates (California, Colorado, Kentucky, Nevada, New Mexico, Oregon, Texas, Utah, and Washington; gray marks represent non-U.S. isolates).</alt-text>
</graphic>
</fig>
<p>While there was distinct biovar-level genomic clustering and expected species-association, no meaningful population structure was observed related to geographic origin. Limited clustering was observed by country or US state; however, a high level of genomic similarity by Jaccard index (&#x003E;0.87) was observed. The origins of the isolates included globally diverse locations such as Brazil, Germany, New Zealand, Romania, Switzerland and the United States. The lack of distinct subclusters related to geographic location, coupled with the monolithic genome structure, indicates that an unusually highly conserved gene content exists for each biovar globally.</p>
</sec>
<sec id="S3.SS2">
<title>Limited gene discovery suggests a closed pangenome in <italic>C. pseudotuberculosis</italic></title>
<p>To quantify the relationship between genome sampling and gene diversity, we applied a rarefaction analysis based on Heaps&#x2019; law, as described in the methods. This model characterizes how the number of unique genes grows with increasing genome sampling, with the Heaps&#x2019; exponent (<italic>b</italic>) serving as a metric of pangenome openness. Higher values indicate more dynamic and diverse gene repertoires, while lower values suggest greater genomic conservation. As previously observed (<xref ref-type="bibr" rid="B50">Tettelin et al., 2005</xref>) the discovery of new genes follows a power law model (log-log scale), where an increase in the number of genomes sequenced leads to a proportional increase in the number of genes discovered. The overall gene discovery rate for <italic>C. pseudotuberculosis</italic> was remarkably low, with a fitted exponent of <italic>b</italic> = 0.1005. This implies that approximately 10 genomes are required to identify one additional unique gene (1/b &#x2245; 10), reflecting a highly conserved gene repertoire despite extensive sampling (<italic>n</italic> = 788). Subgroup analyses within <italic>C. pseudotuberculosis</italic> revealed subtle but informative differences. The equi biovar cluster (<italic>n</italic> = 385) showed a comparable exponent (<italic>b</italic> = 0.101), while the ovis biovar (<italic>n</italic> = 380) was slightly more conserved (<italic>b</italic> = 0.065), requiring over 15 genomes to discover one new gene. The most conserved subgroup was the camelid-associated cluster (<italic>n</italic> = 22), with a Heaps&#x2019; exponent of just <italic>b</italic> = 0.037, indicating that nearly 27 genomes would be needed per novel gene identified. These values collectively underscore the exceptional genomic stability of <italic>C. pseudotuberculosis</italic>, particularly in host-associated subgroups. 
This genomic stability points to an evolutionary path that is highly constrained, reflecting long-term adaptation to specific hosts and ecological niches. Rather than gaining or losing genes frequently, <italic>C. pseudotuberculosis</italic> appears to have refined existing genetic functions to optimize survival within its preferred environments. The closed nature of its pangenome supports the view of limited horizontal gene transfer, strong purifying selection, and a largely clonal population framework. Consequently, most diversification within the species arises from subtle sequence-level mutations and metabolic fine-tuning instead of major shifts in gene content. Together, these features suggest that the bacterium&#x2019;s specialized pathogenic way of life depends on a stable and well-conserved genetic toolkit that ensures effective colonization, persistence, and transmission in its hosts.</p>
<p>In contrast, other pathogens exhibited substantially more open pangenomes. Close relatives such as <italic>Corynebacterium diphtheriae</italic> (<italic>b</italic> = 0.225) and <italic>C. glutamicum</italic> (<italic>b</italic> = 0.235) demonstrated higher gene discovery rates, with only &#x223C;4 genomes required per new gene. <italic>Mycobacterium tuberculosis</italic>, despite its known clonal structure, exhibited a comparable exponent (<italic>b</italic> = 0.237), suggesting greater gene content variation. Among Gram-negative pathogens of veterinary importance, <italic>Pasteurella multocida</italic> (<italic>b</italic> = 0.321) and <italic>Mannheimia haemolytica</italic> (<italic>b</italic> = 0.326) showed even higher pangenome openness (<xref ref-type="bibr" rid="B19">Garzon et al., 2025</xref>), requiring &#x223C;3 genomes per novel gene. Further comparisons with additional species such as <italic>Helicobacter pylori</italic>, <italic>Escherichia coli</italic>, <italic>Salmonella</italic> spp., <italic>Lactococcus lactis</italic>, and <italic>Campylobacter</italic> spp.&#x2014;which ranged from <italic>b</italic> = 0.454 to 0.645&#x2014;highlighted the contrast in pangenome dynamics. These organisms typically required only 1.5&#x2013;2.2 genomes to yield a new gene, consistent with highly open pangenomes. To evaluate whether gene discovery rates differ between these Gram-positive and Gram-negative bacteria, we compared their mean discovery rates using the Kruskal&#x2013;Wallis rank sum test. This non-parametric test yielded a chi-squared statistic of &#x03C7;<sup>2</sup> = 3.33 (<italic>p</italic> = 0.068), indicating a nonsignificant trend toward higher gene discovery rates in Gram-negative species. 
Although this result is not statistically significant at the &#x03B1; = 0.05 level, the observed pattern aligns with previous reports that Gram-negative bacteria tend to have more open pangenomes, driven by larger accessory genomes and greater inter-strain gene content variability. Among Gram-positive taxa, <italic>C. pseudotuberculosis</italic> and <italic>Lactococcus lactis</italic> are outliers, exhibiting the lowest and highest gene discovery rates, respectively. Notably, <italic>C. pseudotuberculosis</italic> stands out as an extreme case of exceptionally limited gene discovery even in comparison to other Gram-positive taxa, reinforcing its characterization as a genomically stable species.</p>
<p>The contrast in gene discovery became even more pronounced when compared to highly diverse bacterial species such as <italic>Escherichia coli</italic>, <italic>Salmonella</italic> spp., and <italic>Campylobacter</italic> spp., which exhibited gene discovery rates approximately 4.5&#x2013;6.5-fold higher than that of <italic>C. pseudotuberculosis</italic>. These findings highlight the species&#x2019; exceptionally clonal nature and limited genetic variability, consistent with previous reports describing its evolutionary stability.</p>
<sec id="S3.SS2.SSS1">
<title>Pan-genome characterization and biovar-specific gene content</title>
<p>We performed a comprehensive pangenome analysis of the 788 <italic>C. pseudotuberculosis</italic> genomes to investigate the genomic architecture underlying biovar divergence and host adaptation. A total of 4,363 orthologous gene clusters were identified (<xref ref-type="fig" rid="F2">Figure 2</xref>), which were categorized using presence-absence patterns into four major compartments: core (<italic>n</italic> = 1,999 genes), soft-core (<italic>n</italic> = 129), shell (<italic>n</italic> = 640), and cloud (<italic>n</italic> = 1,595). The core genome encompassed genes shared by all strains, while the accessory genome (soft-core, shell, and cloud genes) included elements with variable distribution across strains, many of which were biovar-specific. Notably, the clustering patterns revealed the same genomic signatures associated with the equi and ovis biovars as observed in the pairwise similarity analysis, including a distinct clade of equi strains primarily associated with camelid hosts (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Gene presence-absence matrix of 788 <italic>Corynebacterium pseudotuberculosis</italic> genomes. The heatmap displays the distribution of 4,363 orthologous gene clusters across <italic>C. pseudotuberculosis</italic> genomes, clustered by presence (blue) or absence (white). Strains are grouped by biovar: &#x201C;Equi&#x201D; (408 genomes, top and bottom clusters) and &#x201C;Ovis&#x201D; (380 genomes, central cluster). A distinct subgroup within equi (top bracket) corresponds to camelid-derived isolates. Gene clusters (columns) are ordered and color-coded by frequency across the dataset: core (present in all strains, green; 1,999 genes), soft core (present in most strains, orange; 129 genes), shell (moderately distributed, light blue; 640 genes), and cloud (strain specific, red; 1,595 genes). The matrix reveals both shared and biovar-specific genomic content, highlighting differential gene retention patterns, particularly among accessory genes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-17-1729846-g002.tif">
<alt-text content-type="machine-generated">Gene presence-absence matrix of 788 Corynebacterium pseudotuberculosis genomes. The heatmap displays the distribution of 4,363 orthologous gene clusters across C. pseudotuberculosis genomes, clustered by presence (blue) or absence (white). Strains are grouped by biovar: &#x201C;Equi&#x201D; (408 genomes, top and bottom clusters) and &#x201C;Ovis&#x201D; (380 genomes, central cluster). A distinct subgroup within equi (top bracket) corresponds to camelid-derived isolates. Gene clusters (columns) are ordered and color-coded by frequency across the dataset: core (present in all strains, green; 1,999 genes), soft core (present in most strains, orange; 129 genes), shell (moderately distributed, light blue; 640 genes), and cloud (strain specific, red; 1,595 genes). The matrix reveals both shared and biovar-specific genomic content, highlighting differential gene retention patterns, particularly among accessory genes.</alt-text>
</graphic>
</fig>
<p>While the core genome was highly conserved between ovis and equi isolates, a secondary level of conservation was observed within the shell genome, comprising genes present in biovar-specific strains, many of which appear to support distinct metabolic capabilities. In particular, biovar equi harbored a complete nitrate reduction operon (<italic>narTKGYX</italic>) and a molybdenum cofactor biosynthesis cluster (<italic>moaA2, moaE2, moaC2, mobA, mog, modA</italic>, and <italic>moeZ</italic>), which were absent in ovis isolates (<xref ref-type="fig" rid="F3">Figure 3</xref>). This observation of biovar-specific genomic divergence in relation to nitrate-reduction is consistent with the nitrate-positive phenotype historically used to distinguish biovar equi.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Organization of the <italic>nar</italic> Operon in Biovar equi. Syntenic arrangement of genes involved in nitrate reduction and molybdenum cofactor biosynthesis. Gene colors indicate functional categories: nitrate reduction (green), molybdenum cofactor biosynthesis (blue), hypothetical proteins (cyan), and a vitamin B12 import ATP binding protein (<italic>btuD</italic>) (gray).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-17-1729846-g003.tif">
<alt-text content-type="machine-generated">Organization of the nar Operon in Biovar equi. Syntenic arrangement of genes involved in nitrate reduction and molybdenum cofactor biosynthesis. Gene colors indicate functional categories: nitrate reduction (green), molybdenum cofactor biosynthesis (blue), hypothetical proteins (cyan), and a vitamin B12 import ATP binding protein (btuD) (gray).</alt-text>
</graphic>
</fig>
<p>While the capacity to reduce nitrate has been previously used as a biochemical test to distinguish biovars, a comparative presence-absence analysis revealed other metabolic functions unique to each biovar found in the shell genome. This analysis identified 159 genes exclusive to equi and 119 genes exclusive to ovis that aided in differentiating these biovars. Among these, 30 equi-specific and 15 ovis-specific genes were functionally annotated (<xref ref-type="supplementary-material" rid="TS1">Supplementary Tables 2</xref>, <xref ref-type="supplementary-material" rid="TS1">3</xref>). Beyond nitrate metabolism, equi-specific genes revealed distinct metabolic and regulatory capabilities. These included genes involved in the transport and processing of amino acids and vitamins, such as <italic>alsT</italic> (amino acid carrier) and <italic>btuD</italic> (vitamin B12 import ATPase), as well as enzymes associated with polysaccharide metabolism, such as <italic>treA</italic> (trehalase). Genes related to oxidative stress response and energy production were also identified, including <italic>guaB1</italic> (putative oxidoreductase), <italic>ppx1</italic> (exopolyphosphatase), and <italic>sdhL</italic> (shikimate dehydrogenase-like protein). Additional genes linked to DNA recombination and genome plasticity, such as <italic>xerC</italic> (tyrosine recombinase) and <italic>hin</italic> (invertase), along with components of the CRISPR-Cas immune system (<italic>casC</italic>, <italic>ygbT</italic>), were also present. These findings suggest that biovar equi possesses broader adaptive potential in both host-associated and environmental contexts, beyond its well-characterized nitrate utilization pathways.</p>
<p>Ovis-specific genes encompassed a different set of metabolic and regulatory functions. Some were involved in amino acid and nitrogen metabolism, including <italic>tdcG</italic> (L-serine dehydratase), <italic>ilvB1</italic> (acetolactate synthase large subunit), <italic>metXA</italic> (homoserine O-acetyltransferase), and <italic>pepN</italic> (aminopeptidase N). Genes associated with transport systems included <italic>acp</italic> (sodium/proton-dependent alanine transporter), <italic>bceA</italic> (bacitracin export ATPase), <italic>btuC</italic> (vitamin B12 import permease), and <italic>fecE</italic> (ferric dicitrate transport ATPase). In addition, <italic>rlmN</italic> (putative RNA methyltransferase) and <italic>recF</italic> (DNA replication and repair protein) may play roles in regulatory control and genome maintenance. Other notable genes, such as <italic>yidA</italic> (a sugar phosphatase) and a fimbriae subunit, point to potential differences in environmental sensing and host interaction. Together, these biovar-specific accessory genes likely underpin distinct strategies for nutrient acquisition, redox balance, and surface adaptation, supporting their divergent ecological niches and host preferences.</p>
<p>One gene of particular interest among the biovar-specific sets was <italic>fatD</italic>, encoding a petrobactin import system permease protein. This protein is predicted to function as a membrane receptor for ferric-petrobactin, a catecholate-type siderophore with unique structural features that distinguish it from other bacterial iron-chelating compounds. Petrobactin is primarily produced by some <italic>Bacillus</italic> species such as <italic>B. cereus</italic>, <italic>B. thuringiensis</italic>, and <italic>B. anthracis</italic>. Remarkably, we identified two versions of <italic>fatD</italic> in <italic>C. pseudotuberculosis</italic>, one found exclusively in biovar equi and the other exclusively in biovar ovis. These allelic variants differ by six nucleotides, yet their translated amino acid sequences were identical, demonstrating complete conservation at the protein sequence level.</p>
</sec>
</sec>
<sec id="S3.SS3">
<title>Screening of virulence and antimicrobial resistance genes</title>
<p>In addition to metabolic capacity, virulence factors can also contribute to biovar-specific host adaptation. To assess the presence of virulence factors, genes that support a pathogen&#x2019;s ability to infect a host and initiate disease, all 788 <italic>C. pseudotuberculosis</italic> genomes were analyzed using the Virulence Factor Database (VFDB). The phospholipase D gene (<italic>pld</italic>), which encodes a potent exotoxin, was found in all isolates, consistent with its known crucial role in <italic>C. pseudotuberculosis</italic> virulence. The diphtheria toxin gene (<italic>tox</italic>) was detected only in 11 isolates from buffalo hosts, in accordance with previous reports (<xref ref-type="bibr" rid="B51">Viana et al., 2017</xref>). No additional virulence-associated genes were identified by VFDB across the dataset, indicating that <italic>pld</italic> and <italic>tox</italic> represent the only detectable virulence factors under the applied screening criteria.</p>
<p>Subsequently, the genomes were screened for antimicrobial resistance (AMR) genes using multiple curated databases. While most genomes lacked significant matches to known AMR genes under standard thresholds, a subset of isolates carried resistance determinants. Specifically, the aminoglycoside resistance gene APH(3&#x2019;)-IIa and the beta-lactamase gene TEM-116 were co-detected in seven isolates (<italic>phoP</italic>, SigmaE, <italic>sigB, sigH, sigM, sigD, and sigC</italic>), all derived from goat hosts in Brazil. In contrast, APH(3&#x2019;)-IIIa was detected exclusively in a single, distinct isolate (Cp13), also from a goat in Brazil. All detected resistance genes exhibited high sequence identity (&#x2265;99.9%).</p>
<sec id="S3.SS3.SSS1">
<title>Core genome SNP analysis highlights clonality within biovars and limited geographic signal</title>
<p>Given the high degree of conservation of genes among the genomes, we next investigated whether differences between biovar ovis and biovar equi were associated with nucleotide mutation. Determining SNPs in the core genome using Snippy-core was done independently for each biovar. One high-quality representative genome for each biovar was used as a reference: strain 1002B (ovis; accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="GCF_001433475.1">GCF_001433475.1</ext-link>) and strain 258 (equi; accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="GCF_000263755.5">GCF_000263755.5</ext-link>). This analysis was restricted to isolates for which raw sequencing reads were available, allowing consistent read mapping, SNP calling, and quality filtering across samples.</p>
<p>Among the 330 equi isolates, 2,109 SNPs were identified in the core genome, while 241 ovis isolates displayed 1,206 SNPs. Per-genome SNP counts ranged from a few hundred to just over 2,000 in equi, and from under 1,000 to around 1,200 in ovis genomes, reflecting modest intra-biovar variability. Most isolates exhibited uniformly low SNP counts, reinforcing the clonal structure determined from gene-based analyses (<xref ref-type="fig" rid="F1">Figures 1</xref>, <xref ref-type="fig" rid="F2">2</xref>).</p>
<p>To visualize SNP-related phylogenetic relationships and assess potential associations with host, geography, or temporal patterns, phylogenetic trees were constructed from the core SNPs (<xref ref-type="fig" rid="F4">Figure 4</xref>). In equi, the resulting tree had no meaningful clustering by host species or sampling year (<xref ref-type="fig" rid="F4">Figure 4A</xref>). This underscores the high genomic homogeneity within the biovar. Conversely, the ovis SNP tree (<xref ref-type="fig" rid="F4">Figure 4B</xref>) exhibited a modest but noticeable geographic signal, with some clustering of isolates from New Zealand and Brazil. However, even in ovis, there was no clear association between SNP-defined clades and host species.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Core genome SNP phylogenies of <italic>Corynebacterium pseudotuberculosis</italic> biovars. <bold>(A)</bold> Phylogenetic tree of biovar equi constructed from core genome SNPs across 330 isolates. <bold>(B)</bold> Phylogenetic tree of biovar ovis constructed from core genome SNPs identified across 241 isolates. Trees were generated using alignments produced by Snippy core, representing high-confidence SNPs present in the conserved core genome of each biovar. Each tip is annotated with isolate ID and colored by geographic origin. Metadata rings indicate host species (inner strip) and year of isolation (outer ring). Colors correspond to the legends shown. Geographic origin is shown at the state level for U.S. isolates and at the country level for non-U.S. isolates based on metadata availability. In equi, no clear phylogenetic structure was observed based on host, location, or sampling date, consistent with a highly clonal population. In contrast, ovis displayed a modest geographic signal, with some regional clustering. These results suggest that biovar-specific traits are not strongly associated with core genome divergence.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-17-1729846-g004.tif">
<alt-text content-type="machine-generated">Core genome SNP phylogenies of Corynebacterium pseudotuberculosis biovars. (A) Phylogenetic tree of biovar equi constructed from core genome SNPs across 330 isolates. (B) Phylogenetic tree of biovar ovis constructed from core genome SNPs identified across 241 isolates. Trees were generated using alignments produced by Snippy core, representing high-confidence SNPs present in the conserved core genome of each biovar. Each tip is annotated with isolate ID and colored by geographic origin. Metadata rings indicate host species (inner strip) and year of isolation (outer ring). Colors correspond to the legends shown. Geographic origin is shown at the state level for U.S. isolates and at the country level for non-U.S. isolates based on metadata availability. In equi, no clear phylogenetic structure was observed based on host, location, or sampling date, consistent with a highly clonal population. In contrast, ovis displayed a modest geographic signal, with some regional clustering. These results suggest that biovar-specific traits are not strongly associated with core genome divergence.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="S3.SS4">
<title>Machine learning identifies SNPs associated with host and biovar differences</title>
<p>Building upon the observed high conservation of <italic>C. pseudotuberculosis</italic> genomes at both the genome content and SNP levels, machine learning was used to assess whether subtle patterns of nucleotide variation could distinguish isolates according to host species between biovars. The XGBoost algorithm was applied to the core genome SNP matrices with a total of nine host categories considered. Feature importance analysis revealed that classification was not driven by a single dominant variant but rather by the cumulative effect of multiple SNPs, each contributing modestly to model performance (<xref ref-type="table" rid="T2">Table 2</xref>). Feature importance scores reflect the relative contribution of each SNP to the model&#x2019;s predictive accuracy during classification, rather than direct causal effects on host specificity.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Important SNPs contributing to host species adaptation identified using XGBoost.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Rank</th>
<th valign="top" align="left">SNP description</th>
<th valign="top" align="left">Gene</th>
<th valign="top" align="left">Gene function</th>
<th valign="top" align="left">Importance</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Missense_variant c.442G&#x003E;A p.Val148Ile</td>
<td valign="top" align="left"><italic>oppB1</italic></td>
<td valign="top" align="left">ABC-type dipeptide/oligopeptide/nickel transporter permease</td>
<td valign="top" align="left">0.0165</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Missense_variant c.322G&#x003E;A p.Ala108Thr</td>
<td valign="top" align="left"><italic>rnhB</italic></td>
<td valign="top" align="left">Endonuclease RNase H, involved in RNA-DNA hybrid resolution</td>
<td valign="top" align="left">0.0041</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Synonymous_variant c.613C&#x003E;T p.Leu205Leu</td>
<td valign="top" align="left"><italic>oppC4</italic></td>
<td valign="top" align="left">ABC-type dipeptide/oligopeptide/nickel transporter permease</td>
<td valign="top" align="left">0.0025</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Frameshift_variant &#x0026; stop_gained c.288_321dup</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">(Unannotated locus)</td>
<td valign="top" align="left">0.00098</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Missense_variant c.166C&#x003E;G p.Leu56Val</td>
<td valign="top" align="left"><italic>bioD1</italic></td>
<td valign="top" align="left">Biotin biosynthesis protein (AAA domain)</td>
<td valign="top" align="left">0.00079</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Synonymous_variant c.285C&#x003E;T p.Gly95Gly</td>
<td valign="top" align="left"><italic>ilvB</italic></td>
<td valign="top" align="left">Thiamine pyrophosphate-requiring enzymes acetolactate synthase pyruvate dehydrogenase (cytochrome) glyoxylate carboligase phosphonopyruvate decarboxylase</td>
<td valign="top" align="left">0.000247746</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Missense_variant c.121A&#x003E;G p.Ile41Val</td>
<td valign="top" align="left"><italic>srtC1</italic></td>
<td valign="top" align="left">Sortase (surface protein transpeptidase)</td>
<td valign="top" align="left">0.000210489</td>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">Synonymous_variant c.315A&#x003E;G p.Ala105Ala</td>
<td valign="top" align="left"><italic>infB</italic></td>
<td valign="top" align="left">One of the essential components for the initiation of protein synthesis. Protects formylmethionyl-tRNA from spontaneous hydrolysis and promotes its binding to the 30S ribosomal subunits. Also involved in the hydrolysis of GTP during the formation of the 70S ribosomal complex</td>
<td valign="top" align="left">0.0001408</td>
</tr>
<tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Missense_variant c.823G&#x003E;A p.Ala275Thr</td>
<td valign="top" align="left"><italic>dapC</italic></td>
<td valign="top" align="left">Aspartate tyrosine aromatic aminotransferase</td>
<td valign="top" align="left">0.000135756</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">Synonymous_variant c.684A&#x003E;G p.Gly228Gly</td>
<td valign="top" align="left"><italic>htaA</italic></td>
<td valign="top" align="left">Htaa</td>
<td valign="top" align="left">0.000133307</td>
</tr>
<tr>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Missense_variant c.350C&#x003E;A p.Ala117Glu</td>
<td valign="top" align="left"><italic>yhgF</italic></td>
<td valign="top" align="left">Accessory protein</td>
<td valign="top" align="left">0.000120098</td>
</tr>
<tr>
<td valign="top" align="left">12</td>
<td valign="top" align="left">Synonymous_variant c.441G&#x003E;A p.Gln147Gln</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">Transport system, ATP-binding protein</td>
<td valign="top" align="left">0.00011504</td>
</tr>
<tr>
<td valign="top" align="left">13</td>
<td valign="top" align="left">Missense_variant c.2990C&#x003E;T p.Ala997Val</td>
<td valign="top" align="left"><italic>smc</italic></td>
<td valign="top" align="left">Required for chromosome condensation and partitioning</td>
<td valign="top" align="left">0.00010732</td>
</tr>
<tr>
<td valign="top" align="left">14</td>
<td valign="top" align="left">Missense_variant c.374T&#x003E;C p.Val125Ala</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">Belongs to the alpha-IPM synthase homocitrate synthase family</td>
<td valign="top" align="left">0.000104831</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>Importance refers to the improvement in accuracy attributed to splits using the SNP across all decision trees in the model.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The top-ranked SNP was a missense variant (c.442G&#x003E;A; p.Val148Ile) within the <italic>oppB1</italic> gene, encoding a subunit of an ABC-type dipeptide/oligopeptide/nickel transporter. Additional important variants included a missense mutation in <italic>rnhB</italic> (an RNase H enzyme), a synonymous variant in <italic>oppC4</italic> (another oligopeptide transporter), and a missense variant in <italic>bioD1</italic> (involved in biotin biosynthesis) (<xref ref-type="table" rid="T2">Table 2</xref>). These SNPs were prioritized based on their importance within the trained model and collectively capture nucleotide patterns that improve discrimination among host-associated isolate groups. Individually, none of these variants uniquely defines a host species; instead, host classification emerges from the combined signal of multiple SNPs distributed across conserved metabolic and transport genes.</p>
</sec>
<sec id="S3.SS5">
<title>Functional enrichment and network analysis of important SNPs reveal transport and amino acid biosynthesis as distinguishing processes</title>
<p>Functional enrichment analysis of SNP-associated genes revealed a significant overrepresentation in the &#x201C;Amino Acid Biosynthesis&#x201D; ontology, which is a subclass of the broader &#x201C;Biosynthesis&#x201D; category (<xref ref-type="fig" rid="F5">Figure 5A</xref>). Enrichment of amino acid processes suggests that a subset of core metabolic functions may be differentially modulated among isolates from different hosts.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Functional enrichment and interaction network analysis of genes associated with host-specific SNPs. <bold>(A)</bold> BioCyc pathway ontology highlighting the enrichment of genes involved in &#x201C;Amino Acid Biosynthesis&#x201D; and related metabolic processes. <bold>(B)</bold> STRING-db protein&#x2013;protein interaction network of top SNP-associated genes. A major cluster centered on Opp system transporters and a biosynthetic module including biotin and lysine biosynthesis enzymes are visible. <bold>(C)</bold> Gene Ontology (GO) enrichment graph displaying overrepresentation of biosynthetic pathways, particularly amino acid biosynthesis and protein modification processes. Circle size reflects the number of genes per GO term; color shading represents statistical significance (FDR).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-17-1729846-g005.tif">
<alt-text content-type="machine-generated">Functional enrichment and interaction network analysis of genes associated with host-specific SNPs. (A) BioCyc pathway ontology highlighting the enrichment of genes involved in &#x201C;Amino Acid Biosynthesis&#x201D; and related metabolic processes. (B) STRING-db protein&#x2013;protein interaction network of top SNP-associated genes. A major cluster centered on Opp system transporters and a biosynthetic module including biotin and lysine biosynthesis enzymes are visible. (C) Gene Ontology (GO) enrichment graph displaying overrepresentation of biosynthetic pathways, particularly amino acid biosynthesis and protein modification processes. Circle size reflects the number of genes per GO term; color shading represents statistical significance (FDR).</alt-text>
</graphic>
</fig>
<p>Further analysis of protein-protein interactions for the gene products of top SNPs revealed two major functional modules (<xref ref-type="fig" rid="F5">Figure 5B</xref>). The first comprised a densely interconnected cluster of oligopeptide permease (Opp) system components, including <italic>oppB1</italic>, <italic>oppC4</italic>, and <italic>oppA</italic>&#x2013;<italic>F</italic> paralogs. The Opp system is a well-known ATP-binding cassette (ABC) transporter involved in peptide import and environmental sensing. This system has been associated with host adaptation in other bacterial pathogens, including <italic>Streptococcus pneumoniae</italic> and <italic>Listeria monocytogenes</italic> (<xref ref-type="bibr" rid="B7">Borezee et al., 2000</xref>). The second cluster was composed of biosynthetic enzymes, such as <italic>bioD1</italic> (biotin biosynthesis), <italic>argD</italic> (arginine biosynthesis), and <italic>dapC</italic> (lysine pathway).</p>
<p>GO enrichment (<xref ref-type="fig" rid="F5">Figure 5C</xref>) further supported these findings. Statistically significant terms included &#x201C;Amino Acid Biosynthetic Process,&#x201D; &#x201C;Proteinogenic Amino Acid Biosynthesis,&#x201D; and &#x201C;Modification of Amino Acids Within Proteins,&#x201D; all with FDR values &#x003C; 1.8 &#x00D7; 10<sup>&#x2013;6</sup>. These pathways emphasize that metabolic functions are associated with host adaptation in each biovar.</p>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<p>This study presents the most comprehensive population-scale genomic analysis of <italic>C. pseudotuberculosis</italic> to date, integrating 788 genomes across both biovars (ovis and equi). The results reveal a species of remarkable genomic stability, with highly conserved gene content and limited genetic variability despite broad host and geographic representation. The two biovars remain clearly distinguishable, yet both display near-clonal genome structures, indicating a species that evolves primarily through small-scale nucleotide changes rather than extensive horizontal gene transfer or gene gain/loss.</p>
<p>The pangenome analysis reinforces this view, showing a closed genome architecture and an exceptionally low gene discovery rate, the lowest reported among Gram-positive bacteria. For every ten newly sequenced genomes, only one additional gene was detected, underscoring the organism&#x2019;s evolutionary rigidity. This extreme conservation contrasts with the open pangenomes of other pathogens such as <italic>Escherichia coli</italic>, <italic>Salmonella</italic> spp., and <italic>Campylobacter</italic> spp., where new genes emerge with nearly every additional genome. The high core-to-accessory gene ratio in <italic>C. pseudotuberculosis</italic> reflects a strong selective constraint, suggesting that its genetic repertoire is already well optimized for survival in its host niches.</p>
<p>Although accessory genome analyses confirmed classical biovar-specific features, such as nitrate reduction and molybdenum cofactor biosynthesis in equi, these gene differences alone do not explain host range or virulence variation. The uniform presence of the phospholipase D (<italic>pld</italic>) gene across all isolates supports its central role in virulence (<xref ref-type="bibr" rid="B3">Baird and Fontaine, 2007</xref>), while the detection of the <italic>tox</italic> gene exclusively in buffalo isolates aligns with previous findings (<xref ref-type="bibr" rid="B51">Viana et al., 2017</xref>). Beyond these canonical virulence determinants, no strong associations between gene presence/absence and host origin were observed, emphasizing that the determinants of host adaptation likely lie at the single-nucleotide level.</p>
<p>By integrating machine learning with SNP-based comparative genomics, we identified a set of subtle but informative allelic variants that discriminate isolates by host species. The top-ranked SNPs were found in genes linked to amino acid biosynthesis, peptide transport (Opp system), and cofactor metabolism, pathways central to nutrient acquisition and intracellular survival. These processes may facilitate fine-tuned metabolic compatibility between bacterial strains and host environments, consistent with the notion that <italic>C. pseudotuberculosis</italic> adapts through modulation of existing pathways rather than acquisition of new virulence genes.</p>
<p>This metabolic-centric model of adaptation is consistent with prior evidence implicating cell wall composition and enzymatic activity in <italic>C. pseudotuberculosis</italic> pathogenesis (<xref ref-type="bibr" rid="B36">Rebou&#x00E7;as et al., 2020</xref>). The species&#x2019; thick, mycolic acid-rich cell wall contributes to its resilience and cytotoxicity, while compositional differences in fatty acids and phospholipids have been linked to variation in virulence between isolates. Similarly, the activity of phospholipase D, which is universally present among strains, plays a dual role in tissue invasion and immune evasion. Together, these features suggest that <italic>C. pseudotuberculosis</italic> relies on fine-scale metabolic and structural modulation rather than gene acquisition to navigate host defenses and persist in diverse environments.</p>
<p>The combination of genomic conservation, limited pangenomic expansion, and host-associated SNP signatures defines <italic>C. pseudotuberculosis</italic> as an unusually stable yet adaptable pathogen. Its evolution appears to proceed through incremental allelic shifts that optimize interactions within specific host environments, rather than through large-scale genomic innovation. This stability may explain the persistence of disease outbreaks over decades with minimal phenotypic change, despite global spread and host diversity.</p>
</sec>
<sec id="S5" sec-type="conclusion">
<title>Conclusion</title>
<p>This work provides the most extensive comparative genomic analysis to date for <italic>C. pseudotuberculosis</italic>, integrating nearly 800 genomes spanning multiple hosts and global regions. The analyses revealed an organism with extraordinary genomic stability, characterized by a closed pangenome and minimal gene turnover. Such conservation contrasts with the dynamic gene exchange typical of other veterinary pathogens, positioning <italic>C. pseudotuberculosis</italic> as one of the most genetically homogeneous bacterial species examined at this scale. Gene content alone did not account for host adaptation or disease variation. Despite this stability, distinct biovar signatures persisted, including the nitrate reduction and molybdenum cofactor biosynthesis operons that define equi isolates. The lack of gene content differentiation led to the integration of machine learning with SNP-based comparative genomics that identified allelic variants within conserved metabolic pathways, particularly those involved in amino acid biosynthesis and peptide transport. These results indicate that <italic>C. pseudotuberculosis</italic> adapts through fine-scale genetic modulation of metabolic networks rather than through acquisition of classical virulence or resistance determinants. Collectively, these findings redefine <italic>C. pseudotuberculosis</italic> as a pathogen that evolves through precision rather than plasticity, maintaining a stable genetic core while fine-tuning key metabolic traits for host adaptation. This evolutionary strategy likely contributes to its persistence in livestock populations and its resilience across environments.</p>
</sec>
</body>
<back>
<sec id="S6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA203445">PRJNA203445</ext-link>.</p>
</sec>
<sec id="S7" sec-type="author-contributions">
<title>Author contributions</title>
<p>RP: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. CLS: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. CAS: Methodology, Software, Visualization, Writing &#x2013; review &#x0026; editing. RB: Data curation, Funding acquisition, Investigation, Resources, Writing &#x2013; review &#x0026; editing. MH: Data curation, Funding acquisition, Resources, Writing &#x2013; review &#x0026; editing. SS: Conceptualization, Data curation, Funding acquisition, Resources, Writing &#x2013; review &#x0026; editing. JW: Data curation, Resources, Writing &#x2013; review &#x0026; editing. SW: Data curation, Resources, Writing &#x2013; review &#x0026; editing. MV: Data curation, Formal analysis, Investigation, Methodology, Resources, Software, Writing &#x2013; review &#x0026; editing. FB: Data curation, Resources, Writing &#x2013; review &#x0026; editing. BB: Data curation, Resources, Writing &#x2013; review &#x0026; editing. VA: Conceptualization, Project administration, Resources, Supervision, Writing &#x2013; review &#x0026; editing. BW: Conceptualization, Data curation, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We thank Carol Huang for assistance with DNA library preparation and Ashleigh M. Flores for contributions to the metadata organization.</p>
</ack>
<sec id="S9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="S10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="S11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="S12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmicb.2026.1729846/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmicb.2026.1729846/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.xlsx" id="TS1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Almeida</surname> <given-names>S.</given-names></name> <name><surname>Dorneles</surname> <given-names>E. M. S.</given-names></name> <name><surname>Diniz</surname> <given-names>C.</given-names></name> <name><surname>Abreu</surname> <given-names>V.</given-names></name> <name><surname>Sousa</surname> <given-names>C.</given-names></name> <name><surname>Alves</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Quadruplex PCR assay for identification of <italic>Corynebacterium pseudotuberculosis</italic> differentiating biovar Ovis and Equi.</article-title> <source><italic>BMC Vet. Res.</italic></source> <volume>13</volume>:<fpage>290</fpage>. <pub-id pub-id-type="doi">10.1186/s12917-017-1210-5</pub-id> <pub-id pub-id-type="pmid">28946887</pub-id></mixed-citation></ref>
<ref id="B2"><mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Andrews</surname> <given-names>S.</given-names></name></person-group> (<year>2010</year>). <source><italic>FastQC: A Quality Control Tool for High Throughput Sequence Data.</italic></source> Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.bioinformatics.babraham.ac.uk/projects/fastqc">http://www.bioinformatics.babraham.ac.uk/projects/fastqc</ext-link> <comment>(accessed March 1, 2023)</comment>.</mixed-citation></ref>
<ref id="B3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baird</surname> <given-names>G. J.</given-names></name> <name><surname>Fontaine</surname> <given-names>M. C.</given-names></name></person-group> (<year>2007</year>). <article-title><italic>Corynebacterium pseudotuberculosis</italic> and its role in ovine caseous lymphadenitis.</article-title> <source><italic>J. Comp. Pathol.</italic></source> <volume>137</volume> <fpage>179</fpage>&#x2013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcpa.2007.07.002</pub-id> <pub-id pub-id-type="pmid">17826790</pub-id></mixed-citation></ref>
<ref id="B4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bandoy</surname> <given-names>D. D. R.</given-names></name> <name><surname>Weimer</surname> <given-names>B. C.</given-names></name></person-group> (<year>2020</year>). <article-title>Biological machine learning combined with <italic>Campylobacter</italic> population genomics reveals virulence gene allelic variants cause disease.</article-title> <source><italic>Microorganisms</italic></source> <volume>8</volume>:<fpage>549</fpage>. <pub-id pub-id-type="doi">10.3390/microorganisms8040549</pub-id> <pub-id pub-id-type="pmid">32290186</pub-id></mixed-citation></ref>
<ref id="B5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Biberstein</surname> <given-names>E. L.</given-names></name> <name><surname>Knight</surname> <given-names>H. D.</given-names></name> <name><surname>Jang</surname> <given-names>S.</given-names></name></person-group> (<year>1971</year>). <article-title>Two biotypes of <italic>Corynebacterium pseudotuberculosis</italic>.</article-title> <source><italic>Vet. Rec.</italic></source> <volume>89</volume> <fpage>691</fpage>&#x2013;<lpage>692</lpage>. <pub-id pub-id-type="doi">10.1136/vr.89.26.691</pub-id> <pub-id pub-id-type="pmid">5168555</pub-id></mixed-citation></ref>
<ref id="B6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bolger</surname> <given-names>A. M.</given-names></name> <name><surname>Lohse</surname> <given-names>M.</given-names></name> <name><surname>Usadel</surname> <given-names>B.</given-names></name></person-group> (<year>2014</year>). <article-title>Trimmomatic: a flexible trimmer for Illumina sequence data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>2114</fpage>&#x2013;<lpage>2120</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu170</pub-id> <pub-id pub-id-type="pmid">24695404</pub-id></mixed-citation></ref>
<ref id="B7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Borezee</surname> <given-names>E.</given-names></name> <name><surname>Pellegrini</surname> <given-names>E.</given-names></name> <name><surname>Berche</surname> <given-names>P.</given-names></name></person-group> (<year>2000</year>). <article-title>OppA of <italic>Listeria monocytogenes</italic>, an oligopeptide-binding protein required for bacterial growth at low temperature and involved in intracellular survival.</article-title> <source><italic>Infect. Immun.</italic></source> <volume>68</volume> <fpage>7069</fpage>&#x2013;<lpage>7077</lpage>. <pub-id pub-id-type="doi">10.1128/IAI.68.12.7069-7077.2000</pub-id> <pub-id pub-id-type="pmid">11083832</pub-id></mixed-citation></ref>
<ref id="B8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname> <given-names>C. T.</given-names></name> <name><surname>Irber</surname> <given-names>L.</given-names></name></person-group> (<year>2016</year>). <article-title>Sourmash: a library for MinHash sketching of DNA.</article-title> <source><italic>J. Open Source Softw.</italic></source> <volume>1</volume>:<fpage>27</fpage>. <pub-id pub-id-type="doi">10.21105/joss.00027</pub-id></mixed-citation></ref>
<ref id="B9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>P.</given-names></name> <name><surname>den Bakker</surname> <given-names>H. C.</given-names></name> <name><surname>Korlach</surname> <given-names>J.</given-names></name> <name><surname>Kong</surname> <given-names>N.</given-names></name> <name><surname>Storey</surname> <given-names>D. B.</given-names></name> <name><surname>Paxinos</surname> <given-names>E. E.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Comparative genomics reveals the diversity of restriction-modification systems and DNA methylation sites in <italic>Listeria monocytogenes</italic>.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>83</volume>:<fpage>e02091-16</fpage>. <pub-id pub-id-type="doi">10.1128/AEM.02091-16</pub-id> <pub-id pub-id-type="pmid">27836852</pub-id></mixed-citation></ref>
<ref id="B10"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Guestrin</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). &#x201C;<article-title>XGBoost: A scalable tree boosting system</article-title>,&#x201D; in <source><italic>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</italic></source>, (<publisher-loc>New York, NY</publisher-loc>), <fpage>785</fpage>&#x2013;<lpage>794</lpage>.</mixed-citation></ref>
<ref id="B11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chklovski</surname> <given-names>A.</given-names></name> <name><surname>Parks</surname> <given-names>D. H.</given-names></name> <name><surname>Woodcroft</surname> <given-names>B. J.</given-names></name> <name><surname>Tyson</surname> <given-names>G. W.</given-names></name></person-group> (<year>2023</year>). <article-title>CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning.</article-title> <source><italic>Nat. Methods</italic></source> <volume>20</volume> <fpage>1203</fpage>&#x2013;<lpage>1212</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-023-01940-w</pub-id> <pub-id pub-id-type="pmid">37500759</pub-id></mixed-citation></ref>
<ref id="B12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Costa</surname> <given-names>L. R. R.</given-names></name> <name><surname>Spier</surname> <given-names>S. J.</given-names></name> <name><surname>Hirsh</surname> <given-names>D. C.</given-names></name></person-group> (<year>1998</year>). <article-title>Comparative molecular characterization of <italic>Corynebacterium pseudotuberculosis</italic> of different origin.</article-title> <source><italic>Vet. Microbiol.</italic></source> <volume>62</volume> <fpage>135</fpage>&#x2013;<lpage>143</lpage>. <pub-id pub-id-type="doi">10.1016/s0378-1135(98)00202-8</pub-id> <pub-id pub-id-type="pmid">9695286</pub-id></mixed-citation></ref>
<ref id="B13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Croucher</surname> <given-names>N. J.</given-names></name> <name><surname>Page</surname> <given-names>A. J.</given-names></name> <name><surname>Connor</surname> <given-names>T. R.</given-names></name> <name><surname>Delaney</surname> <given-names>A. J.</given-names></name> <name><surname>Keane</surname> <given-names>J. A.</given-names></name> <name><surname>Bentley</surname> <given-names>S. D.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Rapid phylogenetic analysis of large samples of recombinant bacterial whole genome sequences using Gubbins.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>43</volume>:<fpage>e15</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1196</pub-id> <pub-id pub-id-type="pmid">25414349</pub-id></mixed-citation></ref>
<ref id="B14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>do Nascimento Sousa</surname> <given-names>S. M.</given-names></name> <name><surname>Lima</surname> <given-names>A. C. A.</given-names></name> <name><surname>Gon&#x00E7;alves de Moura</surname> <given-names>V. A.</given-names></name> <name><surname>Rossetti Mateus</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title><italic>Corynebacterium pseudotuberculosis</italic> biovar ovis strains isolated from small ruminants herds from the Brazilian Amazon present clonal genomic profile.</article-title> <source><italic>Small Rumin. Res.</italic></source> <volume>233</volume>:<fpage>107227</fpage>. <pub-id pub-id-type="doi">10.1016/j.smallrumres.2024.107227</pub-id></mixed-citation></ref>
<ref id="B15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dorella</surname> <given-names>F. A.</given-names></name> <name><surname>Carvalho Pacheco</surname> <given-names>L.</given-names></name> <name><surname>Oliveira</surname> <given-names>S. C.</given-names></name> <name><surname>Miyoshi</surname> <given-names>A.</given-names></name> <name><surname>Azevedo</surname> <given-names>V.</given-names></name></person-group> (<year>2006</year>). <article-title><italic>Corynebacterium pseudotuberculosis</italic>: microbiology, biochemical properties, pathogenesis and molecular studies of virulence.</article-title> <source><italic>Vet. Res.</italic></source> <volume>37</volume> <fpage>201</fpage>&#x2013;<lpage>218</lpage>.</mixed-citation></ref>
<ref id="B16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ellis</surname> <given-names>T. M.</given-names></name> <name><surname>Sutherland</surname> <given-names>S. S.</given-names></name> <name><surname>Wilkinson</surname> <given-names>F. C.</given-names></name> <name><surname>Mercy</surname> <given-names>A. R.</given-names></name> <name><surname>Paton</surname> <given-names>M. W.</given-names></name></person-group> (<year>1987</year>). <article-title>The role of <italic>Corynebacterium pseudotuberculosis</italic> lung lesions in the transmission of this bacterium to other sheep.</article-title> <source><italic>Aust. Vet. J.</italic></source> <volume>64</volume> <fpage>261</fpage>&#x2013;<lpage>263</lpage>. <pub-id pub-id-type="doi">10.1111/j.1751-0813.1987.tb15952.x</pub-id> <pub-id pub-id-type="pmid">3426463</pub-id></mixed-citation></ref>
<ref id="B17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Esmaeili</surname> <given-names>H.</given-names></name> <name><surname>Joghataei</surname> <given-names>S. M.</given-names></name> <name><surname>Khanjari</surname> <given-names>A.</given-names></name> <name><surname>Khiyabani</surname> <given-names>F. H. A.</given-names></name></person-group> (<year>2025</year>). <article-title>Comprehensive survey of caseous lymphadenitis in sheep and goats flocks of Iran: clinical, bacteriological, and molecular insights.</article-title> <source><italic>Small Rumin. Res.</italic></source> <volume>246</volume>:<fpage>107490</fpage>. <pub-id pub-id-type="doi">10.1016/j.smallrumres.2025.107490</pub-id></mixed-citation></ref>
<ref id="B18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ewels</surname> <given-names>P.</given-names></name> <name><surname>Magnusson</surname> <given-names>M.</given-names></name> <name><surname>Lundin</surname> <given-names>S.</given-names></name> <name><surname>K&#x00E4;ller</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>MultiQC: summarize analysis results for multiple tools and samples in a single report.</article-title> <source><italic>Bioinformatics</italic></source> <volume>32</volume> <fpage>3047</fpage>&#x2013;<lpage>3048</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw354</pub-id> <pub-id pub-id-type="pmid">27312411</pub-id></mixed-citation></ref>
<ref id="B19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garzon</surname> <given-names>A.</given-names></name> <name><surname>Miramontes</surname> <given-names>C.</given-names></name> <name><surname>Weimer</surname> <given-names>B. C.</given-names></name> <name><surname>Profeta</surname> <given-names>R.</given-names></name> <name><surname>Hoyos-Jaramillo</surname> <given-names>A.</given-names></name> <name><surname>Fritz</surname> <given-names>H. M.</given-names></name><etal/></person-group> (<year>2025</year>). <article-title>Comparison of virulence and resistance genes in <italic>Mannheimia haemolytica</italic> and <italic>Pasteurella multocida</italic> from dairy cattle with and without bovine respiratory disease.</article-title> <source><italic>Microbiol. Spectr.</italic></source> <volume>13</volume>:<fpage>e0120025</fpage>. <pub-id pub-id-type="doi">10.1128/spectrum.01200-25</pub-id> <pub-id pub-id-type="pmid">40522106</pub-id></mixed-citation></ref>
<ref id="B20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gu</surname> <given-names>Z.</given-names></name> <name><surname>Eils</surname> <given-names>R.</given-names></name> <name><surname>Schlesner</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>Complex heatmaps reveal patterns and correlations in multidimensional genomic data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>32</volume> <fpage>2847</fpage>&#x2013;<lpage>2849</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw313</pub-id> <pub-id pub-id-type="pmid">27207943</pub-id></mixed-citation></ref>
<ref id="B21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>D. J.</given-names></name> <name><surname>Dorneles</surname> <given-names>E. M.</given-names></name> <name><surname>Spier</surname> <given-names>S. J.</given-names></name> <name><surname>Carroll</surname> <given-names>S. P.</given-names></name> <name><surname>Edman</surname> <given-names>J.</given-names></name> <name><surname>Azevedo</surname> <given-names>V. A.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Molecular epidemiology of <italic>Corynebacterium pseudotuberculosis</italic> isolated from horses in California.</article-title> <source><italic>Infect. Genet. Evol.</italic></source> <volume>49</volume> <fpage>186</fpage>&#x2013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1016/j.meegid.2016.12.011</pub-id> <pub-id pub-id-type="pmid">27979735</pub-id></mixed-citation></ref>
<ref id="B22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hiller</surname> <given-names>E.</given-names></name> <name><surname>H&#x00F6;rz</surname> <given-names>V.</given-names></name> <name><surname>Sting</surname> <given-names>R.</given-names></name></person-group> (<year>2024</year>). <article-title><italic>Corynebacterium pseudotuberculosis</italic>: whole genome sequencing reveals unforeseen and relevant genetic diversity in this pathogen.</article-title> <source><italic>PLoS One</italic></source> <volume>19</volume>:<fpage>e0309282</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0309282</pub-id> <pub-id pub-id-type="pmid">39186721</pub-id></mixed-citation></ref>
<ref id="B23"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Jeannotte</surname> <given-names>R.</given-names></name> <name><surname>Lee</surname> <given-names>E.</given-names></name> <name><surname>Kong</surname> <given-names>N.</given-names></name> <name><surname>Ng</surname> <given-names>W.</given-names></name></person-group> (<year>2014</year>). <source><italic>High-Throughput Analysis of Foodborne Bacterial Genomic DNA Using Agilent 2200 TapeStation and Genomic DNA ScreenTape System.</italic></source> <publisher-loc>Chennai</publisher-loc>: <publisher-name>Agilent Technologies</publisher-name>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>.</mixed-citation></ref>
<ref id="B24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karp</surname> <given-names>P. D.</given-names></name> <name><surname>Billington</surname> <given-names>R.</given-names></name> <name><surname>Caspi</surname> <given-names>R.</given-names></name> <name><surname>Fulcher</surname> <given-names>C. A.</given-names></name> <name><surname>Latendresse</surname> <given-names>M.</given-names></name> <name><surname>Kothari</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>The BioCyc collection of microbial genomes and metabolic pathways.</article-title> <source><italic>Brief Bioinform.</italic></source> <volume>20</volume> <fpage>1085</fpage>&#x2013;<lpage>1093</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbx085</pub-id> <pub-id pub-id-type="pmid">29447345</pub-id></mixed-citation></ref>
<ref id="B25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Katoh</surname> <given-names>K.</given-names></name> <name><surname>Standley</surname> <given-names>D. M.</given-names></name></person-group> (<year>2013</year>). <article-title>MAFFT multiple sequence alignment software version 7: improvements in performance and usability.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>30</volume> <fpage>772</fpage>&#x2013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst010</pub-id> <pub-id pub-id-type="pmid">23329690</pub-id></mixed-citation></ref>
<ref id="B26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaufman</surname> <given-names>J. H.</given-names></name> <name><surname>Elkins</surname> <given-names>C. A.</given-names></name> <name><surname>Davis</surname> <given-names>M.</given-names></name> <name><surname>Weis</surname> <given-names>A. M.</given-names></name> <name><surname>Huang</surname> <given-names>B. C.</given-names></name> <name><surname>Mammel</surname> <given-names>M. K.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Insular microbiogeography: three pathogens as exemplars.</article-title> <source><italic>Curr. Issues Mol. Biol.</italic></source> <volume>36</volume> <fpage>89</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.21775/cimb.036.089</pub-id> <pub-id pub-id-type="pmid">31596250</pub-id></mixed-citation></ref>
<ref id="B27"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>N.</given-names></name> <name><surname>Ng</surname> <given-names>W.</given-names></name> <name><surname>Cai</surname> <given-names>L.</given-names></name> <name><surname>Leonardo</surname> <given-names>A.</given-names></name> <name><surname>Weimer</surname> <given-names>B. C.</given-names></name></person-group> (<year>2014</year>). <source><italic>Integrating the DNA Integrity Number (DIN) to Assess Genomic DNA (gDNA) Quality Control Using the Agilent 2200 TapeStation System.</italic></source> <publisher-loc>Chennai</publisher-loc>: <publisher-name>Agilent Technologies</publisher-name>.</mixed-citation></ref>
<ref id="B28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname> <given-names>B.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2.</article-title> <source><italic>Nat. Methods</italic></source> <volume>9</volume> <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id> <pub-id pub-id-type="pmid">22388286</pub-id></mixed-citation></ref>
<ref id="B29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Letunic</surname> <given-names>I.</given-names></name> <name><surname>Bork</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Interactive tree of life (iTOL) v5: An online tool for phylogenetic tree display and annotation.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>49</volume> <fpage>W293</fpage>&#x2013;<lpage>W296</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab301</pub-id> <pub-id pub-id-type="pmid">33885785</pub-id></mixed-citation></ref>
<ref id="B30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>O&#x2019;Hara</surname> <given-names>K. C.</given-names></name> <name><surname>Pires</surname> <given-names>A. F. A.</given-names></name> <name><surname>Mart&#x00ED;nez-L&#x00F3;pez</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <article-title>Evaluating the association between climatic factors and sheep condemnations in the United States using cluster analysis and spatio-temporal modeling.</article-title> <source><italic>Prev. Vet. Med.</italic></source> <volume>191</volume>:<fpage>105342</fpage>. <pub-id pub-id-type="doi">10.1016/j.prevetmed.2021.105342</pub-id> <pub-id pub-id-type="pmid">33848741</pub-id></mixed-citation></ref>
<ref id="B31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Page</surname> <given-names>A. J.</given-names></name> <name><surname>Cummins</surname> <given-names>C. A.</given-names></name> <name><surname>Hunt</surname> <given-names>M.</given-names></name> <name><surname>Wong</surname> <given-names>V. K.</given-names></name> <name><surname>Reuter</surname> <given-names>S.</given-names></name> <name><surname>Holden</surname> <given-names>M. T.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Roary: rapid large-scale prokaryote pan genome analysis.</article-title> <source><italic>Bioinformatics</italic></source> <volume>31</volume> <fpage>3691</fpage>&#x2013;<lpage>3693</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv421</pub-id> <pub-id pub-id-type="pmid">26198102</pub-id></mixed-citation></ref>
<ref id="B32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Page</surname> <given-names>A. J.</given-names></name> <name><surname>Taylor</surname> <given-names>B.</given-names></name> <name><surname>Delaney</surname> <given-names>A. J.</given-names></name> <name><surname>Soares</surname> <given-names>J.</given-names></name> <name><surname>Seemann</surname> <given-names>T.</given-names></name> <name><surname>Keane</surname> <given-names>J. A.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>SNP-sites: rapid efficient extraction of SNPs from multi-FASTA alignments.</article-title> <source><italic>Microb. Genom.</italic></source> <volume>2</volume>:<fpage>e000056</fpage>. <pub-id pub-id-type="doi">10.1099/mgen.0.000056</pub-id> <pub-id pub-id-type="pmid">28348851</pub-id></mixed-citation></ref>
<ref id="B33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pedersen</surname> <given-names>B. S.</given-names></name> <name><surname>Quinlan</surname> <given-names>A. R.</given-names></name></person-group> (<year>2018</year>). <article-title>Mosdepth: quick coverage calculation for genomes and exomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>867</fpage>&#x2013;<lpage>868</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx699</pub-id> <pub-id pub-id-type="pmid">29096012</pub-id></mixed-citation></ref>
<ref id="B34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pratt</surname> <given-names>S. M.</given-names></name> <name><surname>Spier</surname> <given-names>S. J.</given-names></name> <name><surname>Carroll</surname> <given-names>S. P.</given-names></name> <name><surname>Vaughan</surname> <given-names>B.</given-names></name> <name><surname>Whitcomb</surname> <given-names>M. B.</given-names></name> <name><surname>Wilson</surname> <given-names>W. D.</given-names></name></person-group> (<year>2005</year>). <article-title>Evaluation of clinical characteristics, diagnostic test results, and outcome in horses with internal infection caused by <italic>Corynebacterium pseudotuberculosis</italic>: 30 cases (1995&#x2013;2003).</article-title> <source><italic>J. Am. Vet. Med. Assoc.</italic></source> <volume>227</volume> <fpage>441</fpage>&#x2013;<lpage>448</lpage>. <pub-id pub-id-type="doi">10.2460/javma.2005.227.441</pub-id> <pub-id pub-id-type="pmid">16121612</pub-id></mixed-citation></ref>
<ref id="B35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Price</surname> <given-names>M. N.</given-names></name> <name><surname>Dehal</surname> <given-names>P. S.</given-names></name> <name><surname>Arkin</surname> <given-names>A. P.</given-names></name></person-group> (<year>2009</year>). <article-title>FastTree: computing large minimum evolution trees with profiles instead of a distance matrix.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>26</volume> <fpage>1641</fpage>&#x2013;<lpage>1650</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msp077</pub-id> <pub-id pub-id-type="pmid">19377059</pub-id></mixed-citation></ref>
<ref id="B36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rebou&#x00E7;as</surname> <given-names>M. F.</given-names></name> <name><surname>Loureiro</surname> <given-names>D.</given-names></name> <name><surname>Barral</surname> <given-names>T. D.</given-names></name> <name><surname>Seyffert</surname> <given-names>N.</given-names></name> <name><surname>Raynal</surname> <given-names>J. T.</given-names></name> <name><surname>Sousa</surname> <given-names>T. J.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Cell wall glycolipids from <italic>Corynebacterium pseudotuberculosis</italic> strains with different virulences differ in terms of composition and immune recognition.</article-title> <source><italic>Braz. J. Microbiol.</italic></source> <volume>51</volume> <fpage>2101</fpage>&#x2013;<lpage>2110</lpage>. <pub-id pub-id-type="doi">10.1007/s42770-020-00343-9</pub-id> <pub-id pub-id-type="pmid">32712830</pub-id></mixed-citation></ref>
<ref id="B37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rhodes</surname> <given-names>D. M.</given-names></name> <name><surname>Magdesian</surname> <given-names>K. G.</given-names></name> <name><surname>Byrne</surname> <given-names>B. A.</given-names></name> <name><surname>Kass</surname> <given-names>P. H.</given-names></name> <name><surname>Edman</surname> <given-names>J.</given-names></name> <name><surname>Spier</surname> <given-names>S. J.</given-names></name></person-group> (<year>2015</year>). <article-title>Minimum inhibitory concentrations of equine <italic>Corynebacterium pseudotuberculosis</italic> isolates (1996&#x2013;2012).</article-title> <source><italic>J. Vet. Intern. Med.</italic></source> <volume>29</volume> <fpage>327</fpage>&#x2013;<lpage>332</lpage>. <pub-id pub-id-type="doi">10.1111/jvim.12534</pub-id> <pub-id pub-id-type="pmid">25586790</pub-id></mixed-citation></ref>
<ref id="B38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rice</surname> <given-names>P.</given-names></name> <name><surname>Longden</surname> <given-names>I.</given-names></name> <name><surname>Bleasby</surname> <given-names>A.</given-names></name></person-group> (<year>2000</year>). <article-title>EMBOSS: the European Molecular Biology Open Software Suite.</article-title> <source><italic>Trends Genet.</italic></source> <volume>16</volume> <fpage>276</fpage>&#x2013;<lpage>277</lpage>. <pub-id pub-id-type="doi">10.1016/s0168-9525(00)02024-2</pub-id> <pub-id pub-id-type="pmid">10827456</pub-id></mixed-citation></ref>
<ref id="B39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seemann</surname> <given-names>T.</given-names></name></person-group> (<year>2014</year>). <article-title>Prokka: rapid prokaryotic genome annotation.</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>2068</fpage>&#x2013;<lpage>2069</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu153</pub-id> <pub-id pub-id-type="pmid">24642063</pub-id></mixed-citation></ref>
<ref id="B40"><mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Seemann</surname> <given-names>T.</given-names></name></person-group> (<year>2015</year>). <source><italic>Snippy - Rapid Haploid Variant Calling and Core Genome Alignment.</italic></source> <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/tseemann/snippy">https://github.com/tseemann/snippy</ext-link> (accessed March 9, 2020)</comment>.</mixed-citation></ref>
<ref id="B41"><mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Seemann</surname> <given-names>T.</given-names></name></person-group> (<year>2020</year>). <source><italic>ABRicate: Mass Screening of Contigs for Antimicrobial Resistance or Virulence Genes.</italic></source> <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/tseemann/abricate">https://github.com/tseemann/abricate</ext-link> (accessed December 5, 2025)</comment>.</mixed-citation></ref>
<ref id="B42"><mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Seemann</surname> <given-names>T.</given-names></name></person-group> (<year>2022</year>). <source><italic>Shovill: Assemble Bacterial Isolate Genomes from Illumina Paired-End Reads.</italic></source> <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/tseemann/shovill">https://github.com/tseemann/shovill</ext-link> (accessed December 13, 2025)</comment>.</mixed-citation></ref>
<ref id="B43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>W.</given-names></name> <name><surname>Le</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Hu</surname> <given-names>F.</given-names></name></person-group> (<year>2016</year>). <article-title>SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation.</article-title> <source><italic>PLoS One</italic></source> <volume>11</volume>:<fpage>e0163962</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0163962</pub-id> <pub-id pub-id-type="pmid">27706213</pub-id></mixed-citation></ref>
<ref id="B44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Snipen</surname> <given-names>L.</given-names></name> <name><surname>Liland</surname> <given-names>K. H.</given-names></name></person-group> (<year>2015</year>). <article-title>micropan: an R-package for microbial pan-genomics.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>16</volume>:<fpage>79</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-015-0517-0</pub-id> <pub-id pub-id-type="pmid">25888166</pub-id></mixed-citation></ref>
<ref id="B45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Soares</surname> <given-names>S. C.</given-names></name> <name><surname>Silva</surname> <given-names>A.</given-names></name> <name><surname>Trost</surname> <given-names>E.</given-names></name> <name><surname>Blom</surname> <given-names>J.</given-names></name> <name><surname>Ramos</surname> <given-names>R.</given-names></name> <name><surname>Carneiro</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>The pan-genome of the animal pathogen <italic>Corynebacterium pseudotuberculosis</italic> reveals differences in genome plasticity between the biovar ovis and equi strains.</article-title> <source><italic>PLoS One</italic></source> <volume>8</volume>:<fpage>e53818</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0053818</pub-id> <pub-id pub-id-type="pmid">23342011</pub-id></mixed-citation></ref>
<ref id="B46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sousa</surname> <given-names>E. G.</given-names></name> <name><surname>Campos</surname> <given-names>G. M.</given-names></name> <name><surname>Viana</surname> <given-names>M. V. C.</given-names></name> <name><surname>Gomes</surname> <given-names>G. C.</given-names></name> <name><surname>Rodrigues</surname> <given-names>D. L. N.</given-names></name> <name><surname>Aburjaile</surname> <given-names>F. F.</given-names></name><etal/></person-group> (<year>2025</year>). <article-title>The research on the identification, taxonomy, and comparative genomics analysis of nine <italic>Bacillus velezensis</italic> strains significantly contributes to microbiology, genetics, bioinformatics, and biotechnology.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>16</volume>:<fpage>1544934</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2025.1544934</pub-id> <pub-id pub-id-type="pmid">40177483</pub-id></mixed-citation></ref>
<ref id="B47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Spier</surname> <given-names>S. J.</given-names></name></person-group> (<year>2008</year>). <article-title><italic>Corynebacterium pseudotuberculosis</italic> infection in horses: an emerging disease associated with climate change?</article-title> <source><italic>Equine Vet. Educ.</italic></source> <volume>20</volume> <fpage>37</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.2746/095777307X260106</pub-id></mixed-citation></ref>
<ref id="B48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Spier</surname> <given-names>S. J.</given-names></name> <name><surname>Toth</surname> <given-names>B.</given-names></name> <name><surname>Edman</surname> <given-names>J.</given-names></name> <name><surname>Quave</surname> <given-names>A.</given-names></name> <name><surname>Habasha</surname> <given-names>F.</given-names></name> <name><surname>Garrick</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Survival of <italic>Corynebacterium pseudotuberculosis</italic> biovar equi in soil.</article-title> <source><italic>Vet. Rec.</italic></source> <volume>170</volume>:<fpage>180</fpage>. <pub-id pub-id-type="doi">10.1136/vr.100543</pub-id> <pub-id pub-id-type="pmid">22266682</pub-id></mixed-citation></ref>
<ref id="B49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Szklarczyk</surname> <given-names>D.</given-names></name> <name><surname>Gable</surname> <given-names>A. L.</given-names></name> <name><surname>Nastou</surname> <given-names>K. C.</given-names></name> <name><surname>Lyon</surname> <given-names>D.</given-names></name> <name><surname>Kirsch</surname> <given-names>R.</given-names></name> <name><surname>Pyysalo</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>The STRING database in 2021: customizable protein-protein networks, and functional characterization of user-uploaded gene/measurement sets.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>49</volume> <fpage>D605</fpage>&#x2013;<lpage>D612</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa1074</pub-id> <pub-id pub-id-type="pmid">33237311</pub-id></mixed-citation></ref>
<ref id="B50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tettelin</surname> <given-names>H.</given-names></name> <name><surname>Masignani</surname> <given-names>V.</given-names></name> <name><surname>Cieslewicz</surname> <given-names>M. J.</given-names></name> <name><surname>Donati</surname> <given-names>C.</given-names></name> <name><surname>Medini</surname> <given-names>D.</given-names></name> <name><surname>Ward</surname> <given-names>N. L.</given-names></name><etal/></person-group> (<year>2005</year>). <article-title>Genome analysis of multiple pathogenic isolates of <italic>Streptococcus agalactiae</italic>: implications for the microbial &#x201C;pan-genome&#x201D;.</article-title> <source><italic>Proc. Natl. Acad. Sci. U. S. A.</italic></source> <volume>102</volume> <fpage>13950</fpage>&#x2013;<lpage>13955</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0506758102</pub-id> <pub-id pub-id-type="pmid">16172379</pub-id></mixed-citation></ref>
<ref id="B51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Viana</surname> <given-names>M. V. C.</given-names></name> <name><surname>Figueiredo</surname> <given-names>H.</given-names></name> <name><surname>Ramos</surname> <given-names>R.</given-names></name> <name><surname>Guimar&#x00E3;es</surname> <given-names>L. C.</given-names></name> <name><surname>Pereira</surname> <given-names>F. L.</given-names></name> <name><surname>Dorella</surname> <given-names>F. A.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Comparative genomic analysis between <italic>Corynebacterium pseudotuberculosis</italic> strains isolated from buffalo.</article-title> <source><italic>PLoS One</italic></source> <volume>12</volume>:<fpage>e0176347</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0176347</pub-id> <pub-id pub-id-type="pmid">28445543</pub-id></mixed-citation></ref>
<ref id="B52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Weimer</surname> <given-names>B. C.</given-names></name></person-group> (<year>2017</year>). <article-title>100K pathogen genome project.</article-title> <source><italic>Genome Announc.</italic></source> <volume>5</volume>:<fpage>e00594-17</fpage>. <pub-id pub-id-type="doi">10.1128/genomeA.00594-17</pub-id> <pub-id pub-id-type="pmid">28705971</pub-id></mixed-citation></ref>
<ref id="B53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Weis</surname> <given-names>A. M.</given-names></name> <name><surname>Clothier</surname> <given-names>K. A.</given-names></name> <name><surname>Huang</surname> <given-names>B. C.</given-names></name> <name><surname>Kong</surname> <given-names>N.</given-names></name> <name><surname>Weimer</surname> <given-names>B. C.</given-names></name></person-group> (<year>2016</year>). <article-title>Draft genome sequences of <italic>Campylobacter jejuni</italic> strains that cause abortion in livestock.</article-title> <source><italic>Genome Announc.</italic></source> <volume>4</volume>:<fpage>e01324-16</fpage>. <pub-id pub-id-type="doi">10.1128/genomea.01324-16</pub-id> <pub-id pub-id-type="pmid">27908990</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/302230/overview">Leonard Peruski</ext-link>, Wadsworth Center, United States</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/525660/overview">Wanderson Marques Da Silva</ext-link>, National Scientific and Technical Research Council (CONICET), Argentina</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3192720/overview">Edith Ch&#x00E1;vez</ext-link>, Meritorious Autonomous University of Puebla, Mexico</p></fn>
</fn-group>
</back>
</article>