<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1228552</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1228552</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Technology and Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>EasySSR: a user-friendly web application with full command-line features for large-scale batch microsatellite mining and samples comparison</article-title>
<alt-title alt-title-type="left-running-head">Alves et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1228552">10.3389/fgene.2023.1228552</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Alves</surname>
<given-names>Sandy Ingrid Aguiar</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2286090/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ferreira</surname>
<given-names>Victor Benedito Costa</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2369813/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dantas</surname>
<given-names>Carlos Willian Dias</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1975182/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Silva</surname>
<given-names>Artur Luiz da Costa da</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/891870/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ramos</surname>
<given-names>Rommel Thiago Juc&#xe1;</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/425394/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Laboratory of Biological Engineering</institution>, <institution>Biological Science Institute</institution>, <institution>Park of Science and Technology</institution>, <institution>Federal University of Par&#xe1;</institution>, <addr-line>Bel&#xe9;m</addr-line>, <country>Brazil</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Biological Sciences</institution>, <institution>Federal University of Minas Gerais</institution>, <addr-line>Belo Horizonte</addr-line>, <country>Brazil</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/200612/overview">Nathan Olson</ext-link>, National Institute of Standards and Technology (NIST), United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/559264/overview">Safdar Ali</ext-link>, Aliah University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2228876/overview">Nancy Manchanda</ext-link>, Orna Therapeutics, Inc., United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Rommel Thiago Juc&#xe1; Ramos, <email>rommelramos@ufpa.br</email>
</corresp>
<fn fn-type="present-address" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>
<bold>Present Address:</bold> Carlos Willian Dias Dantas, Laboratory of Biological Engineering, Biological Science Institute, Park of Science and Technology, Federal University of Par&#xe1;, Bel&#xe9;m, Brazil</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>08</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1228552</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>05</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>07</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Alves, Ferreira, Dantas, Silva and Ramos.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Alves, Ferreira, Dantas, Silva and Ramos</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Microsatellites, also known as SSRs or STRs, are polymorphic DNA regions with tandem repetitions of a nucleotide motif of size 1&#x2013;6 base pairs with a broad range of applications in many fields, such as comparative genomics, molecular biology, and forensics. However, the majority of researchers do not have computational training and struggle while running command-line tools or very limited web tools for their SSR research, spending a considerable amount of time learning how to execute the software and conducting the post-processing data tabulation in other tools or manually&#x2014;time that could be used directly in data analysis. We present EasySSR, a user-friendly web tool with command-line full functionality, designed for practical use in batch identifying and comparing SSRs in sequences, draft, or complete genomes, not requiring previous bioinformatic skills to run. EasySSR requires only a FASTA and an optional GENBANK file of one or more genomes to identify and compare STRs. The tool can automatically analyze and compare SSRs in whole genomes, convert GenBank to PTT files, identify perfect and imperfect SSRs and coding and non-coding regions, compare their frequencies, abundancy, motifs, flanking sequences, and iterations, producing many outputs ready for download such as PTT files, interactive charts, and Excel tables, giving the user the data ready for further analysis in minutes. EasySSR was implemented as a web application, which can be executed from any browser and is available for free at <ext-link ext-link-type="uri" xlink:href="https://computationalbiology.ufpa.br/easyssr/">https://computationalbiology.ufpa.br/easyssr/</ext-link>. Tutorials, usage notes, and download links to the source code can be found at <ext-link ext-link-type="uri" xlink:href="https://github.com/engbiopct/EasySSR">https://github.com/engbiopct/EasySSR</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>batch</kwd>
<kwd>genome</kwd>
<kwd>microsatellites</kwd>
<kwd>motifs</kwd>
<kwd>large scale</kwd>
<kwd>web tool</kwd>
<kwd>comparison</kwd>
<kwd>bioinformatics</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Microsatellites, also known as Simple Sequence Repeats (SSRs) or Short Tandem Repeats (STRs), are polymorphic DNA regions with tandem repetitions of a nucleotide motif ranging 1&#x2013;6&#xa0;bp, also called mononucleotide, di-, tri-, tetra-, penta-, and hexanucleotide repeats (<xref ref-type="bibr" rid="B28">Pinheiro et al., 2022</xref>). They can be categorized into perfect, imperfect, and compound and are found in both coding and non-coding regions in eukaryotes, prokaryotes, and viruses (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>; <xref ref-type="bibr" rid="B1">Beier et al., 2017</xref>). The SSRs have various clinical implications and a broad range of applications in many fields, such as conservation and evolutionary studies, comparative genomics, molecular biology, biotechnology, oncology, and forensics (<xref ref-type="bibr" rid="B15">Laskar et al., 2022</xref>; <xref ref-type="bibr" rid="B28">Pinheiro et al., 2022</xref>).</p>
<p>With the application of computational approaches in biological data along with the advance of Next-Generation Sequencing technologies (NGS), many tools for SSR mining have been developed over the years, with IMEx (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>), MISA (<xref ref-type="bibr" rid="B1">Beier et al., 2017</xref>), TRF (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>), and Repeat Masker (<xref ref-type="bibr" rid="B34">Tarailo&#x2010;Graovac and Chen, 2009</xref>) among the most popular and widely used tools, as reviewed by <xref ref-type="bibr" rid="B23">Mudunuri et al. (2010a)</xref>, <xref ref-type="bibr" rid="B17">Lim et al. (2013)</xref>, and <xref ref-type="bibr" rid="B20">Mathur et al. (2020)</xref>.</p>
<p>However, many researchers lack advanced computational training and therefore have difficulty using these tools, as most of them: i) Need significant investment of time for the user to comprehend, install, and run those pieces of software; ii) Are command-line based without graphical interface; iii) Require device storage and dependencies for installation; iv) Have many parameters and dependencies that might confuse inexperienced users; v) Require specific file formats as input, e.g., PTT files, which are not easily obtainable for inexperienced users who would rather use FASTA and GenBank files; and vi) Are not available anymore, principally web servers. vii) Lastly, the few web tools still available are very limited in many aspects, such as the limited size of the input files, rare flexibilization of parameters, and the lack of identification of flanking sequences, downloadable outputs, post-processed graphical outputs, and features for online sample comparison, or they do not focus solely on Microsatellites motifs (1&#x2013;6&#xa0;bp) but also on other Tandem repeats such as Minisatellites (10&#x2013;30&#xa0;bp) and Satellites (&#x3e;100&#xa0;bp); indeed, in some cases, even if the web service does exist, the full functionality is restricted to the command-line version, limiting the online service to basic and small analysis (<xref ref-type="bibr" rid="B17">Lim et al., 2013</xref>).</p>
<p>In this way, many scientists end up choosing to use command-line tools for full functionality and spend a considerable amount of time learning how to install and execute the software, in addition to performing post-processing data tabulation on other tools or manually, instead of focusing more time on data analysis; thus, there is a need for a web application that can be an easy tool for online analysis that can do the same as command-line tools, filling in the gaps of other software without sacrificing the full-fledged and accurate results already obtained (<xref ref-type="bibr" rid="B26">Oliveira et al., 2008</xref>; <xref ref-type="bibr" rid="B28">Pinheiro et al., 2022</xref>).</p>
<p>Given these lacunae, we present EasySSR, an intuitive web tool that implements command-line IMEx versatile and accurate SSR mining with novel settings by automatizing the analysis from data input, converting individual files, and performing the post-processing analysis of the individual outputs, fully summarizing those data into statistics sheets and graphs available online for the user. It was designed for practical and intuitive use in batch identifying perfect and imperfect SSRs in large-scale data from one or many individual FASTA sequences, draft, or complete genomes, with full functionality and data visualization directly from the web without the need for any software installation, their dependencies, or complicated bioinformatic skills to run, giving the user results that can be easily interpreted, enabling even traditional non-bioinformatician scientists with limited computational experience and resources to use SSRs in their research (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>).</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Workflow and implementation</title>
<p>EasySSR is a web tool hosted in a standard Linux server, developed using the Django v4.1.7 framework (<xref ref-type="bibr" rid="B8">Django Software Foundation, 2023</xref>), based on the Python language v3.11, with information stored in a MariaDB database v10.10.2, and it executes several helper scripts in Python and Perl to automate the following workflow in the back-end, as summarized in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>EasySSR workflow from user input to output. <bold>(A)</bold> In input, EasySSR receives user information, input files, and parameters. <bold>(B)</bold> In Step 1, it receives the input, verifies the data, and converts GENBANK to PTT files. <bold>(C)</bold> With each pair of FASTA files-PTT files ready, EasySSR starts Step 2 by analyzing every file with IMEx, repeating the process until all files have been processed. <bold>(D)</bold> Then, in Step 3, EasySSR processes all IMEX outputs, stores the data in a new project at the database, and processes the summarized data into sheets and charts. <bold>(E)</bold> The output is exhibited through a HTML page, and the data are made available for download.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g001.tif"/>
</fig>
<p>EasySSR receives the User Information&#x2014;User Project name (required), Email (optional); Input Files&#x2014;FASTA files (required), GENBANK files (optional); and Parameters&#x2014;Default or Custom when the user clicks the upload button. EasySSR uses secure HTTPS (Hypertext Transfer Protocol Secure) connections to transfer data between the client and the server. Step 1 starts when the files are uploaded. If the user uploaded GenBank files, the script verifies if every FASTA file has a corresponding GenBank annotation file and if both have the same filename with less than 35 characters. Then, it converts the GenBank files to PTT format through a script in Perl. If no GenBank file was uploaded, EasySSR considers everything as non-coding by default. In the web interface, the process from upload to GBK-PTT conversion is shown as Step 1 to the user. Step 2 starts with a script in Python for batch execution of IMEX v2.1 for each FASTA file. This step might be slower or faster depending on the size of the input files and the complexity of the annotation and the parameters. For Step 3, EasySSR scans the folders generated by IMEX, reads the IMEX TXT outputs, and records each result in the database created for that project. After extraction, the interactive charts and tables from SQL queries in the database are rendered for the web with a color-blind-friendly palette using the Chart and jQuery v3.6 JavaScript libraries with the DataTables plugin. The front-end of EasySSR was encoded with Bootstrap v4.0 and jQuery v3.6 libraries, generating user-appealing interfaces in the web interface and exhibiting the post-processed outputs in HTML format, which are available for download alongside the IMEX outputs. The project data are stored through a project ID in the EasySSR database for a month-long period.</p>
</sec>
<sec id="s2-2">
<title>2.2 Tool validation</title>
<p>In order to validate EasySSR, a web tool with full command-line functionalities that is suitable for large-scale comparative analysis, it was evaluated from three different perspectives: i) Firstly, to demonstrate the functionality of EasySSR against other web tools, it was compared with the most cited tools that have an active web service with a feature for the identification of Microsatellites. However, as the online tools do not support the analysis of SSRs in large datasets, and this is the main distinguishing attribute of EasySSR, performance validation had to be executed in comparison with command-line tools. In this way, for ii), benchmark testing was used for two datasets previously validated by <xref ref-type="bibr" rid="B1">Beier et al. (2017)</xref>, <xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram (2007)</xref>, in order to measure the efficiency against the main similar software and their specific datasets, for both prokaryotes and eukaryotes, and with FASTA input only or both FASTA and GenBank. The first dataset had a homogeneous set of small artificial prokaryotic chromosomes used to benchmark EasySSR performance while running intraspecific analysis for perfect SSRs, using only FASTA files as input. The second dataset had a heterogeneous set of complete prokaryote genomes, eukaryotic chromosomes, and a human gene and was used to benchmark EasySSR performance while running interspecific analysis for imperfect SSRs, using both FASTA and GenBank files as input. iii) Lastly, to demonstrate EasySSR capacity to process large datasets of complete genomes, the program was executed with a dataset validated by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, for batch comparison of 54 whole genomes of <italic>Corynebacterium pseudotuberculosis</italic>, running interspecific analysis for perfect SSRs, using both FASTA and GenBank files as input.</p>
<sec id="s2-2-1">
<title>2.2.1 Function comparison against web tools</title>
<p>Many web services offer features for microsatellite mining. However, they are widely different in terms of functionality and the analysis, input, output content, and output return style (<xref ref-type="bibr" rid="B25">Mudunuri et al., 2010b</xref>). In this way, EasySSR was compared to other web tools in order to demonstrate the main functionalities that are common to them or exclusive to our tool. For this validation, six review articles were screened to discover web tools that have a feature for the identification of Microsatellites (<xref ref-type="bibr" rid="B16">Leclercq et al., 2007</xref>; <xref ref-type="bibr" rid="B30">Sharma et al., 2007</xref>; <xref ref-type="bibr" rid="B21">Merkel and Gemmell, 2008</xref>; <xref ref-type="bibr" rid="B25">Mudunuri et al., 2010b</xref>; <xref ref-type="bibr" rid="B17">Lim et al., 2013</xref>; <xref ref-type="bibr" rid="B20">Mathur et al., 2020</xref>). The publishing articles for each tool were analyzed in April 2023, and the platforms were tested through the links available in the articles to check if they were still active. If the tool was functional, the article citation rates were analyzed through Google Scholar, and these data alongside with the search link were tabulated. The 10 most cited web tools were used for features comparison against EasySSR. The features used for comparison were partially based on the ones analyzed by <xref ref-type="bibr" rid="B21">Merkel and Gemmell (2008)</xref>, <xref ref-type="bibr" rid="B25">Mudunuri et al. (2010b)</xref> in their articles. 
Besides the Citations and Author/Publishing Year, the following categories and features were used in this comparison: i) ANALYSIS: Microsatellite only, Maximum motif length, Perfect SSRs, Imperfect SSRs, Compound SSRs, Flexible Parameters, and Large-scale analysis; ii) INPUT: Limits Max, File Size, Analyze web of many whole genomes, Accepts multiple FASTA files, Integration with NCBI, and Box for cut-and-paste small sequences; iii) OUTPUT CONTENT: Text file, HTML file, PTT file, Coding/Non-coding, Flanking Sequences, Sample comparison sheets, and Sample comparison graphs; iv) OUTPUT RETURN: Web results, Email results, and Download results.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Benchmark testing against web servers and command-line tools</title>
<sec id="s2-2-2-1">
<title>2.2.2.1 Intraspecific analysis for perfect SSRs in prokaryotes, using only FASTA files as input with custom parameters</title>
<p>For this benchmark testing, the dataset employed by <xref ref-type="bibr" rid="B1">Beier et al. (2017)</xref> was used to validate Misa-Web, a set of small barley bacterial artificial chromosomes (BACs) available in the NCBI database under the accession numbers: AC256511.1 (113&#xa0;kb), AC257258.1 (124&#xa0;kb), AC259365.1 (118&#xa0;kb), AC261250.1 (91&#xa0;kb), AC263353.1 (33&#xa0;kb), AC264961.1 (126&#xa0;kb), AC265197.1 (113&#xa0;kb), AC266636.1 (167&#xa0;kb), AC267178.1 (121&#xa0;kb), and AC269605.1 (119&#xa0;kb). For this comparison, the sequence assemblies were obtained with the same version used in their original article, through their NCBI accession numbers, and analyzed for perfect SSRs. Only the FASTA files were used as input in the analysis as the annotation available in NCBI consists only of gaps and has no gene information. This dataset is also available at EasySSR webpage and GitHub as &#x201c;Dataset 1&#x2014;Misa.&#x201d;</p>
<p>The detected microsatellites and execution time of EasySSR were compared against tools that also have settings for perfect SSR search only, also known as Misa-mode, those being the web servers of MISA-web (<xref ref-type="bibr" rid="B1">Beier et al., 2017</xref>) and TRF web (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>) and command-line tools ProGeRF (<xref ref-type="bibr" rid="B18">Lopes et al., 2015</xref>), GMATo (<xref ref-type="bibr" rid="B37">Wang et al., 2013</xref>), mreps (<xref ref-type="bibr" rid="B13">Kolpakov, 2003</xref>), and SciRoKo (<xref ref-type="bibr" rid="B12">Kofler et al., 2007</xref>). The analysis was executed with the same parameters as the original benchmark test: minimum repeat copy number (Mono: 5, Di: 5, Tri: 5, Tetra: 5, Penta: 5, Hexa: 5); Imperfection and Mismatches&#x2013;0 (Perfect SSR only&#x2013;Misa mode); dMAX compound SSR&#x2013;0&#xa0;bp.</p>
</sec>
<sec id="s2-2-2-2">
<title>2.2.2.2 Interspecific analysis for imperfect SSR in prokaryotes and eukaryotes, using both FASTA and GenBank files as input, with custom parameters</title>
<p>For the second benchmark testing, the dataset validated by <xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram (2007)</xref> was used to validate IMEX 1.0 through the analysis of an interspecific sequence set composed of the human atrophin1 gene, 5&#xa0;kb (BC051795); two eukaryote chromosomes - <italic>Plasmodium falciparum</italic> chromosome IV, 1,193&#xa0;kb (NC_004318.1) and yeast chromosome IV, 1,518&#xa0;kb (NC_001136.8); and two complete prokaryote genomes - <italic>Mycobacterium tuberculosis</italic> H37Rv, 4,370&#xa0;kb (NC_000962.2) and <italic>Escherichia coli</italic> K12, 4,596&#xa0;kb (NC_000913.2). The sequences were obtained through their NCBI accession numbers, with the same version as their original article, downloaded as FASTA and GenBank annotation files, which were renamed to: (&#x201c;Ecoli_K12.fasta,&#x201d; &#x201c;Ecoli_K12.gb&#x201d;); (&#x201c;Human_Atrophin1.fasta,&#x201d; &#x201c;Human_Atrophin1.gb&#x201d;); (&#x201c;MTB_H37Rv.fasta,&#x201d; &#x201c;MTB_H37Rv.gb&#x201d;); (&#x201c;Plasmodium_Chr4.fasta,&#x201d; &#x201c;Plasmodium_Chr4.gb&#x201d;); and (&#x201c;Yeast_Chr4.fasta,&#x201d; &#x201c;Yeast_Chr4.gb&#x201d;), in a way that both FASTA and GenBank have the same filename besides the extensions, and the filename has less than 35 characters. This dataset is also available at EasySSR webpage and GitHub as &#x201c;Dataset 2&#x2014;IMEx.&#x201d;</p>
<p>The detected microsatellites and execution time of EasySSR were compared against tools that also have settings for imperfect SSR search: TRF (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>), IMEx 1.0 (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref> original article data), IMEx 2.1 (<xref ref-type="bibr" rid="B23">Mudunuri et al., 2010a</xref>), and Sputnik (<xref ref-type="bibr" rid="B22">Morgante et al., 2002</xref>). The following parameters were used, those being the same ones applied by <xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>: minimum repeat copy number&#x2013;Mono:5, Di: 3, Tri: 2, Tetra: 2, Penta: 2, Hexa: 2, Imperfection of all tracts to 10%, mismatches - Mono: 1, Di: 1, Tri: 1, Tetra: 2, Penta: 2, Hexa: 3; with the additional parameters of dMAX cSSR of 0&#xa0;bp, 15&#xa0;bp for flanking sequences, and standardization level 3.</p>
</sec>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Large-scale interspecific analysis for imperfect SSR, using both FASTA and GenBank files as input with default parameters</title>
<p>Differently from the benchmark tests, this comparison aimed to demonstrate the capacity of EasySSR to handle large datasets while being a versatile shortcut for online data analysis. For this, 54 complete genomes of <italic>C. pseudotuberculosis</italic> (CP) were selected, which have been previously studied by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, who also used IMEx 2.1 as the microsatellite mining tool. The sequences were obtained at NCBI through the accession numbers stated in <xref ref-type="table" rid="T4">Table 4</xref>, with the same version as the ones stated in the original article by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, and downloaded as FASTA and GenBank annotation files.</p>
<p>For this analysis, the dataset was processed in EasySSR with slightly different parameters, in custom mode and default mode. In general, the main parameters were the same for both analyses: Minimum Repeat Number&#x2013;Mono:12, Di: 6, Tri: 4, Tetra: 3, Penta: 3, Hexa: 3, flanking sequences of size 15&#xa0;bp, dMax compound of 0, Standardization level 3, extracting all types of SSR, and yes for identify coding/non-coding regions, generate alignment, and text outputs. However, the first analysis was conducted by searching for perfect SSRs only, with the same parameters as <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, by using the custom parameters mode and setting the imperfection and mismatches as 0, expecting to have the same results as them. Then, the second analysis was conducted by searching for perfect and imperfect SSRs, using the EasySSR default parameters, which were also based on and adapted from <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, but with Imperfection % - Mono: 10%, Di: 10%, Tri: 10%, Tetra: 10%, Penta: 10%, Hexa:10% and Mismatch in Pattern: Mono: 1; Di:1; Tri:1; Tetra:2; Penta:2; Hexa:2. The results were compared with <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref> through the graphs and charts generated as the output of EasySSR.</p>
</sec>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>3 Results and discussion</title>
<sec id="s3-1">
<title>3.1 Tool overview</title>
<p>EasySSR is an intuitive web server designed to facilitate SSR research; it does not require registration, works in any browser, and is freely available to non-commercial users at <ext-link ext-link-type="uri" xlink:href="https://computationalbiology.ufpa.br/easyssr/">https://computationalbiology.ufpa.br/easyssr/</ext-link> (<xref ref-type="fig" rid="F2">Figure 2A</xref>), with tutorials, usage notes, and source code available at <ext-link ext-link-type="uri" xlink:href="https://github.com/engbiopct/EasySSR">https://github.com/engbiopct/EasySSR</ext-link>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>
<bold>(A)</bold> EasySSR input screen. <bold>(B)</bold> EasySSR loading screen.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g002.tif"/>
</fig>
<p>It offers many automatized extra features for data visualization and sample comparison, besides the IMEX sensitivity and its advanced functions to identify microsatellites, such as searching perfect microsatellites separately, getting the coding/non-coding information of the microsatellite tracts, generating alignments with consensus microsatellite tracts, restricting the imperfection limit for the repeat unit of each size, setting the imperfection percentage threshold of each repeat size, restricting the minimum number of repeat units of a tract of each size, searching for repeats of a particular size or all sizes, setting the flanking sequence size limit, and standardizing the repeats.</p>
<p>As for the automatized features unique to EasySSR, it can automatically convert GenBank to PTT files, it summarizes SSRs frequencies, abundancy, flanking sequences, and iterations of motifs, producing many outputs ready to download such as PTT files, IMEX HTML/TXT discover-friendly outputs, interactive charts, and summarized data/statistics Excel tables for comparison of the samples, giving the user the data ready for further analysis in a computationally feasible time. This reduces a significant amount of time worth of data tabulation, minimizing tedious manual operations and therefore decreasing the chance of errors.</p>
<p>As the information about compound SSRs is restricted to IMEX HTML files, this version of EasySSR does not include compound SSRs in the summary tables, including only their raw data of each file analyzed in the downloadable folder IMEx outputs, focusing their comparison on perfect and imperfect SSRs and their respective positions in coding/non-coding regions.</p>
<sec id="s3-1-1">
<title>3.1.1 Input files</title>
<p>EasySSR requires only a project name and one or more FASTA files containing nucleotide sequences or genomes (draft/complete) for the identification and comparison of STRs (<xref ref-type="fig" rid="F2">Figure 2A</xref>). If the user intends to identify coding/non-coding regions, a GENBANK file should also be uploaded for each FASTA file. Only the FASTA file is mandatory, whereas the GENBANK file is optional. When an annotation file is not uploaded, the algorithm will automatically assume that all sequences in the FASTA file are non-coding. However, with an annotation file, the algorithm will leverage the provided information to calculate the distribution of motifs in coding and non-coding regions. In the case of a multi-FASTA file input, EasySSR will identify SSRs, but the file will be treated and analyzed as a single draft genome. The algorithm treats each FASTA file as an independent genome, comparing them separately, and utilizes the input FASTA files filename as the sequence name in the EasySSR outputs. This web application uses secure HTTPS (Hypertext Transfer Protocol Secure) connections to transfer data between the client and the server, ensuring that the data are not intercepted during transmission and not used for purposes other than the intended analysis, with the project data being stored in the EasySSR database for a month-long period.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Default parameters</title>
<p>The tool runs with intuitive default or custom flexible parameters and has no limit size for input (<xref ref-type="fig" rid="F2">Figure 2A</xref>). In this way, users can load as many genomes as they want for their analysis, depending only on the computational structure available. The user does not need to input any parameter in the default parameters mode but, rather, just select this option and execute EasySSR. The preset default parameters are based on <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>: Repeat Number: 1&#x2013;12, 2&#x2013;6, 3&#x2013;4, 4&#x2013;3, 5&#x2013;3, and 6&#x2013;3; adapted to allow the imperfection maximum of 10% with 1 or 2 mismatches: Imperfection % (p%): 1%&#x2013;10%, 2%&#x2013;10%, 3%&#x2013;10%, 4%&#x2013;10%, 5%&#x2013;10%, 6%&#x2013;10%; and Mismatch in Pattern: 1&#x2013;1; 2&#x2013;1; 3&#x2013;1; 4&#x2013;2; 5&#x2013;2; 6&#x2013;2. Maximum distance for compound SSR: 0&#xa0;bp; Standardization Level: Level 3; Flanking Sequences: 15&#xa0;bp; Extract all SSR types, Generate Alignment, and Text Output: &#x201c;Yes.&#x201d; In this way, the user can easily write a project name, input the files to be analyzed, and press the &#x201c;Upload and Run&#x201d; button, as shown in <xref ref-type="fig" rid="F2">Figure 2A</xref>. The loading screen will then be exhibited, as demonstrated in <xref ref-type="fig" rid="F2">Figure 2B</xref>, until the analysis is complete.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Beyond the default parameters</title>
<p>EasySSR Custom mode (<xref ref-type="fig" rid="F3">Figure 3</xref>) enables users to adjust analysis parameters (A to J) based on preferences, with brief descriptions conveniently accessible via the information icon (i). This user-friendly feature aids in selecting suitable values, empowering customization to specific requirements. The only mandatory fields for user input in Custom mode are from A to D: (A) Mismatches; (B) Imperfection %. To restrict the analysis to perfect SSR only, also known as Misa-mode, the user can define all the settings in parameters (A) and (B) to 0; (C) Minimum Repeat Number; and (D) Size of Flanking Sequences. The other parameters, from (E) to (J), can be used as the preset: (E) Generate Alignment and (F) Generate Text output are fixed in YES since EasySSR processes those files to generate the summarized outputs, charts, and tables; (G) Identify Coding Regions is preset as YES but can be set as NO; (H) Maximum distance for Compound SSR is preset at 0 but can be set from &#x2212;1 to 100; (I) Standardization level is preset at 3 but can be set as 0, 1, 2, 3, or F; (J) SSR types to extract is preset at 0 to extract all SSR types, but users can set from 1 to 6 to extract only a type of SSR.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Custom parameters interface.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g003.tif"/>
</fig>
</sec>
<sec id="s3-1-4">
<title>3.1.4 Outputs</title>
<p>After the analysis, the web page is updated automatically, and the EasySSR reports page is exhibited (<xref ref-type="fig" rid="F4">Figure 4</xref>). The user can see a blue button to download the report folder in ZIP format, containing both the files used for input (FASTA, GenBank, and the generated PTT) and the complete IMEX output files for each genome individually, in HTML and TEXT formats comprising summary, align, results, and statistics about compound, perfect, and imperfect SSRs.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>EasySSR output screen part 1, with time of analysis, download report folder, and donut comparison charts. Demonstration of EasySSR Reports from the batch comparison of perfect and imperfect SSR in 54 complete genomes of <italic>Corynebacterium pseudotuberculosis</italic> with gene annotation.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g004.tif"/>
</fig>
<p>Back to the EasySSR Reports interface, the user has 07 interactive donut charts with the comparative analysis of total motifs, perfect, and imperfect proportions, total of perfect SSR per motif class, total of imperfect SSR per motif class, proportion of perfect motifs in coding/non-coding regions, proportion of imperfect motifs in coding/non-coding regions, and the general comparison of SSR in coding/non-coding regions (<xref ref-type="fig" rid="F4">Figure 4</xref>). It also plots 02 interactive bar charts containing the top 10 SSR motifs present in the genomes analyzed (<xref ref-type="fig" rid="F5">Figure 5</xref>). The first stacked bar chart (<xref ref-type="fig" rid="F5">Figure 5A</xref>) depicts the frequency distribution of the motif iterations present in all the analyzed genomes. In contrast, the second chart (<xref ref-type="fig" rid="F5">Figure 5B</xref>) represents the frequency distribution of the motifs across the genomes. The x-axis displays the frequency of the motif (<xref ref-type="fig" rid="F5">Figure 5B</xref>) and motif iteration (<xref ref-type="fig" rid="F5">Figure 5A</xref>) in each genome. At the same time, the stacked bars represent the absolute frequency of the motif (<xref ref-type="fig" rid="F5">Figure 5B</xref>) and motif iteration (<xref ref-type="fig" rid="F5">Figure 5A</xref>) across all genomes. The y-axis ranks the motif (<xref ref-type="fig" rid="F5">Figure 5B</xref>) and motif iterations (<xref ref-type="fig" rid="F5">Figure 5A</xref>) from highest to lowest based on their frequency and presence in the genomes. The top of the y-axis corresponds to the motif (<xref ref-type="fig" rid="F5">Figure 5B</xref>) and motif iteration (<xref ref-type="fig" rid="F5">Figure 5A</xref>) that is present in the highest number of genomes and has the highest absolute frequency in the stacked bar.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>EasySSR output screen part 2, from the large-scale analysis and comparison of perfect and imperfect SSR in 54 complete genomes of <italic>Corynebacterium pseudotuberculosis</italic> with gene annotation. <bold>(A)</bold> Interactive stacked bar chart summarizing the top 10 motifs with iteration present in most genomes, with their frequency per genome. <bold>(B)</bold> Interactive stacked bar chart summarizing the top 10 motifs present in most genomes, with their frequency per genome.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g005.tif"/>
</fig>
<p>In addition to the charts, EasySSR analysis includes three tables with filters and search options (<xref ref-type="fig" rid="F6">Figure 6</xref>). The first table (<xref ref-type="fig" rid="F6">Figure 6A</xref>) provides data on each motif, including its iterations, Genome, Left Flanking, Right Flanking, Start, and End positions. The second table, Frequency of Motifs per Genome (<xref ref-type="fig" rid="F6">Figure 6B</xref>), has been created to enhance the representation of motif frequency distribution across the different genomes. It offers a detailed count of each motif&#x2019;s occurrence in the genomes and a &#x201c;total&#x201d; column indicating the number of genomes in which each motif is present. This addition offers a more comprehensive and user-friendly view of the data. The third table is the statistic table (<xref ref-type="fig" rid="F6">Figure 6C</xref>). It contains various summarized quantitative data about the perfect and imperfect SSRs identified in each genome. These statistics include the genome size, total SSR count, percentage proportion of SSRs per base pair (calculated using the formula &#x3d; [(SSR&#x2a;100)/genome_size]), total SSR in Coding/Non-coding regions, total SSR per motif class, and subgroup analyses of perfect/imperfect and coding/non-coding SSRs.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>EasySSR output screen part 3, from the large-scale analysis and comparison of perfect and imperfect SSRs in 54 complete genomes of <italic>Corynebacterium pseudotuberculosis</italic>, with gene annotation. <bold>(A)</bold> Data table, <bold>(B)</bold> Frequency of Motifs per Genome table, and <bold>(C)</bold> Statistics table ordered by sequence name.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g006.tif"/>
</fig>
<p>These data are available for individual download. The plotted charts are in PNG/JPEG format and the tables in CSV, Excel (.xlsx), and PDF formats, also with the copy/print options. The user can save the EasySSR Reports HTML page using their browser option or write down the project number to consult within a month.</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Tool validation</title>
<sec id="s3-2-1">
<title>3.2.1 Function comparison against web tools</title>
<p>Web-tools for microsatellite mining are important as they simplify the search and analysis of microsatellite data; they do not require an investment of time for the user to install and run the software, neither do they require device storage and dependencies for installation (<xref ref-type="bibr" rid="B31">Sousa et al., 2018</xref>). Plenty of web tools have been released over time, but many accession links available in the articles are not functional totally or partially anymore, as is the case with ATRhunter (<xref ref-type="bibr" rid="B38">Wexler et al., 2004</xref>), Tandem Swan (<xref ref-type="bibr" rid="B4">Boeva et al., 2006</xref>), STRING (<xref ref-type="bibr" rid="B27">Parisi et al., 2003</xref>), MICAS and IMEx web (<xref ref-type="bibr" rid="B32">Sreenu, 2003</xref>), MsatFinder (<xref ref-type="bibr" rid="B36">Thurston and Field, 2005</xref>), RISA (<xref ref-type="bibr" rid="B11">Kim et al., 2012</xref>), and LSAT (<xref ref-type="bibr" rid="B3">Biswas et al., 2018</xref>). The web tools still available have a variety of specific features but are very limited in many aspects in comparison to command-line tools. 
After analyzing the citation rates and checking their availability, we defined the top 10 most-cited SSR web tools that were still operational in April 2023: TRF web (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>), Repeat Masker web (<xref ref-type="bibr" rid="B34">Tarailo-Graovac and Chen, 2009</xref>), Misa-Web (<xref ref-type="bibr" rid="B39">Yang et al., 2018</xref>), Batch Primer3 (<xref ref-type="bibr" rid="B40">You et al., 2008</xref>), Mreps (<xref ref-type="bibr" rid="B13">Kolpakov, 2003</xref>), Websat (<xref ref-type="bibr" rid="B19">Martins et al., 2009</xref>), SSR Locator (<xref ref-type="bibr" rid="B5">da Maia et al., 2008</xref>), STAR (<xref ref-type="bibr" rid="B7">Delgrange and Rivals, 2004</xref>), Imperfect SSR Finder (<xref ref-type="bibr" rid="B33">Stieneke and Eujayl, 2007</xref>), and PolyMorph Predict (<xref ref-type="bibr" rid="B6">Das et al., 2019</xref>). Their features were compared with EasySSR and summarized in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Web tool&#x2019;s function comparison made with EasySSR and the most-cited top 10 web tools available in April 2023. </p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Name</th>
<th align="left">EasySSR</th>
<th align="left">TRF web</th>
<th align="left">Repeat masker web</th>
<th align="left">Misa-web</th>
<th align="left">Batch Primer3</th>
<th align="left">Mreps</th>
<th align="left">Websat</th>
<th align="left">SSR locator</th>
<th align="left">STAR</th>
<th align="left">Imperfect SSR finder</th>
<th align="left">PolyMorph predict&#x2a;</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Citations</td>
<td align="left">This article</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=2912855801419178739&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">7077</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=523366199556217498&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">1860</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=16992411000514526523&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">927</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=7067527376692946995&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">909</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=7808444266556719865&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">459</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=16509104044085592718&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">348</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=2029704038553916697&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">262</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=6796575892001734157&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">137</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=14208575714414624570&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">11</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/scholar?cites=17152688138726382804&amp;as_sdt=2005&amp;sciodt=0,5&amp;hl=pt-BR">10</ext-link>
</td>
</tr>
<tr>
<td align="left">Author/Year</td>
<td align="left">This article</td>
<td align="left">Benson 1999</td>
<td align="left">Smit 1996 apud Tarailo-Graovac 2009</td>
<td align="left">Beier 2017</td>
<td align="left">You 2008</td>
<td align="left">Kolpakov 2003</td>
<td align="left">Martins 2009</td>
<td align="left">Da Maia 2008</td>
<td align="left">Delgrange 2004</td>
<td align="left">Stieneke 2007</td>
<td align="left">Das 2019</td>
</tr>
<tr>
<td colspan="12" align="center">ANALYSIS</td>
</tr>
<tr>
<td align="left">Microsatellites only</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
</tr>
<tr>
<td align="left">Maximum motif length</td>
<td align="left">1&#x2013;6&#xa0;bp</td>
<td align="left">1&#x2013;2000&#xa0;bp</td>
<td align="left">No limit</td>
<td align="left">1&#x2013;6&#xa0;bp</td>
<td align="left">2&#x2013;6&#xa0;bp</td>
<td align="left">No limit</td>
<td align="left">1&#x2013;6&#xa0;bp</td>
<td align="left">2&#x2013;10&#xa0;bp</td>
<td align="left">No limit</td>
<td align="left">2&#x2013;10&#xa0;bp</td>
<td align="left">1&#x2013;6&#xa0;bp</td>
</tr>
<tr>
<td align="left">Perfect SSRs</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
</tr>
<tr>
<td align="left">Imperfect SSRs</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Compound SSRs</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
</tr>
<tr>
<td align="left">Flexible Parameters</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Large-scale analysis</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td colspan="12" align="center">INPUT</td>
</tr>
<tr>
<td align="left">Limits Max. File Size</td>
<td align="left">No</td>
<td align="left">10&#xa0;Mb</td>
<td align="left">10&#xa0;Mb</td>
<td align="left">2&#xa0;Mb</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">150&#xa0;kb</td>
<td align="left">No</td>
<td align="left">1&#xa0;Mb</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Web analysis of many whole genomes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Accepts multiple FASTA files</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Integration with NCBI</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Box for cut and paste small sequences</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
</tr>
<tr>
<td colspan="12" align="center">OUTPUT CONTENT</td>
</tr>
<tr>
<td align="left">Text file</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
</tr>
<tr>
<td align="left">HTML file</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">PTT file</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Coding/Non-coding</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Flanking Sequences</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Sample comparison sheets</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Sample comparison graphs</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
</tr>
<tr>
<td colspan="12" align="center">OUTPUT RETURN</td>
</tr>
<tr>
<td align="left">Web results</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
</tr>
<tr>
<td align="left">Email results</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
</tr>
<tr>
<td align="left">Download results</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">No</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
<td align="left">Yes</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x201c;Yes&#x201d; to facilitate easier identification of tools that possess the specific feature.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The main limitations observed were the limited size of the input files, rare flexibilization of parameters, and the lack of identification of flanking sequences, downloadable outputs, summarized and post-processed graphical outputs, and features for online sample comparison, and that there is no exclusive focus on Microsatellites motifs (1&#x2013;6&#xa0;bp) but also on other Tandem repeats such as Minisatellites (10&#x2013;30&#xa0;bp) and Satellites (&#x3e;100&#xa0;bp). In some cases, even if the web service does exist, the full functionality is restricted to the command-line version, limiting the online service to basic and small analysis.</p>
<p>TRF (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>) and Repeat Masker (<xref ref-type="bibr" rid="B34">Tarailo-Graovac and Chen, 2009</xref>) are by far the most used tools, according to the citation rate. Alongside Mreps (<xref ref-type="bibr" rid="B13">Kolpakov, 2003</xref>) and STAR (<xref ref-type="bibr" rid="B7">Delgrange and Rivals, 2004</xref>), they are tools that are not limited to microsatellites but aim to identify all tandem repeats, including other types such as Minisatellites and Satellites. STAR is a tool focused on locating a given motif in a DNA sequence, instead of screening all motifs like the other Tandem Repeat tools (<xref ref-type="bibr" rid="B7">Delgrange and Rivals, 2004</xref>). For individuals who need to focus just on microsatellites, SSR-specific web applications such as EasySSR, Misa-web (<xref ref-type="bibr" rid="B1">Beier et al., 2017</xref>), Websat (<xref ref-type="bibr" rid="B19">Martins et al., 2009</xref>), SSR Locator (<xref ref-type="bibr" rid="B5">da Maia et al., 2008</xref>), and Imperfect SSR finder (<xref ref-type="bibr" rid="B33">Stieneke and Eujayl, 2007</xref>) may be more appropriate due to their specific range of motifs.</p>
<p>Batch Primer3 (<xref ref-type="bibr" rid="B40">You et al., 2008</xref>), Websat (<xref ref-type="bibr" rid="B19">Martins et al., 2009</xref>), and Polymorph predict (<xref ref-type="bibr" rid="B6">Das et al., 2019</xref>), in contrast to EasySSR, have integrated the primer design function. Nevertheless, at the time this work was being produced, Polymorph predict (<xref ref-type="bibr" rid="B6">Das et al., 2019</xref>) was malfunctioning by running only its native sample data (&#x201c;Chromosome 2&#x201d;) instead of the user input. Websat (<xref ref-type="bibr" rid="B19">Martins et al., 2009</xref>) does not accept input files containing more than 150,000 characters. Furthermore, its primary focus lies in designing primers for a limited number of manually selected SSRs, making it unsuitable for users needing comprehensive, automated online analysis on a large scale, a capability provided by BatchPrimer3 and EasySSR. BatchPrimer3 (<xref ref-type="bibr" rid="B40">You et al., 2008</xref>) functions well for large-scale primer analysis and SSR screening because the output is a list containing the identified SSRs and their respective flanking primers with details, statistics, and outputs in HTML, Text file, and Excel, but it does not analyze imperfect and compound SSRs, nor does it determine whether they are in coding or non-coding regions, and it does not perform online sample comparison like EasySSR.</p>
<p>The command-line version of Misa (<xref ref-type="bibr" rid="B35">Thiel et al., 2003</xref>; <xref ref-type="bibr" rid="B1">Beier et al., 2017</xref>) is a versatile tool that provides analysis of perfect and compound SSRs, being one of the gold standards in SSR mining. Many tools, such as Polymorph predict (<xref ref-type="bibr" rid="B6">Das et al., 2019</xref>), integrate Misa in their analysis, while others write additional advanced scripts to process Misa outputs, such as <xref ref-type="bibr" rid="B9">Galasso and Ponzoni (2015)</xref>. However, many of the applications are limited to computational experts who can develop scripts or at least comprehend how to execute them in the command-line. For non-experienced users, command-line tools are not as user-friendly as online services. Misa also has a web-server, but it does not provide the user all the features and capabilities of the command line, accepting only a single file with a maximum size of 2&#xa0;Mb as input. Unfortunately, many users may find this to be a significant impediment to their research because a single prokaryote genome may be larger than 2&#xa0;Mb. Misa-web results are two files: raw SSR data and statistics, not shown on a web interface but instead transmitted over email. On the other hand, EasySSR is able to process many genomes in a single run, with no maximum or minimum size limit, and summarize and compare them. It analyzes not only perfect and compound SSRs but also imperfect SSRs, offering the user the flexibility to include or exclude imperfects from their SSR mining. By running IMEX (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>) for SSR identification, EasySSR has the same or greater accuracy than Misa, as shown through the benchmark tests in <xref ref-type="table" rid="T2">Table 2</xref>. 
Furthermore, EasySSR is a web-based service that offers more functionalities with the same analysis as command-line tools, identifies coding/non-coding regions, and performs the post-processing and data comparison instead of giving the user only the raw data as output.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison of detected perfect microsatellites and execution time (in seconds) of SSR tools analyzed by Beier 2017 and EasySSR.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Sequence</th>
<th align="left">GMATo</th>
<th align="left">TRF</th>
<th align="left">Mreps</th>
<th align="left">SciRoKo</th>
<th align="left">ProGeRF</th>
<th align="left">MISA-web</th>
<th align="left">EasySSR</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">AC256511.1 (113&#xa0;kb)</td>
<td align="left">549</td>
<td align="left">580</td>
<td align="left">56</td>
<td align="left">549</td>
<td align="left">560</td>
<td align="left">549</td>
<td align="left">588</td>
</tr>
<tr>
<td align="left">AC257258.1 (124&#xa0;kb)</td>
<td align="left">938</td>
<td align="left">943</td>
<td align="left">85</td>
<td align="left">938</td>
<td align="left">901</td>
<td align="left">938</td>
<td align="left">984</td>
</tr>
<tr>
<td align="left">AC259365.1 (118&#xa0;kb)</td>
<td align="left">641</td>
<td align="left">666</td>
<td align="left">76</td>
<td align="left">641</td>
<td align="left">628</td>
<td align="left">641</td>
<td align="left">666</td>
</tr>
<tr>
<td align="left">AC261250.1 (91&#xa0;kb)</td>
<td align="left">498</td>
<td align="left">457</td>
<td align="left">60</td>
<td align="left">498</td>
<td align="left">456</td>
<td align="left">498</td>
<td align="left">529</td>
</tr>
<tr>
<td align="left">AC263353.1 (33&#xa0;kb)</td>
<td align="left">153</td>
<td align="left">173</td>
<td align="left">&#x2013;</td>
<td align="left">153</td>
<td align="left">142</td>
<td align="left">153</td>
<td align="left">167</td>
</tr>
<tr>
<td align="left">AC264961.1 (126&#xa0;kb)</td>
<td align="left">654</td>
<td align="left">620</td>
<td align="left">&#x2013;</td>
<td align="left">654</td>
<td align="left">605</td>
<td align="left">654</td>
<td align="left">728</td>
</tr>
<tr>
<td align="left">AC265197.1 (113&#xa0;kb)</td>
<td align="left">505</td>
<td align="left">496</td>
<td align="left">44</td>
<td align="left">505</td>
<td align="left">503</td>
<td align="left">505</td>
<td align="left">549</td>
</tr>
<tr>
<td align="left">AC266636.1 (167&#xa0;kb)</td>
<td align="left">839</td>
<td align="left">865</td>
<td align="left">79</td>
<td align="left">839</td>
<td align="left">811</td>
<td align="left">839</td>
<td align="left">861</td>
</tr>
<tr>
<td align="left">AC267178.1 (121&#xa0;kb)</td>
<td align="left">517</td>
<td align="left">530</td>
<td align="left">46</td>
<td align="left">516</td>
<td align="left">496</td>
<td align="left">517</td>
<td align="left">540</td>
</tr>
<tr>
<td align="left">AC269605.1 (119&#xa0;kb)</td>
<td align="left">728</td>
<td align="left">676</td>
<td align="left">76</td>
<td align="left">728</td>
<td align="left">700</td>
<td align="left">728</td>
<td align="left">762</td>
</tr>
<tr>
<td align="left">Sum</td>
<td align="left">6,022</td>
<td align="left">6,006</td>
<td align="left">522</td>
<td align="left">6,021</td>
<td align="left">5,802</td>
<td align="left">6,022</td>
<td align="left">6,374</td>
</tr>
<tr>
<td align="left">Execute time per batch (seconds)</td>
<td align="left">7.5</td>
<td align="left">30.7</td>
<td align="left">1.2</td>
<td align="left">0.6</td>
<td align="left">21</td>
<td align="left">1.8</td>
<td align="left">5</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Among the webtools, Imperfect SSR finder (<xref ref-type="bibr" rid="B33">Stieneke and Eujayl, 2007</xref>) and EasySSR are the only ones to be able to analyze perfect, imperfect, and compound SSR. However, even though Imperfect SSR finder has no cap for input size, it does not accept more than one FASTA file, does not compare samples, has no information in the output about flanking sequences or the SSR position in coding/non-coding regions, and does not generate user-friendly outputs such as charts.</p>
<p>An overall comparison of EasySSR and the most-cited 10 web tools for SSR mining shows that EasySSR clearly distinguishes itself by being a web tool that accepts for input both multi-FASTA and multiple FASTA files, in the same run, without a maximum size limit. Among all web tools, EasySSR is the only one to have the same features as command-line tools, being able to identify coding/non-coding information if an annotation file is uploaded, compare large datasets, and return processed outputs for online or local analyses.</p>
</sec>
<sec id="s3-2-2">
<title>3.2.2 Benchmark testing against web servers and command-line tools</title>
<sec id="s3-2-2-1">
<title>3.2.2.1 Intraspecific analysis for perfect SSR in prokaryotes, using only FASTA files as input</title>
<p>The benchmark results of this analysis are summarized in <xref ref-type="table" rid="T2">Table 2</xref>. <xref ref-type="bibr" rid="B1">Beier et al. (2017)</xref> did not include IMEX results in their comparison with Misa-Web because they reportedly could not execute the tool command-line mode due to operating system incompatibility. However, in the current analysis with EasySSR, a web tool that is IMEX based, the number of SSRs identified was greater than Misa-web, GMATo, Mreps, SciRoKo, ProGeRF, and TRF, and the analysis was conducted within the average time taken by the other programs, demonstrating that our algorithm has equal or higher sensitivity with the same parameters, giving the user the outputs already processed in charts and tables in 5&#xa0;s, as demonstrated through <xref ref-type="fig" rid="F7">Figure 7</xref>, with interactive and detailed results.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Demonstration of EasySSR Reports from the batch comparison of perfect SSRs in 10 BAC genomes without gene annotation. <bold>(A)</bold> EasySSR comparison charts with graphs for imperfect SSRs are blank due to the parameters set for mining perfect SSRs only, and coding/non-coding graphs are all in one color because no annotation file was input. <bold>(B)</bold> EasySSR statistics table reports in web mode, with all coding information as 0 because no annotation file was input.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g007.tif"/>
</fig>
<p>Besides the raw amount of perfect SSR found, the EasySSR statistics table (<xref ref-type="fig" rid="F7">Figure 7B</xref>) also gives the user categorized information about how many of the microsatellites found were Mono, Di, Tri, Tetra, Penta, and Hexanucleotide motifs. This information is also summarized visually into the graphs (<xref ref-type="fig" rid="F7">Figure 7A</xref>). In <xref ref-type="fig" rid="F7">Figure 7A</xref>, it is possible to notice that the graphs for imperfect SSRs are blank, due to the parameters set that searched for perfect SSR only. Moreover, in <xref ref-type="fig" rid="F7">Figure 7A</xref>, the charts to compare the position of SSRs in coding/non-coding appear all in the same color, indicating that all SSRs were found in non-coding regions. This happens when no annotation file is uploaded by the user, in a way that the algorithm is set to consider everything in the FASTA file as non-coding by default.</p>
</sec>
<sec id="s3-2-2-2">
<title>3.2.2.2 Interspecific analysis for imperfect SSR in prokaryotes and eukaryotes, using both FASTA and GenBank files as input</title>
<p>The benchmark test was carried out by running the &#x201c;Dataset 2&#x2014;IMEx&#x201d; through the software tools EasySSR, TRF (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>), Sputnik (<xref ref-type="bibr" rid="B22">Morgante et al., 2002</xref>), IMEx 1.0, and IMEx 2.1 (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>; <xref ref-type="bibr" rid="B23">Mudunuri et al., 2010a</xref>). We ran both versions of the IMEx program to compare the findings to version 1.0 tested in the article. <xref ref-type="table" rid="T3">Table 3</xref> summarizes the findings, which were consistent with Mudunuri&#x2019;s original 2007 article.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Comparison of detected microsatellites and execution time (in seconds) of SSR tools analyzed by <xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram (2007)</xref>, IMEx 2.1, and EasySSR.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Sequence</th>
<th align="left">TRF</th>
<th align="left">Sputnik</th>
<th align="left">IMEx 1.0 (2007)</th>
<th align="left">IMEx 2.1 (2023)</th>
<th align="left">EasySSR</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Yeast Chr4 (1,531&#xa0;Kb)</td>
<td align="left">7,308</td>
<td align="left">2,831</td>
<td align="left">39,759</td>
<td align="left">40,239</td>
<td align="left">40,239</td>
</tr>
<tr>
<td align="left">Plasmodium Chr4 (1,204&#xa0;Kb)</td>
<td align="left">25,601</td>
<td align="left">10,810</td>
<td align="left">54,232</td>
<td align="left">55,693</td>
<td align="left">55,693</td>
</tr>
<tr>
<td align="left">MTB H37Rv (4,411&#xa0;Kb)</td>
<td align="left">16,439</td>
<td align="left">9,412</td>
<td align="left">111,113</td>
<td align="left">111,583</td>
<td align="left">111,583</td>
</tr>
<tr>
<td align="left">Human Atrophin 1 (4.43&#xa0;Kb)</td>
<td align="left">50</td>
<td align="left">19</td>
<td align="left">146</td>
<td align="left">146</td>
<td align="left">146</td>
</tr>
<tr>
<td align="left">
<italic>E. coli</italic> K12 (4,639&#xa0;Kb)</td>
<td align="left">12,043</td>
<td align="left">5,387</td>
<td align="left">105,392</td>
<td align="left">106,243</td>
<td align="left">106,243</td>
</tr>
<tr>
<td align="left">Sum</td>
<td align="left">61,441</td>
<td align="left">28,459</td>
<td align="left">310,642</td>
<td align="left">313,904</td>
<td align="left">313,904</td>
</tr>
<tr>
<td align="left">Execute time per batch (seconds)</td>
<td align="left">108.5</td>
<td align="left">402.5</td>
<td align="left">30.8</td>
<td align="left">51.7</td>
<td align="left">72.0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>IMEx 1.0 had already exceeded TRF and Sputnik in terms of sensitivity and time since the 2007 article (<xref ref-type="bibr" rid="B24">Mudunuri and Nagarajaram, 2007</xref>). Many features were added to IMEx 2.1, which increased the analysis time slightly, although it is still less than the other tools evaluated. EasySSR is an online application that uses IMEx 2.1 for SSR mining; therefore, it has the same sensitivity as this software and performs additional data analysis and output processing with friendly outputs on the web. Due to Internet speed and computational availability, EasySSR online analysis may be slightly slower than the standalone command-line IMEx 2.1; however, it still easily surpassed command-line TRF and Sputnik in terms of sensitivity and time benchmarks (<xref ref-type="table" rid="T3">Table 3</xref>). EasySSR compensates for any additional processing time spent by the automated results with post-processed information, saving the user time that would otherwise be spent during data tabulation and analysis.</p>
<p>As this analysis was conducted including imperfect and perfect SSRs and providing the GenBank annotation file as well, EasySSR outputs provided all the information in the graphics and tables regarding SSRs and their position in coding and non-coding regions, as demonstrated in <xref ref-type="fig" rid="F8">Figure 8</xref>. In this way, besides the raw IMEx outputs, which are also available for download in the EasySSR outputs page for further analysis, the user can easily know the comparative proportion through the interactive charts for the whole sample of SSRs by coding/non-coding regions or motif classes, as perfect SSR, imperfect SSR, and in total (<xref ref-type="fig" rid="F8">Figure 8A</xref>). The user can also run EasySSR with a single file per time in order to obtain individual charts for each genome.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Demonstration of EasySSR Reports from the batch comparison of perfect and imperfect SSR in five sequences with gene annotation: human atrophin1 gene, <italic>Plasmodium falciparum</italic> chromosome IV, yeast chromosome IV, <italic>Mycobacterium tuberculosis</italic> H37Rv, and <italic>Escherichia coli</italic> K12. <bold>(A)</bold> Comparison charts and <bold>(B)</bold> statistics table reports in print mode.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g008.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F8">Figure 8B</xref> depicts the &#x201c;print&#x201d; version of the statistics table, which is also available through a button on the EasySSR reports page alongside the &#x201c;excel,&#x201d; &#x201c;csv,&#x201d; &#x201c;pdf,&#x201d; and &#x201c;copy&#x201d; alternative buttons that can be seen in <xref ref-type="fig" rid="F7">Figure 7B</xref>. In this mode, the viewer can get a panoramic view, which includes extra columns that were previously hidden behind the scroll bar in the visualization. Because only perfect SSR were studied in the previous analysis, there was no need to split the total SSR into perfect and imperfect. However, because imperfection is now considered, more columns must be examined. The statistics table contains comprehensive information encompassing the total number of SSRs, along with subtotals for perfect and imperfect SSRs, coding and non-coding classifications, and the proportions of the motifs (<xref ref-type="fig" rid="F8">Figure 8B</xref>).</p>
</sec>
</sec>
<sec id="s3-2-3">
<title>3.2.3 Large-scale interspecific analysis for imperfect SSR, using both FASTA and GenBank files as input with default parameters</title>
<p>EasySSR was run two times for the dataset containing 54 complete genomes of <italic>C. pseudotuberculosis (CP)</italic>: i) With custom parameters, mining perfect SSR only, and ii) With default parameters, mining both perfect and imperfect SSR.</p>
<p>With EasySSR, which also runs IMEx as the microsatellite mining tool, it was possible to locate all SSR in coding and non-coding regions and to visualize the proportion through charts (<xref ref-type="fig" rid="F4">Figures 4</xref>, <xref ref-type="fig" rid="F5">5</xref>) or generate new charts from the data available in the EasySSR statistic, motif frequency, and summary tables (<xref ref-type="fig" rid="F6">Figure 6</xref>). The analysis for perfect SSR only was completed within 5&#xa0;min and 38&#xa0;s (<xref ref-type="fig" rid="F9">Figure 9</xref>), while the analysis for perfect and imperfect took 8&#xa0;min and 41&#xa0;s (<xref ref-type="fig" rid="F4">Figure 4</xref>). The complete output datasheets for perfect SSR and perfect/imperfect analysis of the 54 complete genomes of <italic>C. pseudotuberculosis</italic> are available in <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>EasySSR output screen from the large-scale analysis and comparison of perfect SSR in 54 complete genomes of <italic>Corynebacterium pseudotuberculosis</italic> with gene annotation. <bold>(A)</bold> Comparison charts and <bold>(B)</bold> statistics table reports ordered by total SSR.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g009.tif"/>
</fig>
<p>The EasySSR quantitative results for perfect SSR were in concordance with those stated by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, as demonstrated in <xref ref-type="table" rid="T4">Table 4</xref>, and the current analysis included further comparison of the motif classes proportions. In total, 2,891 perfect SSR, 2,613 in coding regions, and 278 in non-coding regions were found, with 30 mono, 11 di, 1,301 tri, 1,201 tetra, 189 penta, and 159 hexanucleotides as proportions demonstrated in <xref ref-type="fig" rid="F9">Figure 9A</xref> and with data and accession numbers available in <xref ref-type="table" rid="T4">Table 4</xref> ordered by sequence name. The genomes had an average incidence of 53.5 perfect SSRs. Most genomes have fewer than 57 SSRs, ranging from 48 (CP_262, <italic>equi biovar</italic>) to 57. CP_258, CP_38, CP_CIP, and CP_MEX30 (<italic>equi biovar</italic>) were the only ones to have 57 perfect SSR; however, the distribution of those microsatellites is not the same in all four sequences. As shown in <xref ref-type="fig" rid="F9">Figure 9B</xref>, in CP_258 and CP_CIP, their distribution pattern (Simple Sequence Repeats Signature) is 49 SSR in coding to 8 SSR in non-coding regions, with 0 mono, 0 di, 25 tri, 23 tetra, 6 penta, and 3 hexanucleotides in both strains. Meanwhile, the distribution for CP_38 (2.33731&#xa0;Mb) is 47 coding/10 non-coding, with 0 mono, 2 di, 24 tri, 23 tetra, 6 penta, and 2 hexanucleotides, while the distribution for CP_MEX30 (2.33751&#xa0;Mb) was 50 coding/7 non-coding, with 3 mono, 1 di, 24 tri, 24 tetra, 2 penta, and 3 hexanucleotides.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Perfect microsatellite identified for 54 complete genomes of <italic>Corynebacterium pseudotuberculosis</italic>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Sequence</th>
<th align="left">Accession</th>
<th align="left">Biovar</th>
<th align="left">Size (Mb)</th>
<th align="left">Total PerfectSSR</th>
<th align="left">Total coding</th>
<th align="left">Total non-coding</th>
<th align="left">Mono</th>
<th align="left">Di</th>
<th align="left">Tri</th>
<th align="left">Tetra</th>
<th align="left">Penta</th>
<th align="left">Hexa</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CP_04MAT</td>
<td align="left">CP036469.1</td>
<td align="left">Ovis</td>
<td align="left">2.33801</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_1002B</td>
<td align="left">CP012837.1</td>
<td align="left">Ovis</td>
<td align="left">2.33831</td>
<td align="left">54</td>
<td align="left">49</td>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_106A</td>
<td align="left">CP003082.1</td>
<td align="left">Equi</td>
<td align="left">2.33835</td>
<td align="left">54</td>
<td align="left">48</td>
<td align="left">6</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">6</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_226</td>
<td align="left">CP010889.1</td>
<td align="left">Ovis</td>
<td align="left">2.33783</td>
<td align="left">53</td>
<td align="left">50</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_258</td>
<td align="left">CP003540.3</td>
<td align="left">Equi</td>
<td align="left">2.33749</td>
<td align="left">57</td>
<td align="left">49</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_262</td>
<td align="left">CP012022.2</td>
<td align="left">Equi</td>
<td align="left">2.33757</td>
<td align="left">48</td>
<td align="left">44</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">22</td>
<td align="left">23</td>
<td align="left">1</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_267</td>
<td align="left">CP003407.1</td>
<td align="left">Ovis</td>
<td align="left">2.33790</td>
<td align="left">54</td>
<td align="left">50</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_29156</td>
<td align="left">CP010795.2</td>
<td align="left">Ovis</td>
<td align="left">2.33775</td>
<td align="left">53</td>
<td align="left">50</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_31</td>
<td align="left">CP003421.4</td>
<td align="left">Equi</td>
<td align="left">2.33727</td>
<td align="left">53</td>
<td align="left">47</td>
<td align="left">6</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">4</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_316</td>
<td align="left">CP003077.2</td>
<td align="left">Equi</td>
<td align="left">2.33750</td>
<td align="left">52</td>
<td align="left">48</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_32</td>
<td align="left">CP015183.1</td>
<td align="left">Equi</td>
<td align="left">2.33730</td>
<td align="left">55</td>
<td align="left">47</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_33</td>
<td align="left">CP015184.1</td>
<td align="left">Equi</td>
<td align="left">2.33729</td>
<td align="left">55</td>
<td align="left">47</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_34</td>
<td align="left">CP015192.1</td>
<td align="left">Equi</td>
<td align="left">2.33733</td>
<td align="left">55</td>
<td align="left">47</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_35</td>
<td align="left">CP015185.1</td>
<td align="left">Equi</td>
<td align="left">2.33732</td>
<td align="left">55</td>
<td align="left">47</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_36</td>
<td align="left">CP015186.1</td>
<td align="left">Equi</td>
<td align="left">2.33734</td>
<td align="left">54</td>
<td align="left">46</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">23</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_38</td>
<td align="left">CP015187.1</td>
<td align="left">Equi</td>
<td align="left">2.33731</td>
<td align="left">57</td>
<td align="left">47</td>
<td align="left">10</td>
<td align="left">0</td>
<td align="left">2</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_38MAT</td>
<td align="left">CP036457.1</td>
<td align="left">Ovis</td>
<td align="left">2.33771</td>
<td align="left">53</td>
<td align="left">48</td>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_39</td>
<td align="left">CP015188.1</td>
<td align="left">Equi</td>
<td align="left">2.33728</td>
<td align="left">56</td>
<td align="left">47</td>
<td align="left">9</td>
<td align="left">0</td>
<td align="left">1</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_43</td>
<td align="left">CP015189.1</td>
<td align="left">Equi</td>
<td align="left">2.33756</td>
<td align="left">56</td>
<td align="left">46</td>
<td align="left">10</td>
<td align="left">0</td>
<td align="left">2</td>
<td align="left">23</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_46</td>
<td align="left">CP015190.1</td>
<td align="left">Equi</td>
<td align="left">2.33755</td>
<td align="left">56</td>
<td align="left">46</td>
<td align="left">10</td>
<td align="left">0</td>
<td align="left">2</td>
<td align="left">23</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_48</td>
<td align="left">CP015191.1</td>
<td align="left">Equi</td>
<td align="left">2.33735</td>
<td align="left">55</td>
<td align="left">46</td>
<td align="left">9</td>
<td align="left">0</td>
<td align="left">1</td>
<td align="left">23</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">CP_Cap1W</td>
<td align="left">CP034411.1</td>
<td align="left">Ovis</td>
<td align="left">2.33817</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_CAP3W</td>
<td align="left">CP026500.1</td>
<td align="left">Ovis</td>
<td align="left">2.33818</td>
<td align="left">52</td>
<td align="left">49</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_CAPJ4</td>
<td align="left">CP026499.1</td>
<td align="left">Ovis</td>
<td align="left">2.33808</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_CAPMI03</td>
<td align="left">CP035717.1</td>
<td align="left">Ovis</td>
<td align="left">2.33812</td>
<td align="left">51</td>
<td align="left">48</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">23</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_CIP</td>
<td align="left">CP003061.3</td>
<td align="left">Equi</td>
<td align="left">2.33748</td>
<td align="left">57</td>
<td align="left">49</td>
<td align="left">8</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">23</td>
<td align="left">6</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_Cp162</td>
<td align="left">CP003652.3</td>
<td align="left">Equi</td>
<td align="left">2.33736</td>
<td align="left">50</td>
<td align="left">47</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">22</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_E19</td>
<td align="left">CP012136.1</td>
<td align="left">Equi</td>
<td align="left">2.33753</td>
<td align="left">52</td>
<td align="left">49</td>
<td align="left">3</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_E55</td>
<td align="left">CP014341.1</td>
<td align="left">Ovis</td>
<td align="left">2.33829</td>
<td align="left">55</td>
<td align="left">51</td>
<td align="left">4</td>
<td align="left">2</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_I19</td>
<td align="left">CP002251.3</td>
<td align="left">Ovis</td>
<td align="left">2.33821</td>
<td align="left">54</td>
<td align="left">51</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_I37</td>
<td align="left">CP017384.1</td>
<td align="left">Equi</td>
<td align="left">2.33742</td>
<td align="left">51</td>
<td align="left">47</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">23</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MB11</td>
<td align="left">CP013260.2</td>
<td align="left">Equi</td>
<td align="left">2.33741</td>
<td align="left">52</td>
<td align="left">48</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MB14</td>
<td align="left">CP013261.1</td>
<td align="left">Equi</td>
<td align="left">2.33740</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MB20</td>
<td align="left">CP016829.1</td>
<td align="left">Equi</td>
<td align="left">2.33739</td>
<td align="left">54</td>
<td align="left">50</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">24</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MB30</td>
<td align="left">CP013262.2</td>
<td align="left">Equi</td>
<td align="left">2.33752</td>
<td align="left">52</td>
<td align="left">48</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MB66</td>
<td align="left">CP013263.1</td>
<td align="left">Equi</td>
<td align="left">2.33737</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">24</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MEX1</td>
<td align="left">CP017711.1</td>
<td align="left">Ovis</td>
<td align="left">2.33827</td>
<td align="left">51</td>
<td align="left">47</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MEX2</td>
<td align="left">CP046644.1</td>
<td align="left">Ovis</td>
<td align="left">2.33809</td>
<td align="left">51</td>
<td align="left">47</td>
<td align="left">4</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MEX25</td>
<td align="left">CP013697.1</td>
<td align="left">Ovis</td>
<td align="left">2.33813</td>
<td align="left">55</td>
<td align="left">50</td>
<td align="left">5</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">26</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_MEX29</td>
<td align="left">CP016826.1</td>
<td align="left">Ovis</td>
<td align="left">2.33780</td>
<td align="left">55</td>
<td align="left">51</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_MEX30</td>
<td align="left">CP017291.1</td>
<td align="left">Equi</td>
<td align="left">2.33751</td>
<td align="left">57</td>
<td align="left">50</td>
<td align="left">7</td>
<td align="left">3</td>
<td align="left">1</td>
<td align="left">24</td>
<td align="left">24</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_MEX31</td>
<td align="left">CP017292.1</td>
<td align="left">Equi</td>
<td align="left">2.33754</td>
<td align="left">54</td>
<td align="left">48</td>
<td align="left">6</td>
<td align="left">0</td>
<td align="left">2</td>
<td align="left">24</td>
<td align="left">23</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_OVID04</td>
<td align="left">CP035640.1</td>
<td align="left">Ovis</td>
<td align="left">2.33810</td>
<td align="left">51</td>
<td align="left">48</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_OVIOS02</td>
<td align="left">CP035679.1</td>
<td align="left">Ovis</td>
<td align="left">2.33793</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_OVIZ01</td>
<td align="left">CP035678.1</td>
<td align="left">Ovis</td>
<td align="left">2.33781</td>
<td align="left">52</td>
<td align="left">48</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PA01</td>
<td align="left">CP013327.1</td>
<td align="left">Ovis</td>
<td align="left">2.33777</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PA02</td>
<td align="left">CP015309.1</td>
<td align="left">Ovis</td>
<td align="left">2.33834</td>
<td align="left">51</td>
<td align="left">48</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">23</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PA04</td>
<td align="left">CP019587.1</td>
<td align="left">Ovis</td>
<td align="left">2.33773</td>
<td align="left">56</td>
<td align="left">48</td>
<td align="left">8</td>
<td align="left">5</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PA07</td>
<td align="left">CP024457.1</td>
<td align="left">Ovis</td>
<td align="left">2.33820</td>
<td align="left">51</td>
<td align="left">48</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PAT10</td>
<td align="left">CP002924.1</td>
<td align="left">Ovis</td>
<td align="left">2.33830</td>
<td align="left">56</td>
<td align="left">51</td>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_PAT14</td>
<td align="left">CP047603.1</td>
<td align="left">Ovis</td>
<td align="left">2.33825</td>
<td align="left">54</td>
<td align="left">51</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_PAT16</td>
<td align="left">CP046641.1</td>
<td align="left">Ovis</td>
<td align="left">2.33815</td>
<td align="left">54</td>
<td align="left">51</td>
<td align="left">3</td>
<td align="left">0</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">CP_PO22241</td>
<td align="left">CP013698.1</td>
<td align="left">Ovis</td>
<td align="left">2.33816</td>
<td align="left">53</td>
<td align="left">49</td>
<td align="left">4</td>
<td align="left">1</td>
<td align="left">0</td>
<td align="left">25</td>
<td align="left">21</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">CP_PO2695</td>
<td align="left">CP012695.1</td>
<td align="left">Ovis</td>
<td align="left">2.33826</td>
<td align="left">54</td>
<td align="left">49</td>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">0</td>
<td align="left">24</td>
<td align="left">22</td>
<td align="left">3</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">Total</td>
<td align="left">54</td>
<td align="left">Ovis &#x3d; 28; Equi &#x3d; 26</td>
<td align="left">-</td>
<td align="left">2,891</td>
<td align="left">2,613</td>
<td align="left">278</td>
<td align="left">30</td>
<td align="left">11</td>
<td align="left">1,301</td>
<td align="left">1,201</td>
<td align="left">189</td>
<td align="left">159</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In the analysis where imperfect microsatellites were allowed, the Simple Sequence Repeats Signature changed. The total of the SSRs identified was 68,942 SSR, 60,390 in coding regions, and 8,552 in non-coding regions, with 50 mono, 4,268 di, 37,411 tri, 23,025 tetra, 2,524 penta, and 1,664 hexanucleotides, with a proportion of 2,146 perfect SSRs to 66,796 imperfect SSRs (<xref ref-type="fig" rid="F4">Figure 4</xref>). The genomes had an average incidence of 40 perfect SSRs and 1,237 imperfect SSRs per genome, as shown in the data summarized in <xref ref-type="fig" rid="F6">Figures 6</xref>, <xref ref-type="fig" rid="F10">10</xref> through different visualization modes, with <xref ref-type="fig" rid="F6">Figure 6B</xref> representing the output as shown in the EasySSR output page and <xref ref-type="fig" rid="F10">Figure 10</xref> showing the complete table ordered by sequence name for better comparison with <xref ref-type="table" rid="T4">Table 4</xref> (Perfect SSRs output). The number of perfect SSRs found ranges from 33 (CP_262, <italic>equi biovar</italic>) to 44 (CP_PAT10, <italic>ovis biovar</italic>). CP_258, CP_CIP, CP_38, and CP_MEX30 had, respectively, 40, 40, 38, and 43 perfect SSRs. The distribution of perfect SSRs was the same in CP_258 and CP_CIP with Mono: 0; Di: 0; Tri: 18; Tetra: 17; Penta: 2; and Hexa: 3. It is possible to notice that when mismatches were allowed in a tract, EasySSR through the IMEx algorithm could extend tracts that were previously interrupted by an imperfection and considered as perfect because they had passed the repetition cutoff when they were actually part of longer imperfect tracts; thus, the average amount of perfect SSRs per genome decreased from 53.5 to 40 in the analysis that included imperfections.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Demonstration of EasySSR Reports from the batch comparison of perfect and imperfect SSR in 54 sequences of <italic>Corynebacterium pseudotuberculosis</italic> with annotation. Statistics table reports in Excel mode optimized for visualization of the complete output with all columns and rows.</p>
</caption>
<graphic xlink:href="fgene-14-1228552-g010.tif"/>
</fig>
<p>
<xref ref-type="bibr" rid="B14">Laskar et al. (2021)</xref>, (<xref ref-type="bibr" rid="B15">2022</xref>), and <xref ref-type="bibr" rid="B10">Jilani and Ali (2022)</xref> used similar information about incidence, prevalence, composition, and localization in their studies of Simple Sequence Repeats Signature in viruses using IMEx. Those analyses might seem basic, but they require a lot of data tabulation before the tables are ready for analysis, a feature that is already automated by EasySSR. This is a small demonstration of the versatility of EasySSR output, which made this analysis possible in minutes due to the processed information given as a result, allowing the researcher to invest their time in further analysis that otherwise would be too time demanding.</p>
<p>EasySSR bar charts show the top 10 most-frequent motifs present in all the strains (<xref ref-type="fig" rid="F5">Figure 5</xref>). They are interactive graphs that can be used to remove specific strains from visualization or verify how many times a specific motif was found in different loci in a given genome. In this way, it is possible to verify that the GCT, TGC, and GCA motifs were present in all the 54 genomes used by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>. For example, the number of GCT motifs present in a genome varied from 26 to 37 different loci (<xref ref-type="fig" rid="F5">Figure 5A</xref>). It might present itself as a useful shortcut tool for marker development. <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref> identified CAC and GGAA as putative markers based on their differential localization in the biovars. EasySSR did not reach the same results for those markers as it has a different approach, where the bar charts demonstrate quantitatively how many times the motif appears in each genome and rank the motifs based on how many genomes of the dataset they are present in, aiming to find motifs that are common to all sequences. However, EasySSR can also be used for analysis, such as the one conducted by <xref ref-type="bibr" rid="B28">Pinheiro et al. (2022)</xref>, as the EasySSR summary table contains information about the motif, iteration, and position (start and end), and it is easily downloadable in friendly formats such as &#x201c;.xlsx&#x201d; and &#x201c;.csv&#x201d; that can be imported for further analysis using other statistical tools, for example, those available in the R programming language. In this way, EasySSR outputs are versatile and can be used as a guide for visual analysis through the interactive graphs or processed by other tools with any approach the user wants.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>Despite the versatility of the existing web tools for microsatellite analysis, EasySSR presents an innovative web technology that implements the popular IMEx 2.1 algorithm under novel settings, with a friendly interface suitable for experts and inexperienced scientists to perform online SSR analysis with the same accuracy and features as command-line tools. EasySSR automates the SSR mining in batch analysis, for small or large datasets, from receiving many FASTA input files, converting, generating raw SSR outputs for each file, and processing those outputs in a comparative approach, with additional comprehensible results summarized into interactive charts and tables, giving the user the results ready for further analysis in minutes and saving a significant amount of time otherwise spent on data tabulation.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>SA and RR conceived the idea of the program and together with VF developed the tool. SA, CD, and AS evaluated the biological and computational information and defined the functions to be inserted. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work has been supported by the CNPq (National Council for Scientific and Technological Development) project &#x23;312316/2022-4; the Secretary of State for Science, Technology, and Professional and Technological Education (SECTET); the Dean&#x2019;s Office for Research and Graduate Studies/Federal University of Par&#xe1;&#x2013;PROPESP/UFPA (PAPQ); and PROCAD-AM (National Program for Academic Cooperation in the Amazon) from CAPES, under project No. 88881.200563/2018-01.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2023.1228552/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2023.1228552/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.XLSX" id="SM1" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beier</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Thiel</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>M&#xfc;nch</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Scholz</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Mascher</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MISA-Web: a web server for microsatellite prediction</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2583</fpage>&#x2013;<lpage>2585</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx198</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benson</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Tandem repeats finder: a program to analyze DNA sequences</article-title>. <source>Nucleic Acids Res.</source> <volume>27</volume>, <fpage>573</fpage>&#x2013;<lpage>580</lpage>. <pub-id pub-id-type="doi">10.1093/nar/27.2.573</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biswas</surname>
<given-names>M. K.</given-names>
</name>
<name>
<surname>Natarajan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Biswas</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nath</surname>
<given-names>U. K.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.-I.</given-names>
</name>
<name>
<surname>Nou</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Lsat: liliaceae simple sequences analysis tool, a web server</article-title>. <source>Bioinformation</source> <volume>14</volume>, <fpage>181</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.6026/97320630014181</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boeva</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Regnier</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Papatsenko</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Makeev</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Short fuzzy tandem repeats in genomic sequences, identification, and possible role in regulation of gene expression</article-title>. <source>Bioinformatics</source> <volume>22</volume>, <fpage>676</fpage>&#x2013;<lpage>684</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btk032</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>da Maia</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Palmieri</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>de Souza</surname>
<given-names>V. Q.</given-names>
</name>
<name>
<surname>Kopp</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>de Carvalho</surname>
<given-names>F. I. F.</given-names>
</name>
<name>
<surname>Costa de Oliveira</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>SSR locator: tool for simple sequence repeat discovery integrated with primer design and PCR simulation</article-title>. <source>Int. J. Plant Genomics</source> <volume>2008</volume>, <fpage>412696</fpage>&#x2013;<lpage>412699</lpage>. <pub-id pub-id-type="doi">10.1155/2008/412696</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Das</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Arora</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Jaiswal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Iquebal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Angadi</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Fatma</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>PolyMorphPredict: a universal web-tool for rapid polymorphic microsatellite marker discovery from whole genome and transcriptome data</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>, <fpage>1966</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2018.01966</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Delgrange</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Rivals</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Star: an algorithm to search for tandem approximate repeats</article-title>. <source>Bioinformatics</source> <volume>20</volume>, <fpage>2812</fpage>&#x2013;<lpage>2820</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bth335</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="web">
<collab>Django Software Foundation</collab> (<year>2023</year>). <article-title>Django makes it easier to build better web apps more quickly and with less code</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.djangoproject.com/">https://www.djangoproject.com/</ext-link>
</comment>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Galasso</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Ponzoni</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>In Silico Exploration of Cannabis sativa L. Genome for Simple Sequence Repeats (SSRs)</article-title>. <source>Am. J. Plant Sci.</source> <volume>06</volume>, <fpage>3244</fpage>&#x2013;<lpage>3250</lpage>. <pub-id pub-id-type="doi">10.4236/ajps.2015.619315</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jilani</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Assessment of simple sequence repeats signature in hepatitis E virus (HEV) genomes</article-title>. <source>J. Genet. Eng. Biotechnol.</source> <volume>20</volume>, <fpage>73</fpage>. <pub-id pub-id-type="doi">10.1186/s43141-022-00365-w</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Oh</surname>
<given-names>S.-K.</given-names>
</name>
<name>
<surname>Kwon</surname>
<given-names>S.-Y.</given-names>
</name>
<name>
<surname>Hur</surname>
<given-names>C.-G.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Risa: a new web-tool for rapid identification of SSRs and analysis of primers</article-title>. <source>Genes Genomics</source> <volume>34</volume>, <fpage>583</fpage>&#x2013;<lpage>590</lpage>. <pub-id pub-id-type="doi">10.1007/s13258-012-0032-x</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kofler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Schl&#xf6;tterer</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lelley</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>SciRoKo: a new tool for whole genome microsatellite search and investigation</article-title>. <source>Bioinformatics</source> <volume>23</volume>, <fpage>1683</fpage>&#x2013;<lpage>1685</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm157</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolpakov</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bana</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kucherov</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>mreps: efficient and flexible detection of tandem repeats in DNA</article-title>. <source>Nucleic Acids Res.</source> <volume>31</volume>, <fpage>3672</fpage>&#x2013;<lpage>3678</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkg617</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laskar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jilani</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Implications of genome simple sequence repeats signature in 98 Polyomaviridae species</article-title>. <source>3 Biotech.</source> <volume>11</volume>, <fpage>35</fpage>. <pub-id pub-id-type="doi">10.1007/s13205-020-02583-w</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laskar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jilani</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Nasrin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Microsatellite signature of reference genome sequence of SARS-CoV-2 and 32 species of coronaviridae family</article-title>. <source>Int. J. Infect.</source> <volume>9</volume>, <fpage>e122019</fpage>. <pub-id pub-id-type="doi">10.5812/iji-122019</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leclercq</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rivals</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jarne</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Detecting microsatellites within genomes: significant variation among algorithms</article-title>. <source>BMC Bioinforma.</source> <volume>8</volume>, <fpage>125</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-8-125</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lim</surname>
<given-names>K. G.</given-names>
</name>
<name>
<surname>Kwoh</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>L. Y.</given-names>
</name>
<name>
<surname>Wirawan</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Review of tandem repeat search tools: a systematic approach to evaluating algorithmic performance</article-title>. <source>Brief. Bioinform.</source> <volume>14</volume>, <fpage>67</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbs023</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopes</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Moraes</surname>
<given-names>W. J. L.</given-names>
</name>
<name>
<surname>Rodrigues</surname>
<given-names>T. D. S.</given-names>
</name>
<name>
<surname>Bartholomeu</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ProGeRF: proteome and genome repeat finder utilizing a fast parallel hash function</article-title>. <source>Biomed. Res. Int.</source> <volume>2015</volume>, <fpage>394157</fpage>&#x2013;<lpage>394159</lpage>. <pub-id pub-id-type="doi">10.1155/2015/394157</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martins</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Lucas</surname>
<given-names>D. C. S.</given-names>
</name>
<name>
<surname>Neves</surname>
<given-names>K. F. S.</given-names>
</name>
<name>
<surname>Bertioli</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>WebSat - a web software for MicroSatellite marker development</article-title>. <source>Bioinformation</source> <volume>3</volume>, <fpage>282</fpage>&#x2013;<lpage>283</lpage>. <pub-id pub-id-type="doi">10.6026/97320630003282</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mathur</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tyagi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kataria</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A comparative study of various simple sequence repeats identification tools using Aspergillus fumigatus genome</article-title>. <source>J. Bioinfo Comp. Genom</source> <volume>3</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.17303/jbcg.2020.3.102</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Merkel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gemmell</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Detecting short tandem repeats from genome data: opening the software black box</article-title>. <source>Brief. Bioinform.</source> <volume>9</volume>, <fpage>355</fpage>&#x2013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbn028</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morgante</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hanafey</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Powell</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Microsatellites are preferentially associated with nonrepetitive DNA in plant genomes</article-title>. <source>Nat. Genet.</source> <volume>30</volume>, <fpage>194</fpage>&#x2013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1038/ng822</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mudunuri</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Pallamsetty</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nagarajaram</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2010a</year>). <article-title>G-IMEx: a comprehensive software tool for detection of microsatellites from genome sequences</article-title>. <source>Bioinformation</source> <volume>5</volume>, <fpage>221</fpage>&#x2013;<lpage>223</lpage>. <pub-id pub-id-type="doi">10.6026/97320630005221</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mudunuri</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Nagarajaram</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>IMEx: imperfect microsatellite extractor</article-title>. <source>Bioinformatics</source> <volume>23</volume>, <fpage>1181</fpage>&#x2013;<lpage>1187</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm097</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mudunuri</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Pallamsetty</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nagarajaram</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2010b</year>). &#x201c;<article-title>Comparative analysis of microsatellite detecting software: a significant variation in results and influence of parameters</article-title>,&#x201d; in <source>Proceedings of the international symposium on biocomputing</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Tulpan</surname>
<given-names>D.</given-names>
</name>
</person-group> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1145/1722024.1722068</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oliveira</surname>
<given-names>E. J. de</given-names>
</name>
<name>
<surname>Dantas</surname>
<given-names>J. L. L.</given-names>
</name>
<name>
<surname>Castellen</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Machado</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Identifica&#xe7;&#xe3;o de microssat&#xe9;lites para o mamoeiro por meio da explora&#xe7;&#xe3;o do banco de dados de DNA</article-title>. <source>Rev. Bras. Frutic.</source> <volume>30</volume>, <fpage>841</fpage>&#x2013;<lpage>845</lpage>. <pub-id pub-id-type="doi">10.1590/s0100-29452008000300049</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parisi</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>De Fonzo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Aluffi-Pentini</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>String: finding tandem repeats in DNA sequences</article-title>. <source>Bioinformatics</source> <volume>19</volume>, <fpage>1733</fpage>&#x2013;<lpage>1738</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btg268</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pinheiro</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Gois</surname>
<given-names>B. V. A.</given-names>
</name>
<name>
<surname>Nogueira</surname>
<given-names>W. G.</given-names>
</name>
<name>
<surname>Ara&#xfa;jo</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Queiroz</surname>
<given-names>A. L. C.</given-names>
</name>
<name>
<surname>Cardenas-Alegria</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>
<italic>In silico</italic> approach to identify microsatellite candidate biomarkers to differentiate the biovar of Corynebacterium pseudotuberculosis genomes</article-title>. <source>Front. Bioinforma.</source> <volume>2</volume>, <fpage>931583</fpage>. <pub-id pub-id-type="doi">10.3389/fbinf.2022.931583</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Grover</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kahl</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Mining microsatellites in eukaryotic genomes</article-title>. <source>Trends Biotechnol.</source> <volume>25</volume>, <fpage>490</fpage>&#x2013;<lpage>498</lpage>. <pub-id pub-id-type="doi">10.1016/j.tibtech.2007.07.013</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sousa</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Mau&#xe9;s</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lobato</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Franco</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Pinheiro</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ara&#xfa;jo</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>PhageWeb &#x2013; web interface for rapid identification and characterization of prophages in bacterial genomes</article-title>. <source>Front. Genet.</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.3389/fgene.2018.00644</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sreenu</surname>
<given-names>V. B.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>MICdb: database of prokaryotic microsatellites</article-title>. <source>Nucleic Acids Res.</source> <volume>31</volume>, <fpage>106</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkg002</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Stieneke</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Eujayl</surname>
<given-names>I. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Imperfect SSR finder</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://ssr.nwisrl.ars.usda.gov/">http://ssr.nwisrl.ars.usda.gov/</ext-link>
</comment>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tarailo&#x2010;Graovac</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Using RepeatMasker to identify repetitive elements in genomic sequences</article-title>. <source>Curr. Protoc. Bioinforma.</source> <volume>25</volume>, <fpage>4.10.1</fpage>&#x2013;<lpage>4.10.14</lpage>. <pub-id pub-id-type="doi">10.1002/0471250953.bi0410s25</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thiel</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Michalek</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Varshney</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Graner</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Exploiting EST databases for the development and characterization of gene-derived SSR-markers in barley (Hordeum vulgare L)</article-title>. <source>Theor. Appl. Genet.</source> <volume>106</volume>, <fpage>411</fpage>&#x2013;<lpage>422</lpage>. <pub-id pub-id-type="doi">10.1007/s00122-002-1031-0</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Thurston</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Field</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Msatfinder: detection and characterisation of microsatellites. CEH oxford, mansf. Road, oxford OX1 3SR</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.genomics.ceh.ac.uk/msatfinder/">http://www.genomics.ceh.ac.uk/msatfinder/</ext-link>
</comment>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>GMATo: a novel tool for the identification and analysis of microsatellites in large genomes</article-title>. <source>Bioinformation</source> <volume>9</volume>, <fpage>541</fpage>&#x2013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.6026/97320630009541</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wexler</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yakhini</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kashi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Geiger</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2004</year>). &#x201c;<article-title>Finding approximate tandem repeats in genomic sequences</article-title>,&#x201d; in <conf-name>Proceedings of the eighth annual international conference on Computational molecular biology - RECOMB &#x2019;04</conf-name>, <conf-loc>San Diego California USA</conf-loc>, <conf-date>March 27 - 31, 2004</conf-date> (<publisher-loc>New York, New York, USA</publisher-loc>: <publisher-name>ACM Press</publisher-name>), <fpage>223</fpage>&#x2013;<lpage>232</lpage>. <pub-id pub-id-type="doi">10.1145/974614.974644</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Isolation of novel microsatellite markers and their application for genetic diversity and parentage analyses in sika deer</article-title>. <source>Gene</source> <volume>643</volume>, <fpage>68</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1016/j.gene.2017.12.007</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>You</surname>
<given-names>F. M.</given-names>
</name>
<name>
<surname>Huo</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>Y. Q.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hane</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>BatchPrimer3: a high throughput web application for pcr and sequencing primer design</article-title>. <source>BMC Bioinforma.</source> <volume>9</volume>, <fpage>253</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-253</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>