<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">844385</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2022.844385</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A High-Quality Genome Assembly of <italic>Sorghum dochna</italic>
</article-title>
<alt-title alt-title-type="left-running-head">Chen et al.</alt-title>
<alt-title alt-title-type="right-running-head">Genome of <italic>Sorghum dochna</italic>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Yu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1607610/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yongbai</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Hongjie</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Juan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/915737/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Lichao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Miao</surname>
<given-names>Fuhong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Zixin</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Yang</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Jianwei</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yang</surname>
<given-names>Guofeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/510934/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Zengyu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/27852/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Grassland Science, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Key Laboratory of National Forestry and Grassland Administration on Grassland Resources and Ecology in the Yellow River Delta, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Animal Science, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Berry Genomics Corporation</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/806696/overview">Luis Herrera-Estrella</ext-link>, Texas Tech University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/121109/overview">Victor A Albert</ext-link>, University at Buffalo, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/745274/overview">Chi Yang</ext-link>, Fujian Academy of Agricultural Sciences, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Guofeng Yang, <email>yanggf@qau.edu.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Plant Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>12</day>
<month>08</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>844385</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>12</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>24</day>
<month>05</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Chen, Zhang, Wang, Sun, Ma, Miao, Zhang, Cheng, Huang, Yang and Wang.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Chen, Zhang, Wang, Sun, Ma, Miao, Zhang, Cheng, Huang, Yang and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Sweet sorghum (<italic>Sorghum dochna</italic>) is a high-quality bio-energy crop that also serves as food for humans and animals. However, there is little information on the genomic characteristics of <italic>S. dochna</italic>. In this study, we presented a high-quality assembly of <italic>S. dochna</italic> with PacBio long reads, Illumina short reads, high-throughput chromosome capture technology (Hi-C) sequencing data, gene annotation, and a comparative genome analysis. The results showed that the genome of <italic>S. dochna</italic> was assembled to 777&#xa0;Mb with a contig N50 of 553.47&#xa0;kb and a scaffold N50 of 727.11&#xa0;kb. In addition, the gene annotation predicted 37,971 genes and 39,937 transcripts in the genome of <italic>S. dochna</italic>. A Venn analysis revealed a set of 7,988 common gene annotations by integrating five databases. A Cafe software analysis showed that 191 gene families were significantly expanded, while 3,794 were significantly contracted in <italic>S. dochna</italic>. A GO enrichment analysis showed that the expanded gene families were primarily clustered in the metabolic process, DNA reconstruction, and DNA binding among others. The high-quality genome map constructed in this study provides a biological basis for the future analysis of the biological characteristics of <italic>S. dochna</italic>, which is crucial for its breeding.</p>
</abstract>
<kwd-group>
<kwd>
<italic>Sorghum dochna</italic>
</kwd>
<kwd>genome</kwd>
<kwd>assembly</kwd>
<kwd>comparative genome analysis</kwd>
<kwd>Hi-C</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>
<italic>Sorghum dochna</italic> belongs to the Gramineae family and has high sugar content in its stalks. Typically, it is a perennial crop except in frost-prone areas. Relevant historical records indicate that <italic>S. dochna</italic> was initially grown in India and Myanmar. During the mid-19th century, the United States introduced the <italic>S. dochna</italic> variety &#x201c;Amber&#x201d; from south China for cultivation, resulting in the annual production of <italic>S. dochna</italic> syrup as high as 111.56 million liters. Currently, <italic>S. dochna</italic> is cultivated in all continents of the world (<xref ref-type="bibr" rid="B10">Gnansounou et al., 2005</xref>).</p>
<p>Considering that most global economies are moving toward low-carbon energy sources, bio-renewable energy may replace oil and coal (<xref ref-type="bibr" rid="B1">Antonopoulou et al., 2008</xref>). <italic>S. dochna</italic> is an ideal bio-energy crop owing to its high photosynthetic efficiency, high resistance to stress, high sugar content (<xref ref-type="bibr" rid="B8">Erdei et al., 2009</xref>), high yield, and drought resistance. Thus, <italic>S. dochna</italic> can be used as an excellent silage material. In addition, it tastes delicious and is suitable for livestock consumption. Moreover, <italic>S. dochna</italic> as human food can be eaten raw or used as a raw material for making sugar, wine, and other related products. After threshing, <italic>S. dochna</italic> tassels can also be used to make brooms and cookware. Currently, <italic>S. dochna</italic> is economically valuable. There is a need for more insight into its biological mechanisms and genomic characteristics to more efficiently utilize its biological value.</p>
<p>To date, many studies have provided sufficient data for elucidating the <italic>S. bicolor</italic> genome (<xref ref-type="bibr" rid="B24">Paterson et al., 2009</xref>). In addition, the exploitation of <italic>S. bicolor</italic> as human food has increased worldwide. However, studies on the genomic analysis of <italic>S. dochna</italic> are limited. Moreover, the genomic characteristics of <italic>S. dochna</italic> are poorly understood. In this study, a genome assembly of <italic>S. dochna</italic> was constructed at the chromosome level using PacBio long-read, Illumina short-read, and high-throughput chromosome capture technology (Hi-C) sequencing data (<xref ref-type="bibr" rid="B16">Jin et al., 2021</xref>). This study provides valuable genomic data that can be used to conduct further research on the economic value of <italic>S. dochna</italic>. In addition, the findings of this study will facilitate comparative genomic analyses with other Gramineae forage plants.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and Methods</title>
<sec id="s2-1">
<title>Materials Collection</title>
<p>
<italic>S. dochna</italic> variety De Sheng was selected and cultivated in soil at the Research Center of Grassland, Agriculture, and Animal Husbandry of Qingdao Agricultural University (Qingdao, China). The <italic>S. dochna</italic> seeds were washed once with distilled water and disinfected with 75% alcohol for 1&#xa0;min and NaClO for 7&#x2013;8&#xa0;min. After that, the seeds were dried and planted in sterilized nutrient soil. The leaves of 45-day-old seedlings were harvested (<xref ref-type="fig" rid="F1">Figure 1</xref>), frozen in liquid nitrogen, and then stored at &#x2212;80&#xb0;C for subsequent analysis.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Morphological characteristics of <italic>Sorghum dochna</italic> as shown in photographs that display a whole plant, leaf, and root.</p>
</caption>
<graphic xlink:href="fgene-13-844385-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>DNA and RNA Extraction</title>
<p>Total genomic DNA was extracted from the leaves using a Tiangen DNAsecure Novel Plant Genomic DNA Extraction Kit (Dp320-03) according to the manufacturer&#x2019;s instructions (Tiangen, Beijing, China). Total RNA was extracted using an EASYspin Plus Polysaccharide Polyphenols/Complex Plant RNA Rapid Extraction Kit following the manufacturer&#x2019;s instructions.</p>
</sec>
<sec id="s2-3">
<title>Survey Analysis</title>
<p>Raw sequence data generated by the Illumina platform (San Diego, CA, United States) were filtered by removing reads that met any of the following criteria: reads containing adapter sequences, reads with N bases &#x3e;3, and reads in which low-quality bases (quality &#x2264;5) exceeded 20% (<xref ref-type="bibr" rid="B18">Li et al., 2009</xref>). The K-mer analysis was performed using Jellyfish to estimate the genome size and sample heterozygosity. The genome size can be estimated using the K-mer analysis (<xref ref-type="bibr" rid="B3">Chikhi and Medvedev, 2014</xref>). The K-mer frequency distribution depends on the characteristics of the genome and follows a Poisson distribution. We estimated the genome size of <italic>S. dochna</italic> using the following formula: genome size &#x3d; (total number of 17-mers)/(position of peak depth).</p>
</sec>
<sec id="s2-4">
<title>Genome Assembly and Quality Validation</title>
<sec id="s2-4-1">
<title>Hi-Fi Assembly</title>
<p>We constructed a PCR-free SMRTbell library by repairing and connecting the high-quality purified genome and sequencing it by PacBio (Menlo Park, CA, United States) SMRT technology. After the library was constructed, its size was detected using an Agilent 2100 (Agilent Technologies, Santa Clara, CA, United States) fragment analyzer capillary electrophoresis or pulsed field electrophoresis. After the library was calculated by a PacBio calculator, sequencing primers and sequencing enzymes were combined into the SMRTbell template in proportion and then sequenced by diffusion loading. To obtain high-fidelity reads (Hi-Fi reads), we used SMRTlink software to conduct the subreads obtained previously for circular consensus sequencing (CCS) processing. The main parameters were min passes &#x3d; 3 and min RQ &#x3d; 0.99 (<xref ref-type="bibr" rid="B26">Sim et al., 2022</xref>).</p>
<p>The original data after sequencing were filtered and then assembled with hifiasm (<xref ref-type="bibr" rid="B9">Feng et al., 2021</xref>). First, an all-vs-all read comparison was used to correct sequencing errors. Second, after correction, a read overlap comparison was used again to construct a phased string graph. Finally, the contigs were generated according to the overlap graph. The final genome sequence was obtained after heterozygosity removal to generate the pseudo-contigs (<xref ref-type="bibr" rid="B33">Koren et al., 2018</xref>).</p>
</sec>
<sec id="s2-4-2">
<title>Hi-C Assisted Genome Assembly</title>
<p>Raw image data files sequenced by a high-throughput sequencer (Illumina HiSeq 2500) were analyzed by base calling and transformed into sequenced reads. Raw sequencing data were stored in the FASTQ (Fq) file format. The raw reads obtained by sequencing contained a small number of adapter-containing, repetitive, and low-quality reads, which could have affected the quality of the alignment and the subsequent analysis. Therefore, we filtered the raw data to obtain clean reads. A total of 10,000 pairs of sequenced reads were randomly selected from the Hi-C sequencing library data and compared to the NT database using BLAST. The top 10 matched species in the output results were sorted and reported to check for bacterial contamination. Juicer software was used to align the Hi-C data to the draft genome. Finally, the results of the Hi-C library were compared and analyzed using 3D-DNA software (<xref ref-type="bibr" rid="B6">Durand et al., 2016</xref>). The scaffold number was obtained using these methods.</p>
</sec>
</sec>
<sec id="s2-5">
<title>Genome Annotation</title>
<p>For repeat element annotations, software RepeatMasker was used to mask the predicted repeats and known repeats (RepBase) in the genome. We used MITE Hunter, LTRharvest, LTR Finder, LTR retriever, and RepeatModeler to predict repeat sequences (<xref ref-type="bibr" rid="B25">Scott and Madden, 2004</xref>; <xref ref-type="bibr" rid="B30">Xiong et al., 2017</xref>).</p>
<p>We used reference protein sequences and RNA-Seq analysis to predict gene models. <italic>Ab initio</italic> gene prediction and annotation were performed by Augustus v3.318, SNAP, and GlimmerHMM. Augustus V3.0.3 combined with RNA-Seq data was used to predict the gene structure. First, parameters were trained with the training set. Intron hints were then obtained based on the comparison between RNA-Seq reads and the Scaffold (TopHat V2.0.10) (i.e., predicted intron location information) and then combined with intron hints for gene structure prediction. Second, SNAP and GlimmerHMM were used to predict the gene structure. The parameters were first trained with the training set, and then the genetic structure of the Scaffold shielded with repeated sequences was predicted (<xref ref-type="bibr" rid="B15">Ian, 2004</xref>). Third, Genemark-ET V4.57 combined with intron hints obtained from Augustus V3.0.3 was used to predict the genomic structure of the scaffold with repetitive sequences. The published protein sequences of <italic>Oryza sativa</italic>, <italic>Zea mays</italic>, <italic>Echinochloa crus-galli</italic>, <italic>Brachypodium distachyon</italic>, <italic>S. bicolor</italic>, and <italic>Puccinellia tenuiflora</italic> (NCBI) were used to perform homologous searches by GeMoMa-1.6.1.</p>
<p>For non-coding RNA prediction, we used tRNAscan-SE to predict the tRNA. rRNA and other types of ncRNA were searched with the Rfam database, and the specific information of ncRNA was obtained through similarity comparison.</p>
<p>For the gene functional annotation of protein-coding genes, we used six databases, including NR, Swiss-Prot, eggNOG, GO, KEGG, and InterPro, to perform function prediction. All these predictions of functions were integrated.</p>
<p>In this study, the corresponding gene function annotation results were obtained by comparing and analyzing a single database. Finally, a Venn analysis was performed by integrating the five databases to obtain the precise gene function annotation information.</p>
</sec>
<sec id="s2-6">
<title>Comparative Genomic Analysis</title>
<sec id="s2-6-1">
<title>Colinear Analysis and Phylogenetic Tree</title>
<p>MUMmer software can be used to quickly compare two genome sequences (<xref ref-type="bibr" rid="B4">Delcher et al., 2003</xref>). MUMmer was used to conduct genomic colinearity analysis on <italic>S. dochna</italic> and its related species <italic>S. bicolor</italic>. The parameters were &#x201c;nucmer -g 1000 -c 90 -l 200&#x201d;.</p>
<p>To identify the gene protein family, the OrthoMCL cluster analysis was adopted (<xref ref-type="bibr" rid="B19">Li et al., 2003</xref>). We performed all-VS-all BLAST alignments on protein-coding sequences of all the selected species (e-value was 1e<sup>&#x2212;5</sup> by default), calculated the similarity between sequences, and conducted a cluster analysis using the Markov clustering algorithm with an expansion coefficient of 1.5. The results of the protein family clustering were obtained. A Venn diagram was used to display the clustering results, which distinguished the endemic/common genes. The time standard point (correction point) was from the Timetree website.</p>
<p>Single-copy genes of each species were selected as reference markers for species with incomplete evolutionary studies, and fourfold degenerate sites were chosen to construct supergenes. MAFFT software was used for multiple sequence alignment of the supergenes, and the most suitable base substitution model was selected. A phylogenetic tree was constructed based on the maximum likelihood method (ML) using RAxML software. Based on the single-copy gene family, MCMCTree (Burn-in &#x3d; 5,000,000, sample-number &#x3d; 1,000,000, and sample-frequency &#x3d; 50) was used to estimate the divergence time. The time standard point (correction point) was from the Timetree website (<xref ref-type="bibr" rid="B12">Hahn et al., 2005</xref>).</p>
</sec>
<sec id="s2-6-2">
<title>Gene Family Contraction and Expansion Analysis</title>
<p>Cafe software was used to analyze the gene families. This software can capture the changes in gene families between species based on stochastic birth and death models combined with statistical inference methods. The number of contractions and expansions of gene families on each branch of evolution was obtained. We also determined whether contractions and expansions occurred in each gene family (<xref ref-type="bibr" rid="B12">Hahn et al., 2005</xref>; <xref ref-type="bibr" rid="B13">Hahn et al., 2007</xref>).</p>
</sec>
<sec id="s2-6-3">
<title>Positive Selected Gene Family Analysis</title>
<p>Early studies calculated Ka and Ks from pairwise sequence alignments by averaging over all codons and over the entire time since the divergence of the two sequences. However, the vast majority of codons in a functional protein are conserved for most of their evolutionary history. If positive selection occurs, it affects only some sites, and it occurs only during a specific time period. In 2002, a new method called the branch-site model analysis method was reported, which can detect positive selection events that occur on a specific evolutionary branch and affect only some sites (Jianzhi et al.). We used this method to detect positive selection in protein-coding sequences.</p>
<p>First, one-to-one orthologous proteins from the study species and related species were selected. Second, homologous protein sequences were aligned with PRANK using the default parameters. Third, alignment results were filtered with Gblocks using the following parameters: -t&#x3d;c -e&#x3d;.ft -b4&#x3d;5 -d&#x3d;y. Fourth, CODEML in PAML was used to test for positive selection on a specific branch that affected only some loci. Fifth, the chi2 program in PAML was used to check and correct for multiple hypotheses. The main parameter was degrees of freedom &#x3d; 2.</p>
<p>Based on these methods, we obtained the positive selection genes and proceeded with the GO enrichment analysis.</p>
</sec>
<sec id="s2-6-4">
<title>Whole-Genome Duplication</title>
<p>Whole-genome duplication (WGD) is typically associated with the rapid loss of repeated fragments, chromosome rearrangement, and the process of rearrangement back to the diploid. In this study, the distribution of synonymous substitutions (Ks) of each synonymous locus between adjacent homologous genes in the genome was constructed to detect WGD. We used BLASTP to compare the longest protein sequence of the gene in <italic>S. dochna</italic> genome and MCScanX to filter the comparison results. In addition, we used the yn00 tool in the PAML software package to calculate the synonymous replacement rate. The density distribution with the value of Ks was plotted for all paralog gene pairs. This approach is also known as the duplicate age distribution method (<xref ref-type="bibr" rid="B27">Vanneste et al., 2013</xref>). Synonymous mutations are generally considered neutral and gradually accumulate in the genome at a nearly constant rate. Therefore, Ks can represent collateral homologous genes.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>High-Quality Gene Assembly</title>
<p>The quality control results of the offline data revealed 43.7&#xa0;Gb of clean bases with a GC content of 43.52% and 146,092,905 clean reads (<xref ref-type="sec" rid="s10">Supplementary Table S9</xref>). A K-mer analysis revealed that <italic>S. dochna</italic> is a heterozygous species (0.619%), and the 17-mer frequency distribution plot is shown as <xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>.</p>
<p>PacBio long reads (<xref ref-type="bibr" rid="B23">Nurk et al., 2020</xref>) and Illumina short reads (<xref ref-type="bibr" rid="B5">Dudchenko et al., 2017</xref>; <xref ref-type="bibr" rid="B28">Vurture et al., 2017</xref>) technologies were used to assemble the <italic>S. dochna</italic> genome. The PacBio clean subread statistical results are shown in <xref ref-type="sec" rid="s10">Supplementary Table S7</xref>. We assembled the genome sequences into 1,628 contigs with a total length of 831.09&#xa0;Mb, a contig N50 length of 533.94&#xa0;kb, and the longest contig of 822.40&#xa0;kb after the initial assembly (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). Thus, contigs with an average GC content of 55&#x2013;57% (abnormal GC content peak in the figure) were processed by filtering the contigs. We obtained the final genome as 144 contigs with a total length of 777.99&#xa0;Mb, a contig N50 of 553.47&#xa0;kb, and the longest contig of 822.40&#xa0;kb, which is 47&#xa0;Mb bigger than that of <italic>S. bicolor</italic> (&#x223c;730&#xa0;Mb), suggesting a close relationship between <italic>S. dochna</italic> and <italic>S. bicolor</italic>. The results of the Hi-C library were analyzed using 3D DNA software, and the results revealed a genome that was 778.03&#xa0;Mb long with scaffold N50 of 727.11&#xa0;kb. The third-generation assembly results are shown in <xref ref-type="table" rid="T1">Table 1</xref>. The evaluation results of the Benchmarking Universal Single-Copy Ortholog (BUSCO) analysis indicated 97.5% completeness. A complete Single-Copy BUSCO further validated the high degree of completeness of the <italic>S. dochna</italic> genome assembly (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Genome assembly results.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="center">Contig</th>
<th align="center">Scaffold</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="3" align="left">Genome assembly and Hi-C results</td>
</tr>
<tr>
<td align="left">Total number</td>
<td align="center">144</td>
<td align="center">82</td>
</tr>
<tr>
<td align="left">Total length (bp)</td>
<td align="center">777,990,620</td>
<td align="center">778,026,804</td>
</tr>
<tr>
<td align="left">N50 length (bp)</td>
<td align="center">55,347,497</td>
<td align="center">43,657,906</td>
</tr>
<tr>
<td align="left">GC (%)</td>
<td align="center">43.90</td>
<td align="center">43.90</td>
</tr>
<tr>
<td align="left">Contig N90 length (bp)</td>
<td align="center">11,660,912</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="left">Scaffold N50 length (bp)</td>
<td align="center">&#x2014;</td>
<td align="center">72,771,365</td>
</tr>
<tr>
<td align="left">Chromosome length (%)</td>
<td align="center">&#x2014;</td>
<td align="center">94.09</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Hi-C, high-throughput chromosome capture technology.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The length distribution statistics of Hi-Fi reads and Hi-Fi read bases are shown in <xref ref-type="sec" rid="s10">Supplementary Figures S4A,B</xref>. The length of most Hi-Fi reads was distributed between 1,000 and 2,000. The gene assembly results demonstrate the high quality of the <italic>S. dochna</italic> genome assembly.</p>
<p>The completeness and accuracy of the assembly quality were assessed using the sequence data return ratio, GC-depth evaluation, and BUSCO evaluation. First of all, the results of our second-generation return ratio showed a mapping ratio of 99.58%, suggesting that most of the <italic>S. dochna</italic> genome had been assembled (<xref ref-type="table" rid="T2">Table 2</xref>). Second, an evaluation of the depth of GC found that there were no separate scattered clusters on the figure, which proved that our assembly results were not polluted. A BUSCO evaluation was used to evaluate the completeness of the <italic>S. dochna</italic> genome (<xref ref-type="bibr" rid="B29">Waterhouse et al., 2018</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Statistics of the results of a comparison of the DNA library.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Sample name</th>
<th align="center">Reads number</th>
<th align="center">Mapped</th>
<th align="center">Properly paired Mapped</th>
<th align="center">Mapped DifferentChr</th>
<th align="center">Mapped different ChrMapQ&#x3e;&#x3d;5</th>
<th align="center">Secondary reads</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">
<italic>Sorghum dochna</italic>
</td>
<td rowspan="2" align="center">294,034,737</td>
<td align="center">292,796,203</td>
<td align="center">279,197,414</td>
<td align="center">9,547,568</td>
<td align="center">4,598,204</td>
<td align="center">1,848,927</td>
</tr>
<tr>
<td align="center">99.58%</td>
<td align="center">95.55%</td>
<td align="center">3.2%</td>
<td align="center">1.6%</td>
<td align="center">0.6%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>After Hi-C assembly, 10 chromosomes were assembled, and 751&#xa0;Mb genomes were fixed to further verify the accuracy of the assembly results. This included 94.09% gene content and involved calculating the exchange between and within chromosomes. The heatmap in <xref ref-type="fig" rid="F2">Figure 2</xref> shows the intergenomic exchange (<xref ref-type="bibr" rid="B31">Zhang et al., 2013</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Hi-C-assisted assembly genome interaction heatmap. The exchange within chromosomes is stronger than that between chromosomes. Moreover, the exchange between the same chromosomes with a close physical location is stronger than the exchange between chromosomes with a distant physical location. Hi-C, high-throughput chromosome capture technique.</p>
</caption>
<graphic xlink:href="fgene-13-844385-g002.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>Genome Annotation</title>
<sec id="s3-2-1">
<title>Repeat Sequence Statistics</title>
<p>The results of annotation showed that Class &#x2160; retrotransposons accounted for the highest proportion of the repeated sequences. The long terminal repeats (LTRs) were the most abundant transposable elements (TEs). LTR-retrotransposons accounted for 59.22%, and Gypsy accounted for 47.46% in the LTR&#x2013;retrotransposons. In contrast, Copia accounted for 6.79%. Notably, Gypsy-type and Copia-type TEs accounted for most of the LTRs (<xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). Non-LTR-retrotransposons accounted for 5.97%, whereas Class II DNA transposons accounted for 9.3% of the repeated sequences (<xref ref-type="sec" rid="s10">Supplementary Table S4</xref>).</p>
</sec>
</sec>
<sec id="s3-3">
<title>Coding Gene and Non-Coding RNA Predictions</title>
<p>We predicted that 37,971 genes were encoded, and there were 39,937 transcripts in <italic>S. dochna</italic>. In addition, the number of genes in <italic>S. dochna</italic> was higher than the number of genes in <italic>O. sativa</italic>, <italic>Z. mays</italic>, <italic>E. crus-galli</italic>, <italic>B. distachyon</italic>, <italic>S. bicolor</italic>, and <italic>P. tenuiflora</italic> (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>), which was similar to that annotated for <italic>S. bicolor</italic>, indicating that its genome is more complex. Based on the open reading frames, we predicted 20,108 genes in <italic>S. dochna</italic> (<xref ref-type="sec" rid="s10">Supplementary Table S5</xref>). Moreover, according to the types of ncRNAs, the results of the ncRNA classes are shown in <xref ref-type="sec" rid="s10">Supplementary Table S6</xref>. There were three sRNAs, 3,101 rRNAs, 172 miRNAs, and 847 tRNAs. There were 5,694 snRNA:: snoRNA:: CD-Box in the ncRNA.</p>
</sec>
<sec id="s3-4">
<title>Gene Functional Annotation</title>
<p>We annotated 35,309 types of gene information using six databases (NR, Swiss-Prot, eggNOG, GO, KEGG, and InterPro). The corresponding gene function annotation results were obtained by comparing the analyses of a single database. A total of 35,195 types of gene information were annotated by NR (<xref ref-type="sec" rid="s10">Supplementary Table S13</xref>), and 21,097 types of gene information were annotated by Swiss-Prot (<xref ref-type="sec" rid="s10">Supplementary Table S14</xref>). A total of 9,169 types of gene information were annotated by KEGG (<xref ref-type="sec" rid="s10">Supplementary Table S12</xref>), and 23,594 types of gene information were annotated by GO (<xref ref-type="sec" rid="s10">Supplementary Table S11</xref>). A total of 30,996 types of gene information were annotated by eggNOG (<xref ref-type="sec" rid="s10">Supplementary Table S10</xref>). Finally, a Venn analysis was conducted by integrating the five databases (NR, Swiss-Prot, eggNOG, GO, and KEGG), which revealed a set of 7,988 common gene annotations (<xref ref-type="fig" rid="F3">Figure 3</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S15</xref>). The Venn analysis of gene functional annotations is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>A Venn analysis of gene function annotation.</p>
</caption>
<graphic xlink:href="fgene-13-844385-g003.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>Genome Comparison</title>
<sec id="s3-5-1">
<title>Colinearity and Phylogenetic Relationships</title>
<p>When species are closely related, there is greater coverage of colinear segments on the genome, and the colinear relationship between the genomes of different species is more accurate (<xref ref-type="bibr" rid="B17">Krzywinski et al., 2009</xref>). <xref ref-type="fig" rid="F4">Figure 4A</xref> shows that the colinear relationship between <italic>S. dochna</italic> and <italic>S. bicolor</italic> is relatively strong, and their relationship is relatively close. Circos displays the important features of the assembled <italic>S. dochna</italic> genome.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>
<bold>(A)</bold> Circos display of the important features of the assembled <italic>Sorghum dochna</italic> genome. From outside to inside, <bold>(A)</bold> chromosome, <bold>(B)</bold> repeat sequence distribution, <bold>(C)</bold> gene distribution, <bold>(D)</bold> GC content distribution, and <bold>(E)</bold> colinearity between <italic>S. dochna</italic> and <italic>S. bicolor</italic>. <bold>(B)</bold> Venn diagram of the protein families. tgl: <italic>S. dochna</italic> (<italic>S. bicolor dochna</italic>), sbi: <italic>S. bicolor</italic> (<italic>S. bicolor bicolor</italic>), osa: rice (<italic>Oryza sativa</italic>), zma: maize (<italic>Zea mays</italic>), ssp: sugarcane (<italic>Saccharum spontaneum</italic>), and bol: kale (<italic>Brassica oleracea</italic>).</p>
</caption>
<graphic xlink:href="fgene-13-844385-g004.tif"/>
</fig>
</sec>
<sec id="s3-5-2">
<title>Gene Protein Family Identification and Positive Selection Gene Analysis</title>
<p>A protein family is a group of proteins with certain similarities in sequence and function. A protein family clustering analysis (of predicted gene proteins) groups proteins with identical or similar functions together, thus reducing the complexity of further analyses. The comparison with exogenous organisms also helps to understand and predict the gene functions. The Gramineae members <italic>S. bicolor</italic>, <italic>S. officinarum</italic>, <italic>Z. mays</italic>, and <italic>O. sativa</italic>, which are closely related to <italic>S. dochna</italic>, were selected for the protein family analysis because their current genome assemblies are highly contiguous. Among them, <italic>S. officinarum</italic> and <italic>S. dochna</italic> have the same biological characteristics of high sugar content. Simultaneously, distant species <italic>Brassica oleracea</italic> was selected for comparison. <xref ref-type="fig" rid="F4">Figure 4B</xref> shows the Venn diagram of protein clustering in <italic>S. dochna</italic> and other species. Among them, 969 gene families were specific to <italic>S. dochna</italic>. A total of 1,440 were specific to <italic>O. sativa</italic>, 1,714 were specific to <italic>Z. mays</italic>, 436 were specific to <italic>S. bicolor</italic>, 5,572 were specific to <italic>S. officinarum</italic>, and 4,369 were specific to <italic>B. oleracea</italic>. Distributions of the numbers of single-copy genes, multi-copy genes, endemic genes, and other types of genes per species are shown in <xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>.</p>
<p>Phylogenetic trees that were constructed based on protein clustering results showed that <italic>S. dochna</italic> was closer to <italic>S. bicolor</italic>, while it was the most distant from <italic>B. oleracea</italic> (<xref ref-type="fig" rid="F5">Figure 5</xref>). Based on the differentiation time of species, <italic>S. bicolor</italic> and <italic>S. dochna</italic> diverged from sugarcane (<italic>Saccharum officinarum</italic>) 9.2 and 1.4 million years ago, respectively. An analysis with the CAFE software showed that 191 gene families were significantly expanded, while 3,794 were significantly contracted in <italic>S. dochna</italic> (tgl) at a family-wide <italic>p</italic>-value threshold of 0.05. The result of GO enrichment in expanded gene families is shown in <xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>. A GO function enrichment analysis of these gene families revealed that the expanded gene families were primarily clustered in the metabolic process, DNA reconstruction, and DNA binding among others (<xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). The positive selection analysis model with <italic>S. dochna</italic> as the foreground branch and other species as the background branch was established. Finally, we obtained four significantly positively selected genes. The GO enrichment analysis showed that these positive selection genes were primarily clustered in organic cyclic compound binding, nucleic acid binding, nucleotidyl transferase activity, and tRNA methylation among others (<xref ref-type="sec" rid="s10">Supplementary Figures S5, S6</xref>). One significant positive selection gene was clustered in peptidyl-prolyl <italic>cis-trans</italic> isomerase (PPIase) activity. The GO enrichment information is shown in <xref ref-type="sec" rid="s10">Supplementary Table S8</xref>.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Phylogenetic tree of the species. In the analysis that estimated the time of differentiation of species, the branch length obtained is the base replacement rate, and after the analysis of species differentiation time, the branch length is the time in million years. <italic>O. sativa</italic>: <italic>Oryza sativa</italic>. <italic>Z. mays</italic>: <italic>Zea mays</italic>. <italic>S. bicolor1</italic>: <italic>Sorghum dochna</italic>. <italic>S. bicolor</italic>: <italic>Sorghum bicolor. S. spontaneum</italic>: <italic>Saccharum spontaneum. B. oleracea</italic>: <italic>Brassica oleracea.</italic>
</p>
</caption>
<graphic xlink:href="fgene-13-844385-g005.tif"/>
</fig>
<p>According to the species differentiation time, <italic>S. dochna</italic> and <italic>S. bicolor</italic> diverged 1.4 million years ago. During this period, the temperature of the Earth was lower by 5&#x2013;10&#xb0;C. Since then, the Earth has undergone several alterations in climate. <italic>S. bicolor</italic> is native to Africa, while <italic>S. dochna</italic> is native to India/Myanmar, which is currently separated by the Indian Ocean (<xref ref-type="bibr" rid="B7">Dutt, 1999</xref>). Therefore, it is hypothesized that the formation and differentiation of the two <italic>S. bicolor</italic> species could be related to the climate and tectonic plate movement at that time (<xref ref-type="bibr" rid="B2">Chase, 1978</xref>). However, we did not explore the similarities in physiological functions and gene family clustering between the two <italic>S. bicolor</italic> species in more detail. Therefore, further studies should be conducted to fully elucidate their specific biological properties (<xref ref-type="bibr" rid="B12">Hahn et al., 2005</xref>).</p>
<p>During the GO enrichment analysis, one significant positive selection gene was clustered in peptidyl-prolyl <italic>cis-trans</italic> isomerase (PPIase) activity (<xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>). PPIase can catalyze the conformation of protein substrates or the N-terminal of proline residues in the polypeptide from a <italic>cis</italic> structure to a <italic>trans</italic> structure (<xref ref-type="bibr" rid="B21">Maruyama et al., 2000</xref>). This type of protein can also improve the stress resistance of plants when they are in adversity and pass on the stress resistance to future generations. Therefore, it is hypothesized that the high stress resistance of <italic>S. dochna</italic> is related to the positive selection of this gene. Other positive selection genes were clustered in tRNA methylation. tRNA methylation primarily occurs in the nitrogen atom of tRNA and can also occur in the oxygen atom of the 2&#x2032; hydroxyl of nucleotide ribose ring (<xref ref-type="bibr" rid="B11">Gustilo et al., 2008</xref>). In addition, the 5&#x2032; carbon atom on purine and the 2&#x2032; and 8&#x2032; carbon atoms on adenosine have also been identified. The methylation phenomenon is primarily related to protein translation and the stability of tRNA (<xref ref-type="bibr" rid="B22">Motorin and Helm, 2011</xref>). In addition, for organic cyclic compound binding, <italic>S. dochna</italic> is a high-quality bio-energy crop with high sugar content. Most sugar structures are constituted with organic cyclic compounds, such as furan and pyran. Therefore, we hypothesized that during the evolution of sweet sorghum, positive selection genes were enriched in the binding of organic cyclic compounds, which could be used in the synthesis of sugars (<xref ref-type="bibr" rid="B20">Lingle et al., 2012</xref>).</p>
</sec>
<sec id="s3-5-3">
<title>Whole-Genome Duplication Analysis</title>
<p>Whole-genome duplication (WGD) is often associated with a rapid loss of repeated fragments and chromosome rearrangements. Notably, it provides new materials for the evolution of organisms, particularly plants, which assists them in their adaptation to new environments. A whole-genome duplication analysis performed on the pan-genome of <italic>S. dochna</italic> (<xref ref-type="fig" rid="F6">Figure 6</xref>) revealed gene replication and loss and a sudden increase in the Ks within a certain period (shown as a peak), suggesting that a WGD event could have occurred. Otherwise, loss occurred (shown as a smooth decline).</p>
<p>The Ks of ortholog gene pairs between the <italic>S. dochna</italic> genome and those of related species were searched for a density distribution map. The Ks distribution of orthologs (<xref ref-type="fig" rid="F6">Figures 6A,B</xref>) suggested that a WGD event occurred in <italic>S. dochna</italic> as in other species of the Gramineae family. As shown in <xref ref-type="fig" rid="F6">Figure 6A</xref>, two differentiation events occurred in <italic>S. dochna</italic> when the Ks values were 0.1 and 0.8. Simultaneously, <xref ref-type="fig" rid="F6">Figure 6B</xref> shows that the green one represents the differentiation event of <italic>S. dochna</italic> and <italic>B. oleracea</italic>, and <italic>S. dochna</italic> and <italic>B. oleracea</italic> had the highest Ks values. Therefore, the WGD event occurred the earliest in these two species, followed by <italic>S. dochna</italic> and <italic>O. sativa</italic>, while the differentiation of <italic>O. sativa</italic>, <italic>B. oleracea</italic>, and <italic>S. dochna</italic> occurred relatively late. Thus, the Ks value was relatively low.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>
<bold>(A)</bold> Ks distribution map of <italic>Oryza sativa</italic>. The peaks indicate that WGD events occurred during the evolution of species. <bold>(B)</bold> Ks distribution map of the complete genome replication of <italic>Sorghum dochna</italic> and its related species. Tgl: <italic>S. dochna</italic>. WGD, whole-genome duplication.</p>
</caption>
<graphic xlink:href="fgene-13-844385-g006.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>Conclusion</title>
<p>In this study, we used PacBio long reads, Illumina short reads, and Hi-C sequences to assemble the <italic>S. dochna</italic> genome and analyze its gene family and relationship with other species. Our findings provide a preliminary understanding of the <italic>S. dochna</italic> genome. A high-quality chromosome assembly was achieved using PacBio long reads, Illumina short reads, and Hi-C sequences. The genome size of <italic>S. dochna</italic> is 777&#xa0;Mb, with a contig N50 of 553.5&#xa0;kb and a GC content of 43.9%. The coding gene analysis revealed 37,971 genes and 39,937 transcripts in the <italic>S. dochna</italic> genome.</p>
<p>The genome comparison indicated that <italic>S. dochna</italic> and <italic>S. bicolor</italic> had the strongest colinearity. GO enrichment revealed that the positive selection genes primarily clustered in organic cyclic compound binding, nucleic acid binding, nucleotidyl transferase activity, and tRNA methylation among others. However, the synthetic pathway of sugar production in <italic>S. dochna</italic> is still unclear (<xref ref-type="bibr" rid="B14">Hakim and Wijaya, 2009</xref>). Thus, subsequent studies on genome exploration should focus on the transcriptome and proteome of <italic>S. dochna</italic>. In addition, only one variety of <italic>S. dochna</italic> was used in this study. Cognizant of this, future studies should use multiple varieties to comparatively analyze the species and construct reference-quality genome sequences.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>YuC, ZW and GY conceived and designed this research. YuC analyzed data and wrote the manuscript. YuC, YZ, HW and JS executed the data analyses. JS participated in the discussion of the results. LM, HS, FM, ZZ, YaC and JH collected samples. GY and JH contributed to the evaluation and discussion of the results and manuscript revisions. All authors have read and approved the final version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This study was supported by the National Natural Science Foundation of China (U1906201), Shandong Forage Research System (SDAIT-23-01), China Agriculture Research System (CARS-34), and the First Class Grassland Science Discipline Program of Shandong Province (1619002), China.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>Author JW was employed by the company Berry Genomics Corporation.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The authors would like to thank GY, ZW, and JS (Professor of Grassland Science, Qingdao Agricultural University) for their help in data analysis and article writing. We are grateful for the scientific research funding provided by the College of Grassland Science of Qingdao Agricultural University and the experimental help provided by Beijing Berry and Kang.</p>
</ack>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2022.844385/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2022.844385/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Image4.JPEG" id="SM1" mimetype="application/JPEG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image5.JPEG" id="SM2" mimetype="application/JPEG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.PNG" id="SM3" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM4" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image1.PNG" id="SM5" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image3.PNG" id="SM6" mimetype="application/PNG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image6.JPEG" id="SM7" mimetype="application/JPEG" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Antonopoulou</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gavala</surname>
<given-names>H. N.</given-names>
</name>
<name>
<surname>Skiadas</surname>
<given-names>I. V.</given-names>
</name>
<name>
<surname>Angelopoulos</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lyberatos</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Biofuels Generation from Sweet Sorghum: Fermentative Hydrogen Production and Anaerobic Digestion of the Remaining Biomass</article-title>. <source>Bioresour. Technol.</source> <volume>99</volume>, <fpage>110</fpage>&#x2013;<lpage>119</lpage>. <pub-id pub-id-type="doi">10.1016/j.biortech.2006.11.048</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chase</surname>
<given-names>C. G.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Plate Kinematics: The Americas, East Africa, and the Rest of the World</article-title>. <source>Earth Planet. Sci. Lett.</source> <volume>37</volume>, <fpage>355</fpage>&#x2013;<lpage>368</lpage>. <pub-id pub-id-type="doi">10.1016/0012-821x(78)90051-1</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chikhi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Medvedev</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Informed and Automated K-Mer Size Selection for Genome Assembly</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>31</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt310</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Delcher</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Phillippy</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Using MUMmer to Identify Similar Regions in Large Sequence Sets</article-title>. <source>Curr. Protoc. Bioinforma</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.1002/0471250953.bi1003s00</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudchenko</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Batra</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Omer</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Nyquist</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Hoeger</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Durand</surname>
<given-names>N. C.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>De Novo assembly of the <italic>Aedes aegypti</italic> Genome Using Hi-C Yields Chromosome-Length Scaffolds</article-title>. <source>Science</source> <volume>356</volume>, <fpage>92</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1126/science.aal3327</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Durand</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Shamim</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Machol</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Huntley</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Lander</surname>
<given-names>E. S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Juicer Provides a One-Click System for Analyzing Loop-Resolution Hi-C Experiments</article-title>. <source>Cell. Syst.</source> <volume>3</volume>, <fpage>95</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2016.07.002</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dutt</surname>
<given-names>V. P.</given-names>
</name>
</person-group> (<year>1999</year>). <source>India&#x27;s Foreign Policy in a Changing World</source>. <publisher-name>Vikas Publishing House</publisher-name>. </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Erdei</surname>
<given-names>&#xc9;.</given-names>
</name>
<name>
<surname>Pep&#xf3;</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Csap&#xf3;</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>T&#xf3;th</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Szab&#xf3;</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Sweet Sorghum (Sorghum Dochna L.) Restorer Lines Effects on Nutritional Parameters of Stalk Juice</article-title>. <source>Acta Agrar. debr.</source>, <fpage>51</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.34101/actaagrar/36/2792</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Portik</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <source>Metagenome Assembly of High-Fidelity Long Reads with Hifiasm-Meta</source>. <publisher-loc>Arxiv E-Prints</publisher-loc>. </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gnansounou</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Dauriat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wyman</surname>
<given-names>C. E.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Refining Sweet Sorghum to Ethanol and Sugar: Economic Trade-Offs in the Context of North China</article-title>. <source>Bioresour. Technol.</source> <volume>96</volume>, <fpage>985</fpage>&#x2013;<lpage>1002</lpage>. <pub-id pub-id-type="doi">10.1016/j.biortech.2004.09.015</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gustilo</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Vendeix</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Agris</surname>
<given-names>P. F.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>tRNA&#x27;s Modifications Bring Order to Gene Expression</article-title>. <source>Curr. Opin. Microbiol.</source> <volume>11</volume>, <fpage>134</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2008.02.003</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hahn</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>De Bie</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Stajich</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cristianini</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Estimating the Tempo and Mode of Gene Family Evolution from Comparative Genomic Data</article-title>. <source>Genome Res.</source> <volume>15</volume>, <fpage>1153</fpage>&#x2013;<lpage>1160</lpage>. <pub-id pub-id-type="doi">10.1101/gr.3567505</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hahn</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>M. V.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>S.-G.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Gene Family Evolution across 12 Drosophila Genomes</article-title>. <source>PLoS Genet.</source> <volume>3</volume>, <fpage>e197</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.0030197</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hakim</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Wijaya</surname>
<given-names>I. H.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Production of Bioethanol from Sweet Sorghum: A Review</article-title>. <source>Afr. J. Agric. Res.</source> <volume>4</volume>, <fpage>772</fpage>&#x2013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1021/jf9024163</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ian</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Gene Finding in Novel Genomes</article-title>. <source>BMC Bioinforma.</source> <volume>5</volume>, <fpage>59</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-5-59</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bian</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A Chromosome-Level Genome Assembly of the Oriental River Prawn, <italic>Macrobrachium nipponense</italic></article-title>. <source>GigaScience</source> <volume>10</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/giaa160</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rhie</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Walenz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Dilthey</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Bickhart</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Kingan</surname>
<given-names>S. B.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>De novo assembly of haplotype-resolved genomes with trio binning</article-title>. <source>Nat. Biotechnol.</source> <volume>36</volume>, <fpage>1174</fpage>&#x2013;<lpage>1182</lpage>. </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krzywinski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Schein</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Birol</surname>
<given-names>&#x130;.</given-names>
</name>
<name>
<surname>Connors</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gascoyne</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Horsman</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Circos: An Information Aesthetic for Comparative Genomics</article-title>. <source>Genome Res.</source> <volume>19</volume>, <fpage>1639</fpage>&#x2013;<lpage>1645</lpage>. <pub-id pub-id-type="doi">10.1101/gr.092759.109</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Handsaker</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wysoker</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fennell</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ruan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Homer</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>The Sequence Alignment/Map Format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Stoeckert</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Roos</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>OrthoMCL: Identification of Ortholog Groups for Eukaryotic Genomes</article-title>. <source>Genome Res.</source> <volume>13</volume>, <fpage>2178</fpage>&#x2013;<lpage>2189</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1224503</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lingle</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Tew</surname>
<given-names>T. L.</given-names>
</name>
<name>
<surname>Rukavina</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Boykin</surname>
<given-names>D. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Post-harvest Changes in Sweet Sorghum I: Brix and Sugars</article-title>. <source>Bioenerg. Res.</source> <volume>5</volume>, <fpage>158</fpage>&#x2013;<lpage>167</lpage>. <pub-id pub-id-type="doi">10.1007/s12155-011-9164-0</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maruyama</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Furutani</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Archaeal Peptidyl Prolyl Cis-Trans Isomerases (PPIases)</article-title>. <source>Front. Biosci.</source> <volume>5</volume>, <fpage>D821</fpage>&#x2013;<lpage>D836</lpage>. <pub-id pub-id-type="doi">10.2741/maruyama</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Motorin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Helm</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>RNA Nucleotide Methylation</article-title>. <source>WIREs RNA</source> <volume>2</volume>, <fpage>611</fpage>&#x2013;<lpage>631</lpage>. <pub-id pub-id-type="doi">10.1002/wrna.79</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Nurk</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Walenz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Rhie</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vollger</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Koren</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <source>HiCanu: Accurate Assembly of Segmental Duplications, Satellites, and Allelic Variants from High-Fidelity Long Reads</source>. <publisher-loc>Harvard University</publisher-loc>. </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paterson</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Bowers</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Bruggmann</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dubchak</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Grimwood</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gundlach</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>The Sorghum Bicolor Genome and the Diversification of Grasses</article-title>. <source>Nature</source> <volume>457</volume>, <fpage>551</fpage>&#x2013;<lpage>556</lpage>. <pub-id pub-id-type="doi">10.1038/nature07723</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scott</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Madden</surname>
<given-names>T. L.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>BLAST: at the Core of a Powerful and Diverse Set of Sequence Analysis Tools</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>W20</fpage>&#x2013;<lpage>W25</lpage>. </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sim</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Corpuz</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Simmonds</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Geib</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>HiFiAdapterFilt, a Memory Efficient Read Processing Pipeline, Prevents Occurrence of Adapter Sequence in PacBio HiFi Reads and Their Negative Impacts on Genome Assembly</article-title>. <source>BMC Genomics</source> <volume>23</volume>, <fpage>157</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-022-08375-1</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vanneste</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Van de Peer</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Maere</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Inference of Genome Duplications from Age Distributions Revisited</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume> (<issue>1</issue>), <fpage>177</fpage>&#x2013;<lpage>190</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mss214</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vurture</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>Sedlazeck</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Nattestad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Underwood</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gurtowski</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>GenomeScope: Fast Reference-free Genome Profiling from Short Reads</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2202</fpage>&#x2013;<lpage>2204</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx153</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waterhouse</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Seppey</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Simao</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Manni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ioannidis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Klioutchnikov</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>BUSCO Applications from Quality Assessments to Gene Prediction and Phylogenomics</article-title>. <source>Mol. Biol. Evol.</source> <volume>35</volume>, <fpage>543</fpage>&#x2013;<lpage>548</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msx319</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Recent Advances on the Development and Utilization of Molecular Markers Based on LTR Retrotransposons and MITE Transposons from Peanut (<italic>Arachis hypogaea</italic> L.)</article-title>. <source>Mol. Plant Breed.</source> <volume>2</volume>. </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Hua</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Allele-defined Genome of the Autopolyploid Sugarcane <italic>Saccharum spontaneum</italic> L</article-title>. <source>Nat. Genet.</source> <volume>50</volume>, <fpage>1565</fpage>&#x2013;<lpage>1573</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-018-0237-2</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>