<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="data-paper" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2022.1067526</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Data Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>High-quality chromosome-level genome assembly of Pacific cod, <italic>Gadus macrocephalus</italic>
</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Yiqing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Yuan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/671567"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname>
<given-names>Chen</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/753004"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zheng</surname>
<given-names>Li</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1559918"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Shenghao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1502061"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhao</surname>
<given-names>Linlin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1563822"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Key Laboratory of Marine Eco-Environmental Science and Technology, First Institute of Oceanography, Ministry of Natural Resources</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of Advanced Manufacturing, Fuzhou University</institution>, <addr-line>Jinjiang</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Laboratory of Marine Biodiversity, Third Institute of Oceanography, Ministry of Natural Resources</institution>, <addr-line>Xiamen</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Key Laboratory of Mariculture &amp; Stock Enhancement in North China&#x2019;s Sea, Ministry of Agriculture, Dalian Ocean University</institution>, <addr-line>Dalian</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Montse P&#xe9;rez, Spanish Institute of Oceanography (IEO), Spain</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Chenguang Feng, Northwestern Polytechnical University, China; Guanpin Yang, Ocean University of China, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Linlin Zhao, <email xlink:href="mailto:zhaolinlin@fio.org.cn">zhaolinlin@fio.org.cn</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Marine Fisheries, Aquaculture and Living Resources, a section of the journal Frontiers in Marine Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>12</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>1067526</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Ma, Li, Jiang, Zheng, Liu and Zhao</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Ma, Li, Jiang, Zheng, Liu and Zhao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<kwd-group>
<kwd>
<italic>Gadus macrocephalus</italic>
</kwd>
<kwd>genome sequencing</kwd>
<kwd>chromosomal assembly</kwd>
<kwd>phylogenetics</kwd>
<kwd>comparative genomics</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<counts>
<fig-count count="1"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="36"/>
<page-count count="7"/>
<word-count count="3280"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>
<italic>Gadus macrocephalus</italic> (Pacific cod) is a species of the order Gadiformes, the family Gadidae and the genus <italic>Gadus</italic>. It is a typical demersal fish species that is widely distributed along the coastal waters of the Pacific, from the Yellow Sea, the Bohai Sea and the Sea of Japan in the Northwest Pacific Ocean through the Bering Sea to California in the Northeast Pacific Ocean (<xref ref-type="bibr" rid="B17">Hart and Clemens, 1973</xref>). It is a cold-water fish species with habitat temperatures &lt; 15&#xb0;C (<xref ref-type="bibr" rid="B28">Sakurai, 2007</xref>). The spawning period of <italic>G. macrocephalus</italic> is approximately January to March, and spawning occurs only once a year (<xref ref-type="bibr" rid="B34">Wu et&#xa0;al., 2021</xref>). <italic>G. macrocephalus</italic> migrates from the deep-sea areas (approximately 200&#xa0;m) to the coastal areas (30-50&#xa0;m) during the spawning period (<xref ref-type="bibr" rid="B34">Wu et&#xa0;al., 2021</xref>). This species releases all mature eggs within a relatively short time, at temperatures of 6.4-7.6&#xb0;C (<xref ref-type="bibr" rid="B29">Sakurai and Hattori, 1996</xref>). The species is commercially important because of its abundance and high nutritional value. In recent decades, the global capture of <italic>G. macrocephalus</italic> has shown an increasing trend (<xref ref-type="bibr" rid="B14">FAO, 2020</xref>). Third-generation sequencing technology is developing rapidly as an essential sequencing technology; it is also known for its high throughput and long reads, which are conducive to the generation of highly contiguous genome sequences (<xref ref-type="bibr" rid="B22">Lang et&#xa0;al., 2020</xref>). 
Genome data, containing abundant genetic information, are considered fundamental for revealing the biological characteristics and evolutionary mechanisms of fish (<xref ref-type="bibr" rid="B16">Han et&#xa0;al., 2021</xref>). For now, two chromosome-level genomes of <italic>Gadus</italic> (<xref ref-type="bibr" rid="B27">Noh et&#xa0;al., 2022</xref>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_902167405.1/">https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_902167405.1/</uri>) are publicly available, but high-quality genomic data for <italic>G. macrocephalus</italic> have been limited.</p>
<sec id="s1_1">
<title>Significance of the data</title>
<p>Based on PacBio HiFi long-read sequencing and Hi-C (High-throughput chromosome conformation capture) technology, we constructed a nearly complete genome of <italic>G. macrocephalus</italic> with 23 chromosomes. In total, 23,843 protein-coding genes were obtained by gene prediction. Protein-coding genes of <italic>G. macrocephalus</italic> and 12 other species were used for comparative genomic analyses, including phylogenetic analysis and analysis of gene family expansion and contraction. This report provides the genomic characteristics of <italic>G. macrocephalus</italic> and elucidates the evolutionary relationship and divergence time of the order Gadiformes. These resources will be valuable for phylogenetic research, artificial breeding and fisheries management.</p>
</sec>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="s2_1">
<title>Sampling and sequencing</title>
<p>One female <italic>G. macrocephalus</italic> was sampled from the Yellow Sea in January 2022. The muscle, brain, gonad, liver, spleen and heart were stored at -80&#xb0;C. The total genomic DNA of the muscle tissue was extracted using the DNeasy Blood and Tissue Kit (Qiagen, Germany). The DNA samples were subjected to genomic sequencing to generate short and long reads. For short-read sequencing, Covaris M220 was used to break DNA into 300-350 bp fragments. DNA library preparation was completed by terminal repair, A-tail addition, sequencing junction addition, DNA purification and bridge PCR. Subsequently, based on a paired-end sequencing strategy, these libraries were sequenced on the Illumina NovaSeq 6000 platform. For long-read sequencing, according to the Pacific Biosciences (PacBio) standard protocol, a PacBio library was generated using a SMRTbell Template Prep Kit. Subsequently, these libraries were sequenced on the PacBio Sequel II platform, and long reads were generated from one SMRT cell. On the Illumina NovaSeq 6000 platform, a Hi-C library was sequenced and Hi-C data were generated. The total RNA of the six tissues (muscle, brain, gonad, liver, spleen and heart) was extracted using the RNeasy mini Kit (Qiagen, Germany). Then, RNA libraries were constructed for PE150 sequencing, and short reads were produced on the Illumina NovaSeq 6000 platform.</p>
</sec>
<sec id="s2_2">
<title>Genome assembly and evaluation</title>
<p>To assemble the contig-level genome, long-read sequencing data from the PacBio platform were assembled using Hifiasm v0.16.1-r375 (<xref ref-type="bibr" rid="B7">Cheng et&#xa0;al., 2021</xref>) with the default parameters. Subsequently, the assembled contigs were subjected to sequence polishing using NextPolish software (<xref ref-type="bibr" rid="B18">Hu et&#xa0;al., 2020</xref>) to reduce base errors (non-default parameter: task = best; rewrite = yes; sgs_options = -max_depth 100 -bwa; lgs_options = -min_read_len 1k -max_depth 100; lgs_minimap2_options = -x map-pb -t {multithread_jobs}). To assemble the chromosome-level genome, Hi-C sequencing data were mapped and sorted against the draft genome assembly with Juicer v1.6 software (<xref ref-type="bibr" rid="B11">Durand et&#xa0;al., 2016a</xref>) (non-default parameter: -s MboI). Subsequently, the contigs were linked to form 23 chromosomes by using 3D-DNA v180922 software (<xref ref-type="bibr" rid="B9">Dudchenko et&#xa0;al., 2017</xref>) with the default parameters. Finally, based on chromosome interactions, the contig orientation was corrected and suspicious fragments were removed from the contigs in the Juicebox software (<xref ref-type="bibr" rid="B10">Durand et&#xa0;al., 2016b</xref>). Benchmarking Universal Single-Copy Orthologs (BUSCO) v5.2.2 (<xref ref-type="bibr" rid="B24">Manni et&#xa0;al., 2021</xref>) was used to evaluate the completeness and accuracy of the genome assembly (non-default parameter: -m genome; -i <italic>Gadus_macrocephalus</italic>.chr.v1.fa; -l actinopterygii_odb10). The reference BUSCO database was actinopterygii_odb10.</p>
</sec>
<sec id="s2_3">
<title>Repeat and transposable element annotation</title>
<p>Repetitive sequences were annotated by homology alignment and <italic>de novo</italic> prediction. For homologous alignment, we used RepeatMasker v4.1.2-p1 (<uri xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</uri>) and repeat-proteinmask v4.1.0 (<uri xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</uri>) to annotate the transposable elements (TEs), with the genome sequences employed as queries against the Repbase database library. For <italic>de novo</italic> prediction, we used RepeatModeler v2.0.3 (<uri xlink:href="http://www.repeatmasker.org/RepeatModeler">http://www.repeatmasker.org/RepeatModeler</uri>), LTR_Finder v1.07 (<xref ref-type="bibr" rid="B35">Xu and Wang, 2007</xref>) and RepeatScout v1.0.5 (<uri xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</uri>) to construct a <italic>de novo</italic> repeat library with the default parameters. We used Tandem Repeats Finder (Trf) v4.07b (<xref ref-type="bibr" rid="B2">Benson, 1999</xref>) to annotate the tandem repeat elements. Finally, the results of all methods were integrated. After eliminating redundancy, we obtained the final annotated repeat sets.</p>
</sec>
<sec id="s2_4">
<title>Gene prediction and functional annotation</title>
<p>After masking repetitive sequences, protein-coding genes were predicted through a combination of homology-based, RNA-seq-based, and <italic>de novo</italic> prediction. For the homolog-based method, we downloaded the protein-coding sequences of <italic>G. morhua</italic> (Atlantic cod), <italic>G. chalcogrammus</italic> (Walleye pollock), <italic>Lota lota</italic> (Burbot), and <italic>Danio rerio</italic> (Zebrafish) from databases (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>) and retained the longest transcript of each gene for further analysis. Subsequently, we used tBLASTn to align protein-coding sequences to the genome (E-value: 1e-5). Based on the results described above, we used GeneWise v2.4.1 (<xref ref-type="bibr" rid="B3">Birney et&#xa0;al., 2004</xref>) to predict gene structures. For the RNA-seq-based method, transcriptomic data were assembled using Trinity v2.11 (<xref ref-type="bibr" rid="B15">Grabherr et&#xa0;al., 2011</xref>) with the default parameters. To detect gene structure, we used BlastN to align transcriptome to the genome (E-value: 1e-5). For <italic>de novo</italic> prediction, transcriptomic data generated from the Illumina platform were aligned to the genome by using HISAT2 v2.2.1 (<xref ref-type="bibr" rid="B20">Kim et&#xa0;al., 2019</xref>). Subsequently, based on the alignment results, Augustus v3.4.0 (<xref ref-type="bibr" rid="B31">Stanke et&#xa0;al., 2006</xref>), GlimmerHMM v3.0.4 (<xref ref-type="bibr" rid="B23">Majoros et&#xa0;al., 2004</xref>) and Genscan v1.0 (<xref ref-type="bibr" rid="B4">Burge and Karlin, 1997</xref>) were used to generate <italic>de novo-</italic>predicted gene sets. Three gene models were combined and redundant genes were removed with Maker v2.31.10 (<xref ref-type="bibr" rid="B5">Carson and Mark, 2011</xref>) (non-default parameter: est&#xa0;= transcriptome.fasta; protein = protein.fasta; softmask = 1; run_evm = 0; est2genome = 1; protein2genome = 1; trna = 0; cpus = 1; max_dna_len = 100000; min_contig = 10000; pred_flank = 200; pred_stats=0; AED_threshold = 1; min_protein = 50; alt_splice = 0; always_complete = 1). Finally, all protein-coding genes were integrated <italic>via</italic> the HiCESAP pipeline. 
For gene functional annotation, we used multiple databases, including the NCBI Non-Redundant (NR), Swiss-Prot, TrEMBL, InterPro, Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway databases, to annotate the functions of protein-coding genes. We used the NR, Swiss-Prot and TrEMBL (<xref ref-type="bibr" rid="B1">Bairoch and Apweiler, 2000</xref>) databases to annotate the function of the protein-coding genes by using BlastP (E-value: 1e-5). InterProScan v5.55-88.0 (<xref ref-type="bibr" rid="B19">Jones et&#xa0;al., 2014</xref>) was used to identify the functional domains of protein-coding genes with the InterPro database. GO and KEGG pathway analyses were used to obtain the pathway information of the protein-coding genes by using Blast2GO 5.2.5 (<xref ref-type="bibr" rid="B8">Conesa et&#xa0;al., 2005</xref>). To predict noncoding RNA (ncRNA) in the genome of <italic>G. macrocephalus</italic>, we used tRNAscan-SE v1.3.1 (<xref ref-type="bibr" rid="B6">Chan and Lowe, 2019</xref>) to identify transfer RNAs (tRNAs), and Infernal v1.1.2 (<xref ref-type="bibr" rid="B26">Nawrocki and Eddy, 2013</xref>) to annotate other ncRNAs, including microRNAs (miRNAs) and small nuclear RNAs (snRNAs), against the Rfam and miRBase databases.</p>
</sec>
<sec id="s2_5">
<title>Comparative genomic analyses</title>
<p>For comparative genomic analyses, we downloaded the coding sequences and corresponding protein sequences of 12 species from the online databases (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>) and retained the longest transcript and encoded protein sequence of each gene for further analysis. First, we used OrthoFinder v2.5.4 (<xref ref-type="bibr" rid="B13">Emms and Kelly, 2019</xref>) to obtain the single-copy genes of all species with parameters: -f: data; -S: diamond; -M: msa; -T: fasttree;&#xa0;-t: 50. To investigate the phylogenetic relationships among these species, the single-copy genes of all species were further aligned using MUSCLE v3.8.1551 (<xref ref-type="bibr" rid="B12">Edgar, 2004</xref>) with the default parameters. Based on the alignment results, we used RAxML v8.2.12 (<xref ref-type="bibr" rid="B30">Stamatakis, 2014</xref>) software to construct a maximum likelihood phylogenetic tree, and the parameters were as follows: raxmlHPC-PTHREADS; -T: 90; -m: PROTGAMMAJTT; -f: a; -p: 123; -x: 123; -#: 100; -n: out; -s: all_FAD.muscle.fasta. Subsequently, four time-calibration points were selected from TimeTree (<xref ref-type="bibr" rid="B21">Kumar et&#xa0;al., 2017</xref>): (1) <italic>Lepisosteus oculatus</italic> (Spotted gar) and <italic>D. rerio</italic> (298.8-342.5 million years ago [Mya]), (2) <italic>D. rerio</italic> and <italic>L. lota</italic> (180.0-264.0 Mya), (3) <italic>L. lota</italic> and <italic>G. morhua</italic> (23.9-51.6 Mya), and (4) <italic>Cynoglossus semilaevis</italic> (Tongue sole) and <italic>Oryzias latipes</italic> (Medaka) (97.5-153.0 Mya). MCMCTree within the PAML v4.9j package (<xref ref-type="bibr" rid="B36">Yang, 2007</xref>) was used to estimate the divergence times among species with parameters: ndata = 1; seqtype = 2; usedata = 2; clock = 3; RootAge = &lt; 4; model = 0; alpha = 0; ncatG = 5; cleandata&#xa0;= 0; BDparas = 1 1 0; kappa_gamma = 6 2; alpha_gamma&#xa0;= 1 1; rgene_gamma = 2 33; sigma2_gamma = 1 10; finetune = 1: 0.1 0.1 0.1 0.01 0.5. 
Based on the results described above, CAFE5 (<xref ref-type="bibr" rid="B25">Mendes et&#xa0;al., 2020</xref>) was used to identify gene families showing significant expansion or contraction. Biological pathways for the expanded and contracted gene families (P-value &lt; 0.01) were investigated by using GO and KEGG pathway enrichment analysis. To investigate chromosomal collinearity, we used JCVI (<xref ref-type="bibr" rid="B33">Tang et&#xa0;al., 2008</xref>) to generate the genome synteny of <italic>G. macrocephalus</italic> and <italic>G. morhua</italic>.</p>
</sec>
<sec id="s2_6">
<title>Preliminary analysis report</title>
<p>For long-read sequencing, approximately 2,574,681 clean reads and a total of 44.50 Gb of clean data were generated by the PacBio platform (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>). A 674.25 Mb genome sequence was obtained by genome assembly and polishing with a 45.05% GC content. Accordingly, the number of contigs was 4,343 with a contig N50 of 282.54 kb (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>). The read mapping rates of 95.33% and 99.79% and genome coverage rates of 99.57% and 99.94% determined for the Illumina and PacBio reads, respectively (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;4</bold>
</xref>), indicated the consistency and accuracy of the genome assembly. For Hi-C sequencing, approximately 447,087,580 clean reads and a total of 66.92 Gb of clean data were obtained from the Illumina platform (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>). After Hi-C data correction, 23 chromosomes were generated and 89.42% of the assembled sequences were anchored (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1A, B</bold>
</xref>). The genome size was 654.06 Mb, and the contig N50 and scaffold N50 were approximately 291.52 kb and 25.26 Mb, respectively (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>
<bold>;</bold> <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>). A comparison showed that more than 90% of the BUSCO core genes were completely identified for 5 Gadidae species (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). Compared with <italic>G. morhua</italic> and <italic>L. lota</italic>, the contig N50 of <italic>G. macrocephalus</italic>, <italic>G. chalcogrammus</italic> and <italic>Melanogrammus aeglefinus</italic> (Haddock) was smaller (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). However, the scaffold N50 of <italic>G. macrocephalus</italic>, <italic>G. morhua</italic>, <italic>G. chalcogrammus</italic> and <italic>L. lota</italic> exceeded 20 Mb, whereas that of <italic>M. aeglefinus</italic> did not (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). On the whole, the assembly of the <italic>G. macrocephalus</italic> genome reached the chromosome level, indicating that the assembly quality of the genome was relatively high. A total of 315.00 Mb of repetitive sequences were identified through homology alignment and <italic>de novo</italic> prediction, accounting for 48.16% of the genome of <italic>G. macrocephalus</italic> (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;5</bold>
</xref>). The percentages of different types of repetitive sequences were as follows: 16.57% (108,408,781 bp) DNA TEs, 5.45% (35,649,299 bp) long interspersed nuclear elements (LINEs), 0.77% (5,034,419 bp) short interspersed nuclear elements (SINEs), 7.53% (49,245,913 bp) long terminal repeats (LTRs), 8.83% (57,742,721 bp) satellites and 0.07% (474,124 bp) simple repeats (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;6</bold>
</xref>). The distributions of sequence divergence for the different TEs were examined with the <italic>de novo</italic> and RepeatMasker methods (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;1A, B</bold>
</xref>). After gene prediction, 23,843 protein-coding genes were identified in total, with an average of 9.27 exons per gene. The average lengths of each gene and its coding DNA sequence (CDS), exon and intron regions were 12,250 bp, 1,558 bp, 269.01 bp and 1,180 bp, respectively. Comparisons of gene length, CDS length, exon length and intron length between <italic>G. macrocephalus</italic> and other teleost species are shown in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;2</bold>
</xref>. Based on multiple databases, the functions of 22,581 (94.71%) protein-coding genes were annotated (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>). For ncRNAs, a total of 4.44 Mb (0.68%) of ncRNA was predicted, including 58.21 kb (0.01%) of miRNA, 995.71 kb (0.15%) of tRNA, 1.35 Mb (0.21%) of rRNA, and 2.04 Mb (0.31%) of snRNA (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;8</bold>
</xref>). BUSCO analysis showed that 3,295 (90.5%) BUSCO core genes were completely detected, including 3,241 (89.0%) single-copy genes and 54 (1.5%) multiple-copy genes, and missing BUSCO core genes accounted for 8.6% (311) of the genome (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;9</bold>
</xref>). Among the 3,640 BUSCO groups searched, 3,195 (87.8%) of the complete BUSCOs were detected in the genome annotations (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;9</bold>
</xref>). After gene family clustering, a total of 15,965 gene families involving 21,463 genes were found in the genome of <italic>G. macrocephalus</italic>, including 115 unique gene families and 7,733 common gene families (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;10</bold>
</xref>). The phylogenetic tree showed that <italic>G. macrocephalus</italic> was closely related to <italic>G. morhua</italic> and <italic>G. chalcogrammus</italic>, and the estimated divergence time between <italic>G. macrocephalus</italic> and <italic>G. morhua</italic> was ~5.7 (4.0-8.3) million years (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1D</bold>
</xref>). Compared with the common ancestor, the <italic>G. macrocephalus</italic> genome revealed that the number of expanded and contracted gene families was 148 and 1,389, respectively (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1D</bold>
</xref>). However, the genome of <italic>G. morhua</italic> contained 647 expanded and 273 contracted gene families. After filtering the results of CAFE5 with the P-value of 0.01, 73 expanded and 723 contracted gene families were obtained for <italic>G. macrocephalus</italic>. GO and KEGG pathway enrichment analysis showed that the expanded gene families of <italic>G. macrocephalus</italic> participated in multiple pathways, such as G protein-coupled receptor activity (GO:0004930, P-value = 4.36e-11), calcium ion binding (GO:0005509, P-value = 1.24e-15), mRNA surveillance pathway (ko03015, P-value = 1.94e-06), metabolism of xenobiotics by cytochrome P450 (ko00980, P-value = 9.17e-07) and drug metabolism-cytochrome P450 (ko00982, P-value = 1.13e-06) (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;3, 4</bold>
</xref>). Conversely, some pathways, e.g., microtubule-based movement (GO:0007018, P-value = 5.44e-14), dynein complex (GO:0030286, P-value = 1.08e-19), proximal tubule bicarbonate reclamation (ko04964, P-value = 6.02e-04), GABAergic synapse (ko04727, P-value = 8.72e-05) and pathways of neurodegeneration - multiple diseases (ko05022, P-value = 1.05e-05) (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;5, 6</bold>
</xref>), were enriched in the GO and KEGG pathway enrichment analysis of the contracted gene families of <italic>G. macrocephalus</italic>. In addition, the genome synteny between <italic>G. macrocephalus</italic> and <italic>G. morhua</italic> showed the collinearity and recombination of genes (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1E</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Characteristics of the genome and summary of comparative genomics analysis. <bold>(A)</bold> 23 chromosomes of <italic>G</italic>. <italic>macrocephalus</italic> were obtained from genome assembly. <bold>(B)</bold> Statistics of genome annotation of <italic>G</italic>. <italic>macrocephalus</italic>. From outside to inside, the results correspond to: (a) GC content; (b) gene density; (c) CDS content; (d) repetitive sequence content; (e) LTR content. <bold>(C)</bold> Ortholog clustering results among 13 species. <bold>(D)</bold> Phylogenetic analysis of 13 species and analysis of gene family expansion and contraction. The red dots on the branches indicate the calibration points. The divergence times of the 13 species are shown on the branches in blue numbers (Mya). The green and red numbers indicate the expanded and contracted gene families, respectively. <bold>(E)</bold> Genome synteny between <italic>G</italic>. <italic>macrocephalus</italic> and <italic>G</italic>. <italic>morhua</italic>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1067526-g001.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Comparison of assembly statistics for the genomes of <italic>G. macrocephalus</italic> and other Gadidae species.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Species</th>
<th valign="top" align="center">Genome size (Mb)</th>
<th valign="top" align="center">Contig N50 (kb)</th>
<th valign="top" align="center">Scaffold N50 (Mb)</th>
<th valign="top" align="center">Assembly level</th>
<th valign="top" align="center">Complete BUSCOs</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>
<italic>G. macrocephalus</italic>
</bold>
</td>
<td valign="top" align="center">654.06</td>
<td valign="top" align="center">291.52</td>
<td valign="top" align="center">25.26</td>
<td valign="top" align="left">Chromosome</td>
<td valign="top" align="center">3,295 (90.5%)<sup>e</sup>
</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>
<italic>G. morhua</italic>
<sup>a</sup>
</bold>
</td>
<td valign="top" align="center">669.97</td>
<td valign="top" align="center">1,015.66</td>
<td valign="top" align="center">28.73</td>
<td valign="top" align="left">Chromosome</td>
<td valign="top" align="center">3,523 (96.8%)<sup>e</sup>
</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>
<italic>G. chalcogrammus</italic>
<sup>b</sup>
</bold>
</td>
<td valign="top" align="center">629.66</td>
<td valign="top" align="center">358.86</td>
<td valign="top" align="center">27.04</td>
<td valign="top" align="left">Chromosome</td>
<td valign="top" align="center">3,290 (90.4%)<sup>e</sup>
</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>
<italic>M. aeglefinus</italic>
<sup>c</sup>
</bold>
</td>
<td valign="top" align="center">653.00</td>
<td valign="top" align="center">78.00</td>
<td valign="top" align="center">0.21</td>
<td valign="top" align="left">Scaffold</td>
<td valign="top" align="center">4,169 (90.9%)<sup>f</sup>
</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>
<italic>L. lota</italic>
<sup>d</sup>
</bold>
</td>
<td valign="top" align="center">575.92</td>
<td valign="top" align="center">2,010.00</td>
<td valign="top" align="center">22.10</td>
<td valign="top" align="left">Chromosome</td>
<td valign="top" align="center">4,344 (94.76%)<sup>f</sup>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<sup>a</sup> from <uri xlink:href="https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_902167405.1/">https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_902167405.1/</uri>.</p>
</fn>
<fn>
<p>
<sup>b</sup> from <xref ref-type="bibr" rid="B27">Noh et&#xa0;al., 2022</xref>.</p>
</fn>
<fn>
<p>
<sup>c</sup> from <xref ref-type="bibr" rid="B32">T&#xf8;rresen et&#xa0;al., 2018</xref>.</p>
</fn>
<fn>
<p>
<sup>d</sup> from <xref ref-type="bibr" rid="B16">Han et&#xa0;al., 2021</xref>.</p>
</fn>
<fn>
<p>
<sup>e</sup> % of 3640 genes.</p>
</fn>
<fn>
<p>
<sup>f</sup> % of 4584 genes.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s3" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <uri xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</uri>, SRR21531029, SRR21529723, SRR21531329, SRR21528393; <uri xlink:href="https://bigd.big.ac.cn/gwh/">https://bigd.big.ac.cn/gwh/</uri>, GWHBMBB00000000. All annotation files (repeats, proteins, CDS, gff) and genome were uploaded to the Figshare database (<uri xlink:href="https://doi.org/10.6084/m9.figshare.21393969.v1">https://doi.org/10.6084/m9.figshare.21393969.v1</uri>).</p>
</sec>
<sec id="s4" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The animal study was reviewed and approved by the Institutional Animal Care and Use Committee of the First Institute of Oceanography, Ministry of Natural Resources.</p>
</sec>
<sec id="s5" sec-type="author-contributions">
<title>Author contributions</title>
<p>LLZ and SL designed the study. YL and CJ performed the collection of <italic>G. macrocephalus</italic> and extracted the DNA and RNA samples. YM and LZ performed genome assembly, evaluation, and annotation. YM and SL performed comparative genomics. YM drafted the manuscript. CJ, SL, and LLZ revised the manuscript. All authors read and approved the final manuscript.</p>
</sec>
<sec id="s6" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by the National Natural Science Foundation of China (41706187).</p>
</sec>
<sec id="s7" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s8" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec id="s9" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmars.2022.1067526/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmars.2022.1067526/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bairoch</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Apweiler</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>The SWISS-PROT protein sequence database and its supplement TrEMBL in 2000</article-title>. <source>Nucleic Acids Res.</source> <volume>28</volume>, <fpage>45</fpage>&#x2013;<lpage>48</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/28.1.45</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benson</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Tandem repeats finder: A program to analyze DNA sequences</article-title>. <source>Nucleic Acids Res.</source> <volume>27</volume> (<issue>2</issue>), <fpage>573</fpage>&#x2013;<lpage>580</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/27.2.573</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Birney</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Clamp</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Durbin</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>GeneWise and genomewise</article-title>. <source>Genome Res.</source> <volume>14</volume>, <fpage>988</fpage>&#x2013;<lpage>995</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/gr.1865504</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burge</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Karlin</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Prediction of complete gene structures in human genomic DNA</article-title>. <source>J. Mol. Biol.</source> <volume>268</volume>, <fpage>78</fpage>&#x2013;<lpage>94</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1006/jmbi.1997.0951</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carson</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Mark</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>MAKER2: An annotation pipeline and genome-database management tool for second-generation genome projects</article-title>. <source>BMC Bioinform.</source> <volume>12</volume>, <elocation-id>491</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2105-12-491</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname> <given-names>P. P.</given-names>
</name>
<name>
<surname>Lowe</surname> <given-names>T. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>tRNAscan-SE: Searching for tRNA genes in genomic sequences</article-title>. <source>Methods Mol. Biol.</source> <volume>1962</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-1-4939-9173-0_1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>H. Y.</given-names>
</name>
<name>
<surname>Concepcion</surname> <given-names>G. T.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>X. W.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Haplotype-resolved <italic>de novo</italic> assembly using phased assembly graphs with hifiasm</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>2</issue>), <fpage>170</fpage>&#x2013;<lpage>175</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41592-020-01056-5</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conesa</surname> <given-names>A.</given-names>
</name>
<name>
<surname>G&#xf6;tz</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Garc&#x131;a-Gomez</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Terol</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tal&#xf6;n</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Robles</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research</article-title>. <source>Bioinformatics</source> <volume>21</volume>, <fpage>3674</fpage>&#x2013;<lpage>3676</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bti610</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudchenko</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Batra</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Omer</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Nyquist</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Hoeger</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Durand</surname> <given-names>N. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>
<italic>De novo</italic> assembly of the <italic>Aedes aegypti</italic> genome using Hi-C yields chromosome-length scaffolds</article-title>. <source>Science</source> <volume>356</volume> (<issue>6333</issue>), <fpage>92</fpage>&#x2013;<lpage>95</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.aal3327</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Durand</surname> <given-names>N. C.</given-names>
</name>
<name>
<surname>Robinson</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Shamim</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Machol</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Mesirov</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Lander</surname> <given-names>E. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>b). <article-title>Juicebox provides a visualization system for Hi-C contact maps with unlimited zoom</article-title>. <source>Cell Syst.</source> <volume>3</volume> (<issue>1</issue>), <fpage>99</fpage>&#x2013;<lpage>101</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cels.2015.07.012</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Durand</surname> <given-names>N. C.</given-names>
</name>
<name>
<surname>Shamim</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Machol</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Huntley</surname> <given-names>M. H.</given-names>
</name>
<name>
<surname>Lander</surname> <given-names>E. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>a). <article-title>Juicer provides a one-click system for analyzing loop-resolution Hi-C experiments</article-title>. <source>Cell Syst.</source> <volume>3</volume>, <fpage>95</fpage>&#x2013;<lpage>98</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cels.2016.07.002</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Edgar</surname> <given-names>R. C.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>MUSCLE: Multiple sequence alignment with high accuracy and high throughput</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>1792</fpage>&#x2013;<lpage>1797</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Emms</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Kelly</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>OrthoFinder: Phylogenetic orthology inference for comparative genomics</article-title>. <source>Genome Biol.</source> <volume>20</volume>, <fpage>238</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-019-1832-y</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>FAO</collab>
</person-group> (<year>2020</year>). <source>FAO FishStatJ database</source>. Available at: <uri xlink:href="http://www.fao.org/fishery/statistics/software/fishstatj/en">http://www.fao.org/fishery/statistics/software/fishstatj/en</uri>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grabherr</surname> <given-names>M. G.</given-names>
</name>
<name>
<surname>Haas</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Yassour</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Levin</surname> <given-names>J. Z.</given-names>
</name>
<name>
<surname>Thompson</surname> <given-names>D. A.</given-names>
</name>
<name>
<surname>Amit</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Full-length transcriptome assembly from RNA-seq data without a reference genome</article-title>. <source>Nat. Biotechnol.</source> <volume>29</volume>, <fpage>644</fpage>&#x2013;<lpage>652</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nbt.1883</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Chromosome-level genome assembly of burbot (<italic>Lota lota</italic>) provides insights into the evolutionary adaptations in freshwater</article-title>. <source>Mol. Ecol. Resour.</source> <volume>21</volume> (<issue>6</issue>), <fpage>2022</fpage>&#x2013;<lpage>2033</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1755-0998.13382</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hart</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Clemens</surname> <given-names>W. A.</given-names>
</name>
</person-group> (<year>1973</year>). <article-title>Pacific fishes of Canada</article-title>. <source>Bull. Fish. Res. Board Can.</source> <volume>47</volume>, <fpage>180</fpage>&#x2013;<lpage>730</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/1443050</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z. Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>NextPolish: A fast and efficient genome polishing tool for long-read assembly</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>7</issue>), <fpage>2253</fpage>&#x2013;<lpage>2255</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btz891</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jones</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Binns</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>H. Y.</given-names>
</name>
<name>
<surname>Fraser</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>McAnulla</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>InterProScan 5: Genome-scale protein function classification</article-title>. <source>Bioinformatics</source> <volume>30</volume> (<issue>9</issue>), <fpage>1236</fpage>&#x2013;<lpage>1240</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btu031</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Paggi</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Bennett</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Salzberg</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume>, <fpage>907</fpage>&#x2013;<lpage>915</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41587-019-0201-4</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Stecher</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Suleski</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hedges</surname> <given-names>S. B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>TimeTree: A resource for timelines, timetrees, and divergence times</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume>, <fpage>1812</fpage>&#x2013;<lpage>1819</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/msx116</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Comparison of the two up-to-date sequencing technologies for genome assembly: HiFi reads of Pacific Biosciences Sequel II system and ultralong reads of Oxford Nanopore</article-title>. <source>Gigascience</source> <volume>9</volume> (<issue>12</issue>), <elocation-id>giaa123</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa123</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Majoros</surname> <given-names>W. H.</given-names>
</name>
<name>
<surname>Pertea</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Salzberg</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>TigrScan and GlimmerHMM: Two open source ab initio eukaryotic gene-finders</article-title>. <source>Bioinformatics</source> <volume>20</volume>, <fpage>2878</fpage>&#x2013;<lpage>2879</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bth315</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manni</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Berkeley</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Seppey</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sim&#xe3;o</surname> <given-names>F. A.</given-names>
</name>
<name>
<surname>Zdobnov</surname> <given-names>E. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>BUSCO update: Novel and streamlined workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and viral genomes</article-title>. <source>Mol. Biol. Evol.</source> <volume>38</volume> (<issue>10</issue>), <fpage>4647</fpage>&#x2013;<lpage>4654</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/msab199</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendes</surname> <given-names>F. K.</given-names>
</name>
<name>
<surname>Vanderpool</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Fulton</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Hahn</surname> <given-names>M. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>CAFE 5 models variation in evolutionary rates among gene families</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>22</issue>), <fpage>5516</fpage>&#x2013;<lpage>5518</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1022</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nawrocki</surname> <given-names>E. P.</given-names>
</name>
<name>
<surname>Eddy</surname> <given-names>S. R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Infernal 1.1: 100-fold faster RNA homology searches</article-title>. <source>Bioinformatics</source> <volume>29</volume> (<issue>22</issue>), <fpage>2933</fpage>&#x2013;<lpage>2935</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btt509</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Noh</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>B. C.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jeon</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>Y. O.</given-names>
</name>
<name>
<surname>Byun</surname> <given-names>S. G.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Draft assembled genome of walleye Pollock (<italic>Gadus chalcogrammus</italic>)</article-title>. <source>Front. Mar. Sci.</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2022.744941</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sakurai</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>An overview of the Oyashio ecosystem</article-title>. <source>Deep Sea Res. Part II Top. Stud. Oceanogr.</source> <volume>54</volume> (<issue>23</issue>), <fpage>2526</fpage>&#x2013;<lpage>2542</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.dsr2.2007.02.007</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sakurai</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hattori</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Reproductive behavior of Pacific cod in captivity</article-title>. <source>Fish. Sci.</source> <volume>62</volume>, <fpage>222</fpage>&#x2013;<lpage>228</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2331/fishsci.62.222</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stamatakis</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>RAxML version 8: A tool for phylogenetic analysis and post-analysis of large phylogenies</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>1312</fpage>&#x2013;<lpage>1313</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btu033</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stanke</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xf6;ffmann</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Morgenstern</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Waack</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources</article-title>. <source>BMC Bioinform.</source> <volume>7</volume>, <elocation-id>62</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2105-7-62</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>T&#xf8;rresen</surname> <given-names>O. K.</given-names>
</name>
<name>
<surname>Brieuc</surname> <given-names>M. S. O.</given-names>
</name>
<name>
<surname>Solbakken</surname> <given-names>M. H.</given-names>
</name>
<name>
<surname>S&#xf8;rhus</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Nederbragt</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Jakobsen</surname> <given-names>K. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Genomic architecture of haddock (<italic>Melanogrammus aeglefinus</italic>) shows expansions of innate immune genes and short tandem repeats</article-title>. <source>BMC Genomics</source> <volume>19</volume> (<issue>1</issue>), <fpage>240</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-018-4616-y</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Bowers</surname> <given-names>J. E.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ming</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Paterson</surname> <given-names>A. H.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Synteny and collinearity in plant genomes</article-title>. <source>Science</source> <volume>320</volume> (<issue>5875</issue>), <fpage>486</fpage>&#x2013;<lpage>488</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.1153917</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>X. D.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Fine-scale variability in otolith chemistry: Application to the life history analysis of Pacific cod (<italic>Gadus macrocephalus</italic>) in the Yellow Sea</article-title>. <source>Estuar. Coast. Shelf S.</source> <volume>257</volume>, <elocation-id>107392</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecss.2021.107392</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>LTR_FINDER: An efficient tool for the prediction of full-length LTR retrotransposons</article-title>. <source>Nucleic Acids Res.</source> <volume>35</volume>, <fpage>265</fpage>&#x2013;<lpage>268</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkm286</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>PAML 4: Phylogenetic analysis by maximum likelihood</article-title>. <source>Mol. Biol. Evol.</source> <volume>24</volume> (<issue>8</issue>), <fpage>1586</fpage>&#x2013;<lpage>1591</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/msm088</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>