<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1352253</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Evidence-based unification of potato gene models with the UniTato collaborative genome browser</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zagor&#x161;&#x10d;ak</surname>
<given-names>Maja</given-names>
</name>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2753712"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zrimec</surname>
<given-names>Jan</given-names>
</name>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/459946"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bleker</surname>
<given-names>Carissa</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/653451"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nolte</surname>
<given-names>Nadja</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/2706802"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Juter&#x161;ek</surname>
<given-names>Mojca</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1664277"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ram&#x161;ak</surname>
<given-names>&#x17d;iva</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/509067"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gruden</surname>
<given-names>Kristina</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/50276"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Petek</surname>
<given-names>Marko</given-names>
</name>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/499611"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>Department of Biotechnology and Systems Biology, National Institute of Biology</institution>, <addr-line>Ljubljana</addr-line>, <country>Slovenia</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Hiroshi Ezura, University of Tsukuba, Japan</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Ya-Ping Lin, World Vegetable Center, Taiwan</p>
<p>Weihua Pan, Chinese Academy of Agricultural Sciences, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Marko Petek, <email xlink:href="mailto:marko.petek@nib.si">marko.petek@nib.si</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>06</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1352253</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Zagor&#x161;&#x10d;ak, Zrimec, Bleker, Nolte, Juter&#x161;ek, Ram&#x161;ak, Gruden and Petek</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Zagor&#x161;&#x10d;ak, Zrimec, Bleker, Nolte, Juter&#x161;ek, Ram&#x161;ak, Gruden and Petek</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Potato (<italic>Solanum tuberosum</italic>) is the most popular tuber crop and a model organism. A variety of gene models for potato exist, and despite frequent updates, they are not unified. This hinders the comparison of gene models across versions, limits the ability to reuse experimental data without significant re-analysis, and leads to missing or wrongly annotated genes. Here, we unify the recent potato double monoploid v4 and v6 gene models by developing an automated merging protocol, resulting in a Unified poTato genome model (UniTato). We subsequently established an Apollo genome browser (<underline>unitato.nib.si</underline>) that enables public access to UniTato and further community-based curation. We demonstrate how the UniTato resource can help resolve problems with missing or misplaced genes and can be used to update or consolidate a wider set of gene models or genome information. The automated protocol, genome annotation files, and a comprehensive translation table are provided at <ext-link ext-link-type="uri" xlink:href="https://github.com/NIB-SI/unitato">github.com/NIB-SI/unitato</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>
<italic>Solanum tuberosum</italic>
</kwd>
<kwd>bioinformatics analysis</kwd>
<kwd>plant genome annotation</kwd>
<kwd>Solanaceae</kwd>
<kwd>gene model annotations</kwd>
<kwd>Phureja group</kwd>
<kwd>GFF files</kwd>
</kwd-group>
<contract-num rid="cn001">862858</contract-num>
<contract-num rid="cn002">101072892</contract-num>
<contract-num rid="cn003">P4-0165, P4-0431, J2-3060, Z4-50146</contract-num>
<contract-sponsor id="cn001">Horizon 2020 Framework Programme<named-content content-type="fundref-id">10.13039/100010661</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">HORIZON EUROPE Marie Sklodowska-Curie Actions<named-content content-type="fundref-id">10.13039/100018694</named-content>
</contract-sponsor>
<contract-sponsor id="cn003">Javna Agencija za Raziskovalno Dejavnost RS<named-content content-type="fundref-id">10.13039/501100004329</named-content>
</contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="50"/>
<page-count count="10"/>
<word-count count="5394"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Systems and Synthetic Biology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>
<italic>Solanum tuberosum</italic> (potato) is among the most important food crops and a model tuber species. The crop is highly useful as an organism for studying plant responses to environmental stress factors (<xref ref-type="bibr" rid="B24">Lukan et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B2">Bleker et&#xa0;al., 2024</xref>), such as herbivory (<xref ref-type="bibr" rid="B27">Petek et&#xa0;al., 2020a</xref>), viral diseases (<xref ref-type="bibr" rid="B1">Baebler et&#xa0;al., 2020</xref>), transcriptional (<xref ref-type="bibr" rid="B40">Toma&#x17e; et&#xa0;al., 2023</xref>) and small RNA regulation (<xref ref-type="bibr" rid="B19">Kri&#x17e;nik et&#xa0;al., 2020</xref>), single and combined abiotic stress responses (<xref ref-type="bibr" rid="B10">Demirel et&#xa0;al., 2020</xref>), and growth-defense trade-offs (<xref ref-type="bibr" rid="B17">Huot et&#xa0;al., 2014</xref>). Potato also serves as an excellent platform for transferring and testing vast amounts of knowledge garnered in <italic>Arabidopsis</italic> with an agriculturally relevant crop (<xref ref-type="bibr" rid="B33">Ram&#x161;ak et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B50">Zagor&#x161;&#x10d;ak et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B35">Schwacke et&#xa0;al., 2019</xref>), helping to address present-day food security issues (<xref ref-type="bibr" rid="B7">Cole et&#xa0;al., 2018</xref>).</p>
<p>Apart from novel wild potato (<xref ref-type="bibr" rid="B37">Tang et&#xa0;al., 2022</xref>) and pan-genome assemblies (<xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B4">Bozan et&#xa0;al., 2023</xref>), the double monoploid (DM) clone of group Phureja DM1&#x2013;3 516 R44 was until recently the standard variety on which gene models were defined. DM genome assemblies and gene models have been introduced by multiple consortia, including the Potato Genome Sequencing Consortium (PGSC), International Tomato Genome Consortium (ITAG), and Buell Lab (University of Georgia). These sequenced and assembled (<xref ref-type="bibr" rid="B47">Yandell and Ence, 2012</xref>) up to 88% of the potato genome that included between 35,004 (ITAG) (<xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>) and 39,428 (PGSCv4.04) (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>) gene models, with the recent nanopore long read assembly DMv6.1 (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>) annotating 40,652 genes (in the working version). Moreover, we previously unified the PGSC v4 and ITAG gene models into a merged DMv4n version containing 49,322 genes (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>).</p>
<p>In contrast to <italic>Arabidopsis</italic> gene models, where version control and gene model tracking have been utilized for over a decade and gene annotations are optimized (<xref ref-type="bibr" rid="B34">Rhee et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B20">Lamesch et&#xa0;al., 2012</xref>), this is not the case with potato and similar less mature crop assemblies. Here, sequencing assemblies and gene models are only slowly advancing, and while each subsequent version improves sequencing depth, coverage, and assembly statistics, the gene models are reformulated. Frequently, previous gene model versions are not accounted for, and mapping and translation tables are not provided (e.g., <xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>). This unfortunately limits potato research in multiple ways: i) hindering comparison of gene models across versions and experiments, ii) limiting the reuse and integration of experimental data based on older model versions (e.g., v4) with the latest version (v6) without extensive reprocessing of the RNA-Seq data, and iii) impeding the use of certain popular comparative genomics resources, such as Plaza and Ensembl Plants (<xref ref-type="bibr" rid="B41">Valentin et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B42">Van Bel et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B49">Yates et&#xa0;al., 2022</xref>), which, at the time of writing, have not been updated to the latest gene model versions (v6). Plaza is a platform for comparative, evolutionary, and functional plant genomics, which even in its latest version (5.0) uses v4 potato gene models (<xref ref-type="bibr" rid="B42">Van Bel et&#xa0;al., 2022</xref>). 
On the other hand, Ensembl Plants (<xref ref-type="bibr" rid="B49">Yates et&#xa0;al., 2022</xref>), a plant genome analysis platform, is based on even older PGSC v3 potato gene models (<xref ref-type="bibr" rid="B43">Visser et&#xa0;al., 2009</xref>), yet is a source for other derived ontology (<xref ref-type="bibr" rid="B35">Schwacke et&#xa0;al., 2019</xref>) and transcription factor databases (<xref ref-type="bibr" rid="B38">Tian et&#xa0;al., 2020</xref>).</p>
<p>In addition to the previously mentioned issues, inadequate consideration of previous gene model information has resulted in the omission of a number of known genes (<xref ref-type="bibr" rid="B43">Visser et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>; <xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>). In the case of the recent v6 gene models, we observed that they do not include certain well-known genes with important molecular functions, as they do not account for previous gene model information (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>). An example is the transcription factor TGA2, an essential regulator of hormonal signaling (<xref ref-type="bibr" rid="B40">Toma&#x17e; et&#xa0;al., 2023</xref>). Aside from such missing genes, some are also moved, merged, or split (as presented in the sections below). These deviations can lead to differences in interpretation in downstream analyses (e.g., gene family expansion, differential expression, marker selection, gene set enrichment analysis) (<xref ref-type="bibr" rid="B47">Yandell and Ence, 2012</xref>). In addition to these imperfect annotations negatively affecting future experiments, existing published results using previous gene models, including, e.g., AlphaFold structure predictions (<xref ref-type="bibr" rid="B40">Toma&#x17e; et&#xa0;al., 2023</xref>), have become outdated, making it essential to update and consolidate gene predictions.</p>
<p>To help resolve these issues, here, we expand the ITAG and PGSC v4 annotations with v6 annotations (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>), unifying the different gene models. In addition, we include experiment-based evidence from our pan-transcriptome (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>), short- and long-read sequencing data (<xref ref-type="bibr" rid="B23">Lukan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>), and Solanaceae proteomes (<xref ref-type="bibr" rid="B16">Hosmani et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2024</xref>), thereby creating an improved and more accurate potato gene annotation model for downstream analyses. To ensure the transparency and accuracy of future gene models, we present the Unified poTato genome annotation resource (UniTato). UniTato is provided through an Apollo web interface (<xref ref-type="bibr" rid="B12">Dunn et&#xa0;al., 2019</xref>), enabling a community-driven effort for real-time revision and enhancement of gene models by experts. This will increase the interpretational power of experimental datasets and facilitate the reuse of experimental analyses conducted on v4, thus expediting progress in potato research.</p>
</sec>
<sec id="s2" sec-type="results">
<label>2</label>
<title>Results</title>
<sec id="s2_1">
<label>2.1</label>
<title>A unified v4 and v6 potato genome annotation</title>
<p>To compare the potato gene model versions, we mapped gene annotations of older PGSCv4.04 (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>) and ITAG assemblies (<xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>) to the recent potato DMv6.1 assembly (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>) using Liftoff (<xref ref-type="bibr" rid="B36">Shumate and Salzberg, 2020</xref>) and used Bedtools <italic>intersect</italic> (<xref ref-type="bibr" rid="B32">Quinlan and Hall, 2010</xref>) to find intersecting genes (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, see Methods 2.2). Briefly, Liftoff is a tool that accurately maps annotations between assemblies of the same or closely related species. We used it to transfer the gene model annotations from v4 to the v6 assembly. Two genome assemblies (either ITAG or PGSC v4 and DMv6.1) and a v4 annotation file (ITAG or PGSC v4, respectively) were provided as input. The v4 gene models were aligned chromosome by chromosome to the v6 genome assembly. Bedtools <italic>intersect</italic> (<xref ref-type="bibr" rid="B32">Quinlan and Hall, 2010</xref>) was then used to check for overlap (intersection) between the sets of v4 and v6 gene models.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Schematic overview of the procedure used to create a unified DM v4 and v6 potato genome annotation resource.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1352253-g001.tif"/>
</fig>
<p>We first explored the Liftoff <italic>flank</italic> parameter, which controls the amount of flanking sequence upstream and downstream of a gene, by using a setting of either none or 500 nt. Expanding each v4 gene sequence (combined PGSC/ITAG v4 dataset) upstream and downstream before mapping includes the gene neighborhood and can thereby improve mapping precision. This is especially important for the ITAG annotation, which contains only CDS regions, as opposed to the PGSC annotation, where complete mRNA sequences are provided. Without a flanking sequence (0 nt), we mapped 72,143 v4 gene models, whereas when using a flanking sequence of 500 nt length, we mapped 73,820 v4 gene models. Regardless of the <italic>flank</italic> parameter setting, 316 PGSC and 211 ITAG gene models could not be mapped to the v6 genome assembly (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Overview of total gene counts and Liftoff results at different flank parameter values for v4 and v6 gene models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left"/>
<th valign="middle" align="center">Total gene count</th>
<th valign="middle" align="center">No <italic>flank</italic>, unmapped</th>
<th valign="middle" align="center">
<italic>Flank</italic> 500 nt, unmapped</th>
<th valign="middle" align="center">No <italic>flank</italic>, mapped</th>
<th valign="middle" align="center">
<italic>Flank</italic> 500 nt, mapped</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">
<bold>PGSC (v4)</bold>
</td>
<td valign="middle" align="center">39,428 (100.00%)</td>
<td valign="middle" align="center">492 (1.25%)</td>
<td valign="middle" align="center">363 (0.92%)</td>
<td valign="middle" align="center">38,936 (98.75%)</td>
<td valign="middle" align="center">39,065 (99.08%)</td>
</tr>
<tr>
<td valign="middle" align="center">
<bold>ITAG (v4)</bold>
</td>
<td valign="middle" align="center">35,004 (100.00%)</td>
<td valign="middle" align="center">1,797 (5.13%)</td>
<td valign="middle" align="center">249 (0.71%)</td>
<td valign="middle" align="center">33,207 (94.87%)</td>
<td valign="middle" align="center">34,755 (99.29%)</td>
</tr>
<tr>
<td valign="middle" align="center">
<bold>DMv6.1 working</bold>
</td>
<td valign="middle" align="center">40,652 (32,917 hc)</td>
<td valign="middle" align="center">/</td>
<td valign="middle" align="center">/</td>
<td valign="middle" align="center">/</td>
<td valign="middle" align="center">/</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Three hundred sixteen PGSC and 211 ITAG gene models could not be mapped to the v6 genome assembly (unmapped) with either flank parameter value.</p>
</fn>
<fn>
<p>hc, gene models defined as &#x201c;high confidence&#x201d; in v6 (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Next, to identify the overlap between the sets of v4 and v6 gene models, we explored the Bedtools <italic>F</italic> parameter, which allows for control over the minimum overlap required as a fraction of the length of v4 gene models. By ranging <italic>F</italic> from 0.0001 to 1, we found that 0.30 was the optimum value (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S1</bold>
</xref>; <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). With the Liftoff <italic>flank</italic> parameter of 500 nt, we achieved a mapping coverage (<italic>F</italic> &gt;= 0.3, high identity) with 56,776 v4 gene models mapping to 31,594 v6 models [of these, 92% belong to v6 high confidence gene models as defined by <xref ref-type="bibr" rid="B30">Pham et&#xa0;al. (2020)</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>]. Since <italic>flank</italic> can also capture v4 assembly gaps (N runs) or misassemblies that were corrected in the v6 assembly, using it may not always be the optimal choice. For example, we found that no flanking sequence (0 nt) achieved a better mapping coverage <italic>F</italic> with 387 v4 gene models mapping to 458 v6 models. We thus decided to keep the Liftoff result with the better mapping coverage per gene (either 0 or 500 nt <italic>flank</italic>), as reported above. For gene models with a Bedtools coverage <italic>F</italic> above or equal to 0.30, we kept the v6 gene models and added 17,272 v4 models with low coverage (<italic>F</italic> &lt; 0.30). This merge resulted in the final genome annotation model, termed UniTato (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2A, B</bold>
</xref>). Note that the v6 genome assembly has many inversions compared to the v4 assembly, most evidently in chromosome 12 (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Coverage of v4 to v6 gene models by the number of models and % of all v6 models, at different Bedtools <italic>intersect</italic> sequence coverage (<italic>F</italic>) parameter values.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left"/>
<th valign="middle" align="center">Version</th>
<th valign="middle" align="center">
<italic>F</italic> = 1</th>
<th valign="middle" align="center">
<italic>F</italic> &gt;= 0.30</th>
<th valign="middle" align="center">
<italic>F</italic> &gt;= 0.0001</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">
<bold>PGSC/ITAG no <italic>flank</italic>, working version</bold>
</td>
<td valign="middle" align="center">v4</td>
<td valign="middle" align="center">40,252 (54.08%)</td>
<td valign="middle" align="center">56,512 (75.92%)</td>
<td valign="middle" align="center">57,663 (77.47%)</td>
</tr>
<tr>
<td valign="middle" align="center">v6</td>
<td valign="middle" align="center">27,103 (66.67%)</td>
<td valign="middle" align="center">31,481 (77.44%)</td>
<td valign="middle" align="center">32,263 (79.36%)</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<bold>PGSC/ITAG no <italic>flank</italic>, high confidence</bold>
</td>
<td valign="middle" align="center">v4</td>
<td valign="middle" align="center">38,095 (51.18%)</td>
<td valign="middle" align="center">53,360 (71.69%)</td>
<td valign="middle" align="center">54,199 (72.81%)</td>
</tr>
<tr>
<td valign="middle" align="center">v6</td>
<td valign="middle" align="center">25,331 (76.95%)</td>
<td valign="middle" align="center">28,986 (88.06%)</td>
<td valign="middle" align="center">29,470 (89.53%)</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<bold>PGSC/ITAG <italic>flank</italic> 500 nt, working version</bold>
</td>
<td valign="middle" align="center">v4</td>
<td valign="middle" align="center">40,586 (54.53%)</td>
<td valign="middle" align="center">57,040 (76.63%)</td>
<td valign="middle" align="center">58,209 (78.20%)</td>
</tr>
<tr>
<td valign="middle" align="center">v6</td>
<td valign="middle" align="center">27,261 (67.06%)</td>
<td valign="middle" align="center">31,669 (77.90%)</td>
<td valign="middle" align="center">32,452 (79.83%)</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<bold>PGSC/ITAG <italic>flank</italic> 500 nt, high confidence</bold>
</td>
<td valign="middle" align="center">v4</td>
<td valign="middle" align="center">38,373 (51.55%)</td>
<td valign="middle" align="center">53,793 (72.27%)</td>
<td valign="middle" align="center">54,637 (73.40%)</td>
</tr>
<tr>
<td valign="middle" align="center">v6</td>
<td valign="middle" align="center">25,443 (77.29%)</td>
<td valign="middle" align="center">29,113 (88.44%)</td>
<td valign="middle" align="center">29,586 (89.88%)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Genes that mapped with the same F value with and without flank are counted twice.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Mapping v4 to v6 gene models. <bold>(A)</bold> Venn diagram of overlaps between the v4 and v6 gene models obtained using Liftoff and Bedtools <italic>intersect</italic> (<italic>F</italic>&#xa0;&gt;&#xa0;0.30). In the intersected areas, note that the number of v6 IDs is shown. <bold>(B)</bold> Chord diagram of the synteny between v4 and v6 gene models. The&#xa0;diagram shows that most chromosomes are almost completely syntenic across models; however, some scaffolds remain unanchored. <bold>(C)</bold>&#xa0;Rearrangements of chromosome 12 in v6 genome assembly vs. the v4 genome assembly. The lines represent synteny between gene model coding regions. Other chromosomes&#x2019; pairwise synteny graphs can be found on the UniTato GitHub page (<uri xlink:href="https://github.com/NIB-SI/unitato">https://github.com/NIB-SI/unitato</uri>).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1352253-g002.tif"/>
</fig>
<p>Of the observed 17,272 v4 gene models with low coverage (<italic>F</italic> &lt; 0.30), 11,832 were from the PGSC dataset and 5,440 from ITAG. These sequences are present in the v6 assembly but were not identified as genes (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>). We decided to retain all such &#x201c;rescued&#x201d; genes and assigned them the identifier from v4. Of these, 16,117 mapped to the intergenic regions in v6 (<italic>F</italic> &lt; 0.0001). On the other hand, 8,888 v6 working version gene models were not supported by v4 annotations (of these, 5,979 with v6 annotation &#x201c;hypothetical protein&#x201d;), of which 3,742 were high-confidence v6 gene models. Finally, we further analyzed the genome-mapped and unmapped v4 genes, searching for evidence of their expression within our published pan-transcriptome dataset (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>). The v4 gene models that do not match any v6 gene models (<italic>F</italic> &lt; 0.0001) but do match tetraploid transcriptomes (3,596 out of 15,590 gene models) were considered to be valid genes. On the other hand, some of the 11,924 gene models that match neither the v6 models nor the pan-transcriptome are likely unreliable gene model predictions. Note that 292 out of 559 v4 gene models did not map to the v6 genome yet match tetraploid D&#xe9;sir&#xe9;e, Rywal, or PW363 transcripts. These genes were lost with the reassembly of the DM scaffold in v6 (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>). 
Moreover, we obtained additional evidence about the reliability of the transcriptome-unsupported rescued genes, by mapping to the genome RNA-Seq reads of DM Phureja and tetraploid cultivars (<xref ref-type="bibr" rid="B23">Lukan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>) as well as protein sequences of <italic>Arabidopsis</italic> (<xref ref-type="bibr" rid="B6">Cheng et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B25">Pasha et&#xa0;al., 2020</xref>) and three <italic>Solanaceae</italic> species (<xref ref-type="bibr" rid="B16">Hosmani et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2024</xref>) (see Results ch. 2.2). Finally, the newly generated GFF3 file and a table linking identifiers of ITAG and PGSC v4 gene models with v6 gene models are available at GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/NIB-SI/unitato">https://github.com/NIB-SI/unitato</ext-link>).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>UniTato database access and user interface</title>
<p>The UniTato database (accessible at <ext-link ext-link-type="uri" xlink:href="http://unitato.nib.si/">http://unitato.nib.si/</ext-link>) is hosted in a deployment of the community-focused genome annotation editor Apollo (<xref ref-type="bibr" rid="B12">Dunn et&#xa0;al., 2019</xref>) (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). Based on the popular JBrowse genome viewer (<xref ref-type="bibr" rid="B5">Buels et&#xa0;al., 2016</xref>), Apollo allows visitors to browse, compare, and interpret the available evidence-based gene models. The annotator panel in the Apollo interface provides several tabs, allowing easy navigation through the genome and the ability to view or hide tracks as well as to locate and view annotation details. For further information, we refer the reader to the Apollo documentation (<ext-link ext-link-type="uri" xlink:href="https://genomearchitect.readthedocs.io/">https://genomearchitect.readthedocs.io/</ext-link>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Overview of the UniTato user interface and TGA2 use case. Screenshot of the Apollo server web interface for the <italic>Solanum tuberosum</italic> DM gene model manual annotation, showing the manual annotation of a TGA2 transcription factor gene model which was split into two gene models in v6 (track &#x201c;DMv6 working models&#x201d;). The gene model&#x2019;s manual annotation with nine exons (track &#x201c;User-created Annotations&#x201d;) was based on the correctly predicted ITAG v4 CDS and the Rywal Iso-Seq transcript mapping.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1352253-g003.tif"/>
</fig>
<p>The Apollo interface currently contains a number of tracks (see <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S2</bold>
</xref>), which include various gene models (v4, v6, unified v4 and v6) as well as different subsets of high-confidence matching and rescued genes. To aid in interpreting and evaluating the gene models, a number of evidence tracks are also available, including long read and short paired-end Illumina mappings from DM Phureja and tetraploid cultivars (<xref ref-type="bibr" rid="B23">Lukan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>), reference proteomes of Arabidopsis (<italic>Arabidopsis thaliana</italic>) (<xref ref-type="bibr" rid="B6">Cheng et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B25">Pasha et&#xa0;al., 2020</xref>), tomato (<italic>Solanum lycopersicum</italic>) (<xref ref-type="bibr" rid="B16">Hosmani et&#xa0;al., 2019</xref>), tobacco (<italic>Nicotiana tabacum</italic>) and <italic>Nicotiana benthamiana</italic> (<xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2024</xref>), and reference transcriptomes of potato cultivars D&#xe9;sir&#xe9;e, PW363, and Rywal (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>). These tracks are publicly viewable by all UniTato web page visitors. On the other hand, potential contributors are encouraged to use the contact details on the web page to request edit access through a user account. Upon login, these users have access to the curator tools, providing the ability to collaboratively add, remove, and modify potato gene models. The improvements can then be exported as an updated version of the genome annotation file (GFF, VCF, or FASTA).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Overview of the evidence tracks available in the UniTato v1.0 web server.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left"/>
<th valign="middle" align="left">Track name</th>
<th valign="middle" align="left">Track description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="4" align="left">
<bold>Gene models and gene model subsets</bold>
</td>
<td valign="top" align="left">UniTato-v4v6</td>
<td valign="top" align="left">DM Phureja potato unified (merged) v4 and v6 gene models (GFF)</td>
</tr>
<tr>
<td valign="top" align="left">DMv4 unmatching v6</td>
<td valign="top" align="left">DM Phureja potato v4 gene models not matching v6 gene models (GFF), added to UniTato GFF</td>
</tr>
<tr>
<td valign="top" align="left">DMv4 low-confidence matches to v6</td>
<td valign="top" align="left">DM Phureja potato v4 gene models matching v6 gene models with low confidence (GFF), added to UniTato GFF</td>
</tr>
<tr>
<td valign="top" align="left">DMv4 high-confidence matches to v6</td>
<td valign="top" align="left">DM Phureja potato v4 gene models matching v6 gene models with high confidence (GFF), included in the translation table</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Pan-transcriptome</bold>
</td>
<td valign="top" align="left">Pan-transcriptome Desiree/PW363/Rywal</td>
<td valign="top" align="left">Representative <italic>de-novo</italic> assembled transcripts of potato cv. Desiree, cv. Rywal, and breeding clone PW363 (BAM)</td>
</tr>
<tr>
<td valign="top" rowspan="6" align="left">
<bold>Long-read transcriptomes</bold>
</td>
<td valign="top" align="left">Rywal Iso-Seq transcripts</td>
<td valign="top" align="left">cv. Rywal potato Iso-Seq reads (BAM)</td>
</tr>
<tr>
<td valign="top" align="left">Altus Iso-Seq transcripts</td>
<td valign="top" align="left">cv. Altus potato Iso-Seq reads (BAM)</td>
</tr>
<tr>
<td valign="top" align="left">Avenger Iso-Seq transcripts</td>
<td valign="top" align="left">cv. Avenger potato Iso-Seq reads (BAM)</td>
</tr>
<tr>
<td valign="top" align="left">Colomba Iso-Seq transcripts</td>
<td valign="top" align="left">cv. Colomba potato Iso-Seq reads (BAM)</td>
</tr>
<tr>
<td valign="top" align="left">Spunta Iso-Seq transcripts</td>
<td valign="top" align="left">cv. Spunta potato Iso-Seq reads (BAM)</td>
</tr>
<tr>
<td valign="top" align="left">PRJNA612026 ONT transcripts</td>
<td valign="top" align="left">Potato ONT reads from SRA project PRJNA612026 (BAM)</td>
</tr>
<tr>
<td valign="top" rowspan="7" align="left">
<bold>Short paired-end read transcriptomes</bold>
</td>
<td valign="top" align="left">Phureja tuber Illumina PE</td>
<td valign="top" align="left">
<italic>Solanum tuberosum</italic> L. Phureja Illumina NovaSeq 6000 reads (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Potato seed-tubers Illumina PE</td>
<td valign="top" align="left">
<italic>Solanum tuberosum</italic> tuber-seeds from northern Antioquia/Cundinamarca/Boyaca (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Phureja DM1&#x2013;3 516 R44 Illumina PE</td>
<td valign="top" align="left">
<italic>Solanum tuberosum</italic> strain: DM1&#x2013;3 516 R44 genome sequencing and assembly (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Phureja pistil Illumina PE</td>
<td valign="top" align="left">C065 pistil transcriptome sequencing (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Phureja Illumina PE</td>
<td valign="top" align="left">
<italic>Solanum phureja</italic> lines contrasting by resistance to nematode (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Potato landraces young leaves Illumina PE</td>
<td valign="top" align="left">Transcriptomes of <italic>in-vitro</italic> young leaves in 11 potato landraces (bw)</td>
</tr>
<tr>
<td valign="top" align="left">Phureja seed-tuber sprouts Illumina PE</td>
<td valign="top" align="left">RNA-Seq of certified and informal potato seed tubers in the province of Antioquia (bw)</td>
</tr>
<tr>
<td valign="top" rowspan="4" align="left">
<bold>Reference proteomes</bold>
</td>
<td valign="top" align="left">Arabidopsis proteome Araport11</td>
<td valign="top" align="left">
<italic>Arabidopsis thaliana</italic> proteome (v. Araport11) aligned to UniTato genome using miniprot</td>
</tr>
<tr>
<td valign="top" align="left">Tomato proteome ITAG4.1</td>
<td valign="top" align="left">
<italic>Solanum lycopersicum</italic> proteome (v. ITAG 4.1) aligned to UniTato genome using miniprot</td>
</tr>
<tr>
<td valign="top" align="left">Tobacco proteome NtaSR1</td>
<td valign="top" align="left">
<italic>Nicotiana tabacum</italic> proteome (v. SR1) aligned to UniTato genome using miniprot</td>
</tr>
<tr>
<td valign="top" align="left">Benthi proteome NbeHZ1</td>
<td valign="top" align="left">
<italic>Nicotiana benthamiana</italic> proteome (v. HZ1) aligned to UniTato genome using miniprot</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>UniTato improves the coverage and accuracy of gene models</title>
<p>Merging of v4 and v6 genome annotations improves the coverage and accuracy of the computationally predicted gene models (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S3</bold>
</xref>), whereas manual annotation by experts will provide the necessary quality control. The improved coverage is most evident by adding the rescued v4 genes showing experimental evidence for expression. These include important genes, such as a gene encoding a cysteine protease inhibitor (PGSC0003DMG400010139/Sotub03g015980) and the salicylic acid-binding protein 2 (PGSC0003DMG400028777/Sotub06g025780; for details see Phureja_v4-v6.1_translations.xlsx on GitHub). Apart from the missing genes, several v6 gene models have been wrongly predicted. One such case is the TGA2 transcription factor gene encoded by two v6 gene models and correctly annotated as a single gene model by ITAG v4 (<xref ref-type="bibr" rid="B40">Toma&#x17e; et&#xa0;al., 2023</xref>). The Iso-Seq read mapping suggests that the gene&#x2019;s 5&#x2032;-untranslated region extends into another exon (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). Such mis-annotations can be easily manually curated in the UniTato Apollo instance. Here, tracks of mapped transcripts can additionally help curators build more accurate gene models (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables S3&#x2013;S5</bold>
</xref>).</p>
<p>We further decided to identify genomic loci where the v4 and v6 gene models were predicted very differently and/or overlap in a &#x201c;many-to-many&#x201d; fashion. Thus, without additional evidence, for these loci, it is very challenging to decide which gene models are more probable. A full list of such complex cases of gene models is available in &#x201c;overlaps.xlsx&#x201d; on the UniTato GitHub repository (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S2</bold>
</xref>). We showcase here two such genomic loci. The first is the v6 model Soltu.DM.02G032590 on chromosome 2 encoding a transferase gene (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>). The mapped Iso-Seq reads and the tomato ortholog sequence architecture with 18 exons fit the v4 model PGSC0003DMT400001369 better than the v6 gene model. The second is the v6 gene model Soltu.DM.04G024440, a chimeric model of the laccase gene and the adjacent ribosomal protein S15A gene (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>). Based on the Iso-Seq data and the presence of only five exons in the tomato laccase ortholog, the v4 Sotub04g025130 gene model is more accurate.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Examples of overlapping v4 and v6 gene models that require RNA-Seq read mapping and ortholog evidence for manual curation. From top to bottom: graphical representation of v4 and v6 gene model overlaps from &#x201c;04_intervals_many-to-many.html&#x201d; file on the UniTato GitHub repository, UniTato Apollo representation of these gene models with RNA-Seq Illumina PE and Iso-Seq tracks, and representation of tomato syntenic genomic region from the SolGenomics genome browser showing tomato ITAG4.1 annotation and Iso-Seq tracks. The numbers in brackets below the gene models show the exon count. Dotted lines follow the curated gene models through the three representations. <bold>(A)</bold> Manual curation of a v6 transferase gene model Soltu.DM.02G032590 for which the v4 model PGSC0003DMT400001369 better fits the transcriptome data and the tomato ortholog evidence. <bold>(B)</bold> Manual curation of a chimeric v6 gene model Soltu.DM.04G024440 for which the v4 Sotub04g025130 model better fits the transcriptome and ortholog evidence.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1352253-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s3" sec-type="discussion">
<label>3</label>
<title>Discussion</title>
<p>The advancement and maturation of high-throughput and long-read sequencing has led to several different potato genome assemblies, gene annotations, and transcriptomic datasets. Sequencing the group Phureja DM (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>) still enables functional studies of polyploid potato cultivars using RNA-Seq technologies, although with the limitation of not covering cultivar-specific gene expression (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>). For practical reasons, most potato researchers use only one genome annotation, either PGSC (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>) or ITAG (<xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>), especially when conducting high-throughput analyses. However, using an incomplete gene set can lead to false outcomes regarding gene presence or gene family diversity, severely affecting downstream results (<xref ref-type="bibr" rid="B47">Yandell and Ence, 2012</xref>; <xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>). It is well known that incorrect or incomplete annotations corrupt all subsequent experiments that rely on them, making it essential to have the ability to share accurate and up-to-date annotations (<xref ref-type="bibr" rid="B47">Yandell and Ence, 2012</xref>; <xref ref-type="bibr" rid="B3">Bolger et&#xa0;al., 2018</xref>).</p>
<p>Our motivation here was thus two-fold: first, to transfer both gene model sets from the older PGSC assembly (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>) to the new DMv6.1 assembly (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>) and, at the same time, to merge the gene models (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>), allowing for data interoperability of previous experimental results (e.g., from RNA-Seq) (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>) with the unified gene model set, UniTato. Annotation merging was performed using an in-house-developed bioinformatics pipeline that utilizes open-source software and was complemented with evidence from published tetraploid transcriptomes (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>) (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). The resulting annotation files were incorporated into an Apollo web server (<xref ref-type="bibr" rid="B12">Dunn et&#xa0;al., 2019</xref>), which enables the potato community to curate and refine potato gene models collaboratively and in real time, facilitating the establishment of a single standardized potato genome annotation (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). Moreover, by comparing v4 and v6 annotations with UniTato, we observed multiple complex cases of gene models that cannot be straightforwardly resolved and will need to be manually curated (see &#x201c;overlaps.xlsx&#x201d; on the UniTato GitHub for a list of gene identifiers for these complicated cases). We thus show how UniTato can be used to identify gene models that are either missing or were moved, merged, or split (see <xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3</bold>
</xref>, <xref ref-type="fig" rid="f4">
<bold>4</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S2</bold>
</xref>).</p>
<p>This showcases the usefulness of the established resource for resolving genome assembly and annotation issues. Bioinformatics users can thus i) compare gene models visually across versions and tracks, pinpointing and resolving errors and ensuring that the most accurate gene models are constructed and applied; ii) compare experimental results obtained on v4 to those obtained on the new v6 assembly or higher, such as for instance with RNA-Seq, where results with old identifiers can be incorporated with new results using v6 identifiers (via the translation table), without requiring repeated read mapping and computations; iii) curate potato gene models in problematic regions, such as determining gene structures in tandemly repeated gene regions, which cause problems with most annotation pipelines (multiple long-read and short-read tracks available in UniTato, see <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>); and iv) use current data with popular genome analysis resources that still rely on older annotations (<xref ref-type="bibr" rid="B42">Van Bel et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B49">Yates et&#xa0;al., 2022</xref>), facilitating, e.g., translation of gene descriptions and ontologies via orthology from model plants. Furthermore, with the provided v4&#x2013;v6 mapping and available evidence tracks (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>), UniTato also aids wet lab research. This includes i) guiding experiment design and interpretation, enabling users to check for off-target effects across different gene models; ii) defining and cloning functional orthologs based on experimental results and not merely partial sequence similarity, by revealing if orthologs from another plant map to the v6 assembly (see RNA-Seq and proteome tracks, <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>); and iii) primer design, since the unified gene models are an improvement over the initial v4 and v6 models, enhancing gene coverage and accuracy (e.g., the user can visually determine if the amplicon is covering variations in the RNA-Seq tracks, <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>).</p>
<p>In conclusion, we believe that building upon existing gene models to improve and unify them in a community-wise manner is a reasonable and transparent way to improve potato gene model annotations. The repeated creation of new genome model versions, without interlinking, is not contributing to the FAIR data paradigm (<xref ref-type="bibr" rid="B46">Wilkinson et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B28">Petek et&#xa0;al., 2022</xref>) and thus hinders agricultural research, including precision agriculture and food safety (<xref ref-type="bibr" rid="B7">Cole et&#xa0;al., 2018</xref>). The requirements of periodic annotation curation and incorporating experimental data and novel findings into the annotation process are inherent also to other plant species (<xref ref-type="bibr" rid="B47">Yandell and Ence, 2012</xref>; <xref ref-type="bibr" rid="B18">Kersey, 2019</xref>). Even in model plants, up to 40% of protein-coding genes can still be of unknown function, suggesting that much work is still required to fully resolve, annotate, and understand most plant genomes (<xref ref-type="bibr" rid="B15">Horan et&#xa0;al., 2008</xref>; <xref ref-type="bibr" rid="B45">Wang et&#xa0;al., 2023</xref>). We propose that a similar approach for evidence- and community-based revision as the one presented here can be utilized for any other insufficiently annotated species, for which genome models of closely related species are available. Apart from updating our database with new assemblies as they become available (<xref ref-type="bibr" rid="B48">Yang et&#xa0;al., 2023</xref>), future developments include the addition of novel experimental omics datasets and expansion to related genomes.</p>
</sec>
<sec id="s4">
<label>4</label>
<title>Methods</title>
<sec id="s4_1">
<label>4.1</label>
<title>Data sources</title>
<p>To develop UniTato, we used the publicly available potato group Phureja DM gene models: DMv6.1 (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>), ITAG (<xref ref-type="bibr" rid="B39">Tomato Genome Consortium, 2012</xref>), and PGSCv4.04 (<xref ref-type="bibr" rid="B31">Potato Genome Sequencing Consortium et&#xa0;al., 2011</xref>), as well as reference transcriptomes of D&#xe9;sir&#xe9;e, Rywal, and PW363 tetraploid genotypes, and an ITAG/PGSC translation table (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>). The latter consolidated the two publicly available PGSC and ITAG gene models into a single unified one.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Data processing</title>
<p>To map gene annotations across potato genome assemblies (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>), GFF files were sorted using the <italic>sort</italic> function from Bedtools v2.25.0 (<xref ref-type="bibr" rid="B32">Quinlan and Hall, 2010</xref>). Liftoff v.1.6.3 (<xref ref-type="bibr" rid="B36">Shumate and Salzberg, 2020</xref>) uses Minimap2 (<xref ref-type="bibr" rid="B21">Li, 2018</xref>) to map annotations between assemblies of the same or closely related species. We modified it to accept the number of nucleotides for the <italic>flank</italic> parameter (<ext-link ext-link-type="uri" xlink:href="https://github.com/NIB-SI/Liftoff">https://github.com/NIB-SI/Liftoff</ext-link>), instead of the ratio of sequence size, and used it with the following parameters: i) coverage of 90%, ii) sequence identity of 90%, iii) flanking sequence length <italic>flank</italic> of either 0 or 500 nucleotides, and iv) Minimap2 v.2.24-r1122 &#x201c;asm5&#x201d; option for long assembly to reference mapping. In addition, Minimap2 was used with the same set of parameters as for Liftoff (--end-bonus 5 --eqx -N 50 -p 0.8 -ax asm5) to map the reference CDSome and transcriptome (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>) of three potato genotypes: D&#xe9;sir&#xe9;e, PW363, and Rywal. FASTQ files of long-read transcriptomics datasets were downloaded from SRA. The Iso-Seq reads were mapped to the v6 genome assembly using Minimap2 with parameters &#x201c;-ax splice:hq -G 10000 -uf&#x201d;.</p>
<p>Next, to compare the mapped annotation across the assemblies, as well as overlaps within the DMv6.1, the <italic>intersect</italic> function from Bedtools (<xref ref-type="bibr" rid="B32">Quinlan and Hall, 2010</xref>) was used with the minimum overlap fraction (<italic>F</italic>) ranging incrementally from 0.0001 to 1. Pairs from our existing merged v4 gene model set (<xref ref-type="bibr" rid="B29">Petek et&#xa0;al., 2020b</xref>) were used to determine the optimal <italic>F</italic> threshold value of 0.30 (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S1</bold>
</xref>). All reported v6 values below refer to working model versions unless stated otherwise. To obtain additional evidence about the reliability of the transcriptome-unsupported rescued genes, we mapped i) short paired-end Illumina RNA-Seq reads of DM Phureja (<xref ref-type="bibr" rid="B30">Pham et&#xa0;al., 2020</xref>) using STAR (<xref ref-type="bibr" rid="B11">Dobin et&#xa0;al., 2013</xref>) and of cv. Rywal (<xref ref-type="bibr" rid="B23">Lukan et&#xa0;al., 2020</xref>) using Salmon (<xref ref-type="bibr" rid="B26">Patro et&#xa0;al., 2017</xref>); ii) long reads of tetraploid cultivars (<xref ref-type="bibr" rid="B9">Della Bartola et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B23">Lukan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B14">Hoopes et&#xa0;al., 2022</xref>) using Minimap2 (<xref ref-type="bibr" rid="B21">Li, 2018</xref>); and iii) protein sequences of the model plant <italic>Arabidopsis</italic> (<xref ref-type="bibr" rid="B6">Cheng et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B25">Pasha et&#xa0;al., 2020</xref>) and three Solanaceae species (<xref ref-type="bibr" rid="B16">Hosmani et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2024</xref>) using miniprot v.0.13-r24 (-G 100 -O 10 -J 34 -F 30 -j1 -M0 --gff-only -ut64) (<xref ref-type="bibr" rid="B22">Li, 2023</xref>).</p>
<p>For visualization, packages circlize v0.4.15 (<xref ref-type="bibr" rid="B13">Gu et&#xa0;al., 2014</xref>) and intervals v0.15.4 (<ext-link ext-link-type="uri" xlink:href="https://github.com/edzer/intervals">github.com/edzer/intervals</ext-link>) were used with default settings. For topological sorting of unified GFF features, AGAT v0.6.2 (<xref ref-type="bibr" rid="B8">Dainat et&#xa0;al., 2023</xref>) was used with default settings.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Database implementation</title>
<p>A web server hosting the Apollo genomic annotation editor (<xref ref-type="bibr" rid="B12">Dunn et&#xa0;al., 2019</xref>) for real-time collaborative analysis and curation was deployed at <ext-link ext-link-type="uri" xlink:href="https://unitato.nib.si">https://unitato.nib.si</ext-link>. The reference DM genome assembly (DMv6.1) was uploaded as the base organism. Several evidence tracks corresponding to the different gene models are available for exploration and curation. The database instance is running Apollo 2.7.0, deployed with docker, with default parameters. Data upload was carried out using JBrowse utility scripts (<xref ref-type="bibr" rid="B5">Buels et&#xa0;al., 2016</xref>).</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Software and code</title>
<p>The programming environments R v.4.3 (<ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link>) and Python v3.8 (<ext-link ext-link-type="uri" xlink:href="https://www.python.org/">https://www.python.org/</ext-link>) were used. Code to reproduce the analysis and results, including scripts used for constructing the mapping table between v4 and v6 gene IDs as well as merging v4 and v6 models, is available at the GitHub repository (<ext-link ext-link-type="uri" xlink:href="https://github.com/NIB-SI/unitato">https://github.com/NIB-SI/unitato</ext-link>).</p>
</sec>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>Data and code to reproduce the analysis are available at the GitHub repository <ext-link ext-link-type="uri" xlink:href="https://github.com/NIB-SI/unitato/">https://github.com/NIB-SI/unitato/</ext-link>. The GFF and GTF files and the identifier translation table are also available from <uri xlink:href="https://unitato.nib.si/downloads/">https://unitato.nib.si/downloads/</uri>. Publicly available RNA-Seq datasets were used in the study. The data can be found under the following SRA accession numbers: SRR8281993-SRR8282008 (Rywal IsoSeq reads; SRA study SRP172523), SRR14298411-SRR14298459 (Altus, Avenger, Colomba and Spunta IsoSeq reads; SRA study SRP315827), SRR11431596-SRR11431617 (PRJNA612026 ONT reads; SRA study SRP254248), SRR10690850, SRR10690852, SRR10690854, SRR10690856, SRR10690857, SRR10690858 (Rywal Illumina reads; SRA study SRP237525; GEO accession GSE142002), SRR122108, SRR122109, SRR122113, SRR122122, SRR122124, SRR122129, SRR122139 (Phureja Illumina reads; SRA study SRP005965), SRR7047512 (Phureja Illumina reads; SRA study SRP141363), SRR8457030-SRR8457059 (Phureja Illumina reads; SRA study SRP180310), SRR14627804-SRR14627805 (Phureja Illumina reads; SRA study SRP321011), SRR17202512-SRR17202515 (Phureja Illumina reads; SRA study SRP350333), SRR17244262-SRR17244298 (Phureja Illumina reads; SRA study SRP350981), and SRR10153126 (Phureja Illumina reads; SRA study SRP222783).</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>MZ: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. JZ: Formal analysis, Funding acquisition, Investigation, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. CB: Formal analysis, Funding acquisition, Investigation, Methodology, Software, Visualization, Writing &#x2013; review &amp; editing. NN: Formal analysis, Investigation, Validation, Writing &#x2013; review &amp; editing. MJ: Formal analysis, Investigation, Validation, Writing &#x2013; review &amp; editing. &#x17d;R: Investigation, Writing &#x2013; review &amp; editing. KG: Conceptualization, Investigation, Project administration, Supervision, Writing &#x2013; review &amp; editing, Funding acquisition, Resources. MP: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The study was supported by the EU Horizon 2020 Research and Innovation Programme under grant agreement no. 862858 (ADAPT); the Marie Sk&#x142;odowska-Curie Actions (MSCA) Doctoral Network &#x201c;LongTREC&#x201d; under grant agreement no. 101072892; the Public Scholarship, Development, Disability and Maintenance Fund of the Republic of Slovenia grant no. 11013&#x2013;9/2021&#x2013;2; and the Slovenian Research and Innovation Agency under grant agreements no. P4&#x2013;0165, P4&#x2013;0431, J2&#x2013;3060, and Z4&#x2013;50146.</p>
</sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2024.1352253/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2024.1352253/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.pdf" id="SM1" mimetype="application/pdf"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baebler</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Coll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gruden</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Plant molecular responses to potato virus Y: A continuum of outcomes from sensitivity and tolerance to resistance</article-title>. <source>Viruses</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/v12020217</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bleker</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ram&#x161;ak</surname> <given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Bittner</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Podpe&#x10d;an</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Zagor&#x161;&#x10d;ak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wurzinger</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Stress Knowledge Map: A knowledge graph resource for systems biology analysis of plant stress responses</article-title>. <source>Plant Commun.</source>, <page-range>100920</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.xplc.2024.100920</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bolger</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Arsova</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Usadel</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Plant genome and transcriptome annotations: from misconceptions to simple solutions</article-title>. <source>Briefings Bioinf.</source> <volume>19</volume>, <fpage>437</fpage>&#x2013;<lpage>449</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbw135</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bozan</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Achakkagari</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Anglin</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Ellis</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tai</surname> <given-names>H. H.</given-names>
</name>
<name>
<surname>Str&#xf6;mvik</surname> <given-names>M. V.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Pangenome analyses reveal impact of transposable elements and ploidy on the evolution of potato species</article-title>. <source>Proc. Natl. Acad. Sci. United States America</source> <volume>120</volume>, <elocation-id>e2211117120</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.2211117120</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buels</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Diesh</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>R. D.</given-names>
</name>
<name>
<surname>Munoz-Torres</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Helt</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>JBrowse: A dynamic web platform for genome visualization and analysis</article-title>. <source>Genome Biol.</source> <volume>17</volume>, <fpage>66</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-016-0924-1</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Krishnakumar</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>A. P.</given-names>
</name>
<name>
<surname>Thibaud-Nissen</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Schobel</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Town</surname> <given-names>C. D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Araport11: A complete reannotation of the arabidopsis thaliana reference genome</article-title>. <source>Plant Journal: For Cell Mol. Biol.</source> <volume>89</volume>, <fpage>789</fpage>&#x2013;<lpage>804</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.13415</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cole</surname> <given-names>M. B.</given-names>
</name>
<name>
<surname>Augustin</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Robertson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Manners</surname> <given-names>J. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The science of food security</article-title>. <source>NPJ Sci. Food</source> <volume>2</volume>, <fpage>14</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41538-018-0021-9</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dainat</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Here&#xf1;&#xfa;</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Murray</surname> <given-names>D. K. D.</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Crouch</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Lucile</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>NBISweden/AGAT: AGAT: Another Gff Analysis Toolkit to handle annotations in any GTF/GFF format</article-title> (Version v1.2.0). doi:&#xa0;<pub-id pub-id-type="doi">10.5281/zenodo.8178877</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Della Bartola</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Byrne</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mullins</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Characterization of potato virus Y isolates and assessment of nanopore sequencing to detect and genotype potato viruses</article-title>. <source>Viruses</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/v12040478</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Demirel</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Morris</surname> <given-names>W. L.</given-names>
</name>
<name>
<surname>Ducreux</surname> <given-names>L. J. M.</given-names>
</name>
<name>
<surname>Yavuz</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Asim</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Tindas</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Physiological, biochemical, and transcriptional responses to single and combined abiotic stress in stress-tolerant and stress-sensitive potato genotypes</article-title>. <source>Front. Plant Sci.</source> <volume>11</volume>, <elocation-id>169</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2020.00169</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dobin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>C. A.</given-names>
</name>
<name>
<surname>Schlesinger</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Drenkow</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zaleski</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jha</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>STAR: ultrafast universal RNA-seq aligner</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>15</fpage>&#x2013;<lpage>21</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bts635</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dunn</surname> <given-names>N. A.</given-names>
</name>
<name>
<surname>Unni</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Diesh</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Munoz-Torres</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Harris</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Apollo: democratizing genome annotation</article-title>. <source>PloS Comput. Biol.</source> <volume>15</volume>, <elocation-id>e1006790</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1006790</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Eils</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Schlesner</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Brors</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Circlize implements and enhances circular visualization in R</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2811</fpage>&#x2013;<lpage>2812</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btu393</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoopes</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hamilton</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Achakkagari</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Guesdes</surname> <given-names>F. d. A. F.</given-names>
</name>
<name>
<surname>Bolger</surname> <given-names>M. E.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Phased, chromosome-scale genome assemblies of tetraploid potato reveal a complex genome, transcriptome, and predicted proteome landscape underpinning genetic diversity</article-title>. <source>Mol. Plant</source> <volume>15</volume>, <fpage>520</fpage>&#x2013;<lpage>536</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.molp.2022.01.003</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Horan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Jang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Bailey-Serres</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Mittler</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Shelton</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Harper</surname> <given-names>J. F.</given-names>
</name>
<etal/>
</person-group>. (<year>2008</year>). <article-title>Annotating genes of known and unknown function by large-scale coexpression analysis</article-title>. <source>Plant Physiol.</source> <volume>147</volume>, <fpage>41</fpage>&#x2013;<lpage>57</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1104/pp.108.117366</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hosmani</surname> <given-names>P. S.</given-names>
</name>
<name>
<surname>Flores-Gonzalez</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Geest</surname> <given-names>H. v. d.</given-names>
</name>
<name>
<surname>Maumus</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bakker</surname> <given-names>L. V.</given-names>
</name>
<name>
<surname>Schijlen</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>An improved de novo assembly and annotation of the tomato reference genome using single-molecule sequencing, hi-C proximity ligation and optical maps</article-title>. <source>bioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/767764</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huot</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Montgomery</surname> <given-names>B. L.</given-names>
</name>
<name>
<surname>He</surname> <given-names>S. Y.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Growth-defense tradeoffs in plants: A balancing act to optimize fitness</article-title>. <source>Mol. Plant</source> <volume>7</volume>, <fpage>1267</fpage>&#x2013;<lpage>1287</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/mp/ssu049</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kersey</surname> <given-names>P. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Plant genome sequences: past, present, future</article-title>. <source>Curr. Opin. Plant Biol.</source> <volume>48</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.pbi.2018.11.001</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kri&#x17e;nik</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Baebler</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Gruden</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Roles of small RNAs in the establishment of tolerant interaction between plants and viruses</article-title>. <source>Curr. Opin. Virol.</source> <volume>42</volume>, <fpage>25</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.coviro.2020.04.006</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lamesch</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Berardini</surname> <given-names>T. Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Swarbreck</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wilks</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Sasidharan</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>The arabidopsis information resource (TAIR): improved gene annotation and new tools</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume>, <fpage>D1202</fpage>&#x2013;<lpage>D1210</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkr1090</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Minimap2: pairwise alignment for nucleotide sequences</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Protein-to-genome alignment with miniprot</article-title>. <source>Bioinformatics</source> <volume>39</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btad014</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lukan</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Pompe-Novak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Baebler</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Tu&#x161;ek-&#x17d;nidari&#x10d;</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kladnik</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kri&#x17e;nik</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Precision transcriptomics of viral foci reveals the spatial regulation of immune-signaling genes and identifies RBOHD as an important player in the incompatible interaction between potato virus Y and potato</article-title>. <source>Plant Journal: For Cell Mol. Biol.</source> <volume>104</volume>, <fpage>645</fpage>&#x2013;<lpage>661</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.14953</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lukan</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Veillet</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Kri&#x17e;nik</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Coll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Povalej</surname> <given-names>T. M.</given-names>
</name>
<name>
<surname>Poga&#x10d;ar</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>CRISPR/cas9-mediated fine-tuning of miRNA expression in tetraploid potato</article-title>. <source>Horticulture Res.</source> <volume>9</volume>, <fpage>uhac147</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhac147</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pasha</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Subramaniam</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cleary</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Berardini</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Farmer</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Araport lives: an updated framework for arabidopsis bioinformatics</article-title>. <source>Plant Cell</source> <volume>32</volume>, <fpage>2683</fpage>&#x2013;<lpage>2686</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1105/tpc.20.00358</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patro</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Duggal</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Love</surname> <given-names>M. I.</given-names>
</name>
<name>
<surname>Irizarry</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Kingsford</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Salmon provides fast and bias-aware quantification of transcript expression</article-title>. <source>Nat. Methods</source> <volume>14</volume>, <fpage>417</fpage>&#x2013;<lpage>419</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nmeth.4197</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Coll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ferenc</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Razinger</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gruden</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>a). <article-title>Validating the potential of double-stranded RNA targeting colorado potato beetle mesh gene in laboratory and field trials</article-title>. <source>Front. Plant Sci.</source> <volume>11</volume>, <elocation-id>1250</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2020.01250</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zagor&#x161;&#x10d;ak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Blejec</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ram&#x161;ak</surname> <given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Coll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Baebler</surname> <given-names>&#x160;.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>pISA-tree - a data management framework for life science research projects using a standardised directory tree</article-title>. <source>Sci. Data</source> <volume>9</volume>, <fpage>685</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-022-01805-5</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zagor&#x161;&#x10d;ak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ram&#x161;ak</surname> <given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Sanders</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Toma&#x17e;</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Tseng</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>b). <article-title>Cultivar-specific transcriptome and pan-transcriptome reconstruction of tetraploid potato</article-title>. <source>Sci. Data</source> <volume>7</volume>, <fpage>249</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-020-00581-4</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pham</surname> <given-names>G. M.</given-names>
</name>
<name>
<surname>Hamilton</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Wood</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Burke</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Vaillancourt</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Construction of a chromosome-scale long-read reference genome assembly for potato</article-title>. <source>GigaScience</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa100</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>Potato Genome Sequencing Consortium</collab>
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Mu</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Genome sequence and analysis of the tuber crop potato</article-title>. <source>Nature</source> <volume>475</volume>, <fpage>189</fpage>&#x2013;<lpage>195</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature10158</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quinlan</surname> <given-names>A. R.</given-names>
</name>
<name>
<surname>Hall</surname> <given-names>I. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>BEDTools: A flexible suite of utilities for comparing genomic features</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>841</fpage>&#x2013;<lpage>842</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btq033</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#x161;ak</surname> <given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Coll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Stare</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Tzfadia</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Baebler</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Peer</surname> <given-names>Y. V. d.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Network modeling unravels mechanisms of crosstalk between ethylene and salicylate signaling in potato</article-title>. <source>Plant Physiol.</source> <volume>178</volume>, <fpage>488</fpage>&#x2013;<lpage>499</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1104/pp.18.00450</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rhee</surname> <given-names>S. Y.</given-names>
</name>
<name>
<surname>Beavis</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Berardini</surname> <given-names>T. Z.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dixon</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Doyle</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2003</year>). <article-title>The arabidopsis information resource (TAIR): A model organism database providing a centralized, curated gateway to arabidopsis biology, research materials and community</article-title>. <source>Nucleic Acids Res.</source> <volume>31</volume>, <fpage>224</fpage>&#x2013;<lpage>228</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkg076</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schwacke</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ponce-Soto</surname> <given-names>G. Y.</given-names>
</name>
<name>
<surname>Krause</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Bolger</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Arsova</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Hallab</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>MapMan4: A refined protein classification and annotation framework applicable to multi-omics data analysis</article-title>. <source>Mol. Plant</source> <volume>12</volume>, <fpage>879</fpage>&#x2013;<lpage>892</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.molp.2019.01.003</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shumate</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Salzberg</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Liftoff: accurate mapping of gene annotations</article-title>. <source>Bioinformatics</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1016</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Genome evolution and diversity of wild and cultivated potatoes</article-title>. <source>Nature</source> <volume>606</volume>, <fpage>535</fpage>&#x2013;<lpage>541</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-022-04822-x</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D.-C.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>Y.-Q.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>PlantRegMap: charting functional regulatory maps in plants</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D1104</fpage>&#x2013;<lpage>D1113</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkz1020</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>Tomato Genome Consortium</collab>
</person-group> (<year>2012</year>). <article-title>The tomato genome sequence provides insights into fleshy fruit evolution</article-title>. <source>Nature</source> <volume>485</volume>, <fpage>635</fpage>&#x2013;<lpage>641</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature11119</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Toma&#x17e;</surname> <given-names>&#x160;.</given-names>
</name>
<name>
<surname>Petek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Lukan</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Poga&#x10d;ar</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Stare</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Prates</surname> <given-names>E. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>A mini-TGA protein modulates gene expression through heterogeneous association with transcription factors</article-title>. <source>Plant Physiol.</source> <volume>191</volume>, <fpage>1934</fpage>&#x2013;<lpage>1952</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/plphys/kiac579</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guignon</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Toure</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Droc</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dufayard</surname> <given-names>J.-F.</given-names>
</name>
<name>
<surname>Conte</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rouard</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>GreenPhylDB v5: A comparative pangenomic database for plant genomes</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>D1464</fpage>&#x2013;<lpage>D1471</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkaa1068</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Bel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Silvestri</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Weitz</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Kreft</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Botzki</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Coppens</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>PLAZA 5.0: extending the scope and power of comparative and functional genomics in plants</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume>, <fpage>D1468</fpage>&#x2013;<lpage>D1474</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkab1024</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Visser</surname> <given-names>R. G. F.</given-names>
</name>
<name>
<surname>Bachem</surname> <given-names>C. W. B.</given-names>
</name>
<name>
<surname>de Boer</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Bryan</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Chakrabati</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Feingold</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2009</year>). <article-title>Sequencing the potato genome: outline and first results to come from the elucidation of the sequence of the world&#x2019;s third most important food crop</article-title>. <source>Am. J. Potato Res.</source> <volume>86</volume>, <fpage>417</fpage>&#x2013;<lpage>429</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s12230-009-9097-8</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Tung</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>High-quality assembled and annotated genomes of Nicotiana tabacum and Nicotiana benthamiana reveal chromosome evolution and changes in defense arsenals</article-title>. <source>Mol. Plant</source> <volume>17</volume>, <fpage>423</fpage>&#x2013;<lpage>437</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.molp.2024.01.008</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Deciphering the roles of unknown/uncharacterized genes in plant development and stress responses</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <elocation-id>1276559</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1276559</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkinson</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Dumontier</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Aalbersberg</surname> <given-names>I. J.</given-names>
</name>
<name>
<surname>Appleton</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Axton</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Baak</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>The FAIR guiding principles for scientific data management and stewardship</article-title>. <source>Sci. Data</source> <volume>3</volume>, <fpage>160018</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yandell</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ence</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A beginner&#x2019;s guide to eukaryotic genome annotation</article-title>. <source>Nat. Rev. Genet.</source> <volume>13</volume>, <fpage>329</fpage>&#x2013;<lpage>342</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrg3174</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>The gap-free potato genome assembly reveals large tandem gene clusters of agronomical importance in highly repeated genomic regions</article-title>. <source>Mol. Plant</source> <volume>16</volume>, <fpage>314</fpage>&#x2013;<lpage>317</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.molp.2022.12.010</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yates</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Allen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Amode</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Azov</surname> <given-names>A. G.</given-names>
</name>
<name>
<surname>Barba</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Becerra</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Ensembl Genomes 2022: an expanding genome resource for non-vertebrates</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume>, <fpage>D996</fpage>&#x2013;<lpage>D1003</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkab1007</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zagor&#x161;&#x10d;ak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Blejec</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ram&#x161;ak</surname> <given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Petek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Stare</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Gruden</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>DiNAR: revealing hidden patterns of plant signalling dynamics using differential network analysis in R</article-title>. <source>Plant Methods</source> <volume>14</volume>, <fpage>78</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-018-0345-0</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>