<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2023.1201002</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Using knowledge graphs to infer gene expression in plants</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Thessen</surname> <given-names>Anne E.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1559198/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cooper</surname> <given-names>Laurel</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/682472/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Swetnam</surname> <given-names>Tyson L.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Hegde</surname> <given-names>Harshad</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Reese</surname> <given-names>Justin</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2296052/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Elser</surname> <given-names>Justin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn005"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Jaiswal</surname> <given-names>Pankaj</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn006"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/51711/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus</institution>, <addr-line>Aurora, CO</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Botany and Plant Pathology, Oregon State University</institution>, <addr-line>Corvallis, OR</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>BIO5 Institute, University of Arizona</institution>, <addr-line>Tucson, AZ</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Environmental Genomics and Systems Biology Division, Berkeley Lab (DOE)</institution>, <addr-line>Berkeley, CA</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Marie-Ang&#x000E9;lique Laporte, Bioversity International, France</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Ashni Sedani, Accenture, United Kingdom; Angela Kranz, Helmholtz Association of German Research Centres (HZ), Germany</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Anne E. Thessen <email>annethessen&#x00040;gmail.com</email></corresp>
<fn fn-type="other" id="fn001"><p>&#x02020;ORCID: Laurel Cooper <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-6379-8932">orcid.org/0000-0002-6379-8932</ext-link></p></fn>
<fn fn-type="other" id="fn002"><p>Tyson L. Swetnam <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-6639-7181">orcid.org/0000-0002-6639-7181</ext-link></p></fn>
<fn fn-type="other" id="fn003"><p>Harshad Hegde <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-2411-565X">orcid.org/0000-0002-2411-565X</ext-link></p></fn>
<fn fn-type="other" id="fn004"><p>Justin Reese <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-2170-2250">orcid.org/0000-0002-2170-2250</ext-link></p></fn>
<fn fn-type="other" id="fn005"><p>Justin Elser <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0003-0921-1982">orcid.org/0000-0003-0921-1982</ext-link></p></fn>
<fn fn-type="other" id="fn006"><p>Pankaj Jaiswal <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1005-8383">orcid.org/0000-0002-1005-8383</ext-link></p></fn></author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>06</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>6</volume>
<elocation-id>1201002</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>04</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>05</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Thessen, Cooper, Swetnam, Hegde, Reese, Elser and Jaiswal.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Thessen, Cooper, Swetnam, Hegde, Reese, Elser and Jaiswal</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Climate change is already affecting ecosystems around the world and forcing us to adapt to meet societal needs. The speed with which climate change is progressing necessitates a massive scaling up of the number of species with understood genotype-environment-phenotype (G&#x000D7;E&#x000D7;P) dynamics in order to increase ecosystem and agriculture resilience. An important part of predicting phenotype is understanding the complex gene regulatory networks present in organisms. Previous work has demonstrated that knowledge about one species can be applied to another using ontologically-supported knowledge bases that exploit homologous structures and homologous genes. These types of structures that can apply knowledge about one species to another have the potential to enable the massive scaling up that is needed through <italic>in silico</italic> experimentation.</p></sec>
<sec>
<title>Methods</title>
<p>We developed one such structure, a knowledge graph (KG) using information from Planteome and the EMBL-EBI Expression Atlas that connects gene expression, molecular interactions, functions, and pathways to homology-based gene annotations. Our preliminary analysis uses data from gene expression studies in <italic>Arabidopsis thaliana</italic> and <italic>Populus trichocarpa</italic> plants exposed to drought conditions.</p></sec>
<sec>
<title>Results</title>
<p>A graph query identified 16 pairs of homologous genes in these two taxa, some of which show opposite patterns of gene expression in response to drought. As expected, analysis of the upstream cis-regulatory region of these genes revealed that homologs with similar expression behavior had conserved cis-regulatory regions and potential interaction with similar trans-elements, unlike homologs that changed their expression in opposite ways.</p></sec>
<sec>
<title>Discussion</title>
<p>This suggests that even though the homologous pairs share common ancestry and functional roles, predicting expression and phenotype through homology inference needs careful consideration of integrating cis and trans-regulatory components in the curated and inferred knowledge graph.</p></sec></abstract>
<kwd-group>
<kwd>knowledge graph (KG)</kwd>
<kwd>plant genome</kwd>
<kwd>gene expression</kwd>
<kwd>ontology</kwd>
<kwd>phenotype</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="45"/>
<page-count count="11"/>
<word-count count="6215"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>AI in Food, Agriculture and Water</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Climate change is already affecting ecosystems around the world and forcing us to explore ways to adapt to meet societal needs. This is particularly true in crop science where researchers are working to identify and predict genes and their resulting phenotypes under different environmental conditions in order to secure food production under a new climate regime (Thudi et al., <xref ref-type="bibr" rid="B39">2021</xref>; Tian et al., <xref ref-type="bibr" rid="B40">2021</xref>). Understanding gene/phenotype/environment relationships requires a large data set, which can be difficult to collect, so most researchers focus on a small number of heavily studied species. The speed with which climate change is progressing necessitates a massive scaling up of the number of species with understood G/P/E dynamics. The research and the knowledge gained in this area will also help human exploration in space, where plants will play an important role (Barker et al., <xref ref-type="bibr" rid="B2">2023</xref>). Previous work has demonstrated that knowledge about one species can be applied to another using ontologically supported knowledge bases that exploit homologous structures and orthologous genes (Naithani et al., <xref ref-type="bibr" rid="B23">2020</xref>). These types of knowledge structures that can apply knowledge about one species to another have the potential to enable the massive scaling up that is needed.</p>
<p>An important part of predicting phenotype is understanding the complex gene regulatory networks present in plants. This study will focus on the promoter region, the 5&#x02032; cis-regulatory regions of the homologs. This region is a portion of the DNA strand that is &#x0201C;upstream&#x0201D; from the 5&#x02032; end of the gene&#x00027;s coding start site and provides selective binding sites for trans-acting factors such as transcription factors, repressors, and activators that regulate the expression of the gene (Liu et al., <xref ref-type="bibr" rid="B17">1999</xref>). These regions are just one element of the gene expression process. Studying the expression of trans-acting factors is important for understanding the spatiotemporal dynamics of molecular interactions that help plants adapt to or overcome stress. Resources such as the Gene Ontology (The GO Consortium, <xref ref-type="bibr" rid="B38">2021</xref>), Planteome (Cooper et al., <xref ref-type="bibr" rid="B7">2018</xref>), Plant Reactome (Naithani et al., <xref ref-type="bibr" rid="B23">2020</xref>), and KnetMiner (Hassani-Pak et al., <xref ref-type="bibr" rid="B14">2021</xref>) contain much of what we know about gene function, gene regulatory networks, and phenotypes in the form of Gene X regulates Gene Y and Gene Y impacts phenotype Z, but the contextual effect of environmental conditions under which these interactions happen is almost always not included in the annotations. Not all plants and their genes are characterized in detail, and when the environmental context is included, it is usually detailed only in the metadata. Investigations that use protein domain identification and gene homology-based methods to infer the functional role a gene carries out in a given species may be overlooking the spatial and temporal dynamics of mRNA expression that determines whether a gene product (protein) will be present at the desired time and place to serve a molecular function. 
The interactive nature of genes, environments, and phenotypes requires a data structure that can represent qualitative relationships (e.g., &#x0201C;has phenotype&#x0201D; or &#x0201C;regulates&#x0201D;) and integrate heterogeneous data types in a single, queryable framework. One of these data structures is a knowledge graph (KG) (Sheth et al., <xref ref-type="bibr" rid="B32">2019</xref>).</p>
<p>A graph is made up of objects (nodes) and the relationships (edges) between those objects and, in this context, represents what we know about how biological and environmental entities (objects) interact. Rather than store data in a table or database, a knowledge graph stores the synthesized knowledge we gain from the data, e.g., Gene X has phenotype Y. As more knowledge is added to the graph, more complex queries, network analyses, and inferences can be made. Important examples include the use of knowledge graphs in rare disease diagnosis in humans (Zemojtel et al., <xref ref-type="bibr" rid="B45">2014</xref>), drug repurposing (Reese et al., <xref ref-type="bibr" rid="B29">2021</xref>), improving cancer treatment (Gogleva et al., <xref ref-type="bibr" rid="B13">2022</xref>), and meta-analyses (Tiddi et al., <xref ref-type="bibr" rid="B41">2020</xref>). KGs used for translational science rarely contain environmental exposures even though we know environmental conditions are an important part of gene expression dynamics. The exact way to model exposures in a KG is still under development (Chan et al., <xref ref-type="bibr" rid="B4">2023</xref>). A KG containing information about plant genomics and phenomics under different environmental conditions can be used to generate hypotheses <italic>in silico</italic> for targeting, thereby reducing the number of <italic>in vivo</italic> experiments that need to be conducted, saving time and resources.</p>
<p>This study examines gene expression patterns in response to drought conditions in four plant species: <italic>Arabidopsis thaliana, Zea mays, Sorghum bicolor</italic>, and <italic>Populus trichocarpa</italic>. The central motivation of this study is to assess the feasibility of using homologs to make predictions about gene expression in multiple species.</p></sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and methods</title>
<sec>
<title>Data description</title>
<sec>
<title>Planteome</title>
<p>The Planteome (<ext-link ext-link-type="uri" xlink:href="https://planteome.org/">https://planteome.org/</ext-link>) is a centralized web portal with a suite of interrelated ontologies for plants and a database of plant genomics data, annotated to the ontology terms (Cooper et al., <xref ref-type="bibr" rid="B7">2018</xref>). In the October 2020 release (version 4.0), the Planteome database included approximately 60,000 ontology terms and more than 3 million data objects, which are connected to ontology terms through approximately 20 million associations. The Planteome database has plant genomic information covering 125 plant taxa. The data available in the Planteome and annotated with ontology terms include plant gene expression data, traits, phenotypes, genomes, and germplasm sources.</p>
<p>The ontologies developed in-house by the Planteome project include the Plant Ontology (PO; Cooper et al., <xref ref-type="bibr" rid="B7">2018</xref>; Walls et al., <xref ref-type="bibr" rid="B44">2019</xref>), which describes plant anatomical structures and developmental stages, the Plant Trait Ontology (TO) for traits and phenotypes, and the Plant Experimental Conditions Ontology (PECO), which describes experimental conditions and plant exposures. In addition to these, the Planteome hosts the collaborator reference ontologies&#x02014;the Gene Ontology (GO; The GO Consortium, <xref ref-type="bibr" rid="B38">2021</xref>), Phenotype and Trait Ontology (PATO; Gkoutos et al., <xref ref-type="bibr" rid="B12">2018</xref>), and also a number of species-specific trait dictionaries developed by the Crop Ontology (CO; Shrestha et al., <xref ref-type="bibr" rid="B33">2010</xref>; Arnaud et al., <xref ref-type="bibr" rid="B1">2020</xref>). In the current release, the Planteome includes 11 of the CO trait dictionaries, mapped to the TO.</p>
<p>GO annotations were computationally generated for new species using InParanoid and InterProScan (Shulaev et al., <xref ref-type="bibr" rid="B34">2011</xref>; Myburg et al., <xref ref-type="bibr" rid="B22">2014</xref>). InParanoid was used to predict gene orthology based on the <italic>Arabidopsis thaliana</italic> associations generated by TAIR (Reiser et al., <xref ref-type="bibr" rid="B30">2022</xref>). InterProScan was used to add GO annotations to genes via inference by analyzing protein families and domain mappings (Paysan-Lafosse et al., <xref ref-type="bibr" rid="B27">2023</xref>).</p></sec>
<sec>
<title>EMBL-EBI expression atlas</title>
<p>The EMBL-EBI Expression Atlas (GXA) can be accessed online and is part of the European Bioinformatics Institute (Papatheodorou et al., <xref ref-type="bibr" rid="B26">2020</xref>). It contains manually curated and analyzed data from over 900 plant experiments that have been re-analyzed using the latest versions of the reference plant genome assembly and annotations and by deploying a standardized analysis workflow. Every experiment is fully documented with metadata and provenance.</p>
<p>Gene expression data were downloaded as a table from GXA after searching for desired species and environmental conditions. Data were filtered to include only genes whose expression differed significantly from the baseline (p &#x0003C; 0.05) with a differential expression value &#x0003C; -1 or &#x0003E; 1. Genes with positive differential expression were annotated as having increased expression. Genes with negative differential expression were annotated as having decreased expression. The tabulated data were annotated with additional ontology terms where appropriate and made available in GitHub for graph construction.</p>
</sec>
</sec>
<sec>
<title>Creating the graph</title>
<p>The graph was created by combining data from Planteome, the GXA, PO, TO, GO, and PECO using the tools available at KG-Hub (Caufield et al., <xref ref-type="bibr" rid="B3">2023</xref>). First, the data and mapping files were downloaded from their respective data repositories. GO-Basic and NCBI Tax-Slim were downloaded from the OBO Foundry in JavaScript Object Notation (JSON) format. PO and TO were downloaded from the OBO Foundry in OWL format and transformed to JSON using ROBOT (Jackson et al., <xref ref-type="bibr" rid="B15">2019</xref>). Data files containing information about <italic>Sorghum bicolor, Zea mays, Oryza sativa, Populus trichocarpa</italic>, and <italic>Arabidopsis thaliana</italic> were downloaded from Planteome servers in GAF format. Data files containing differential gene expression data involving <italic>Sorghum bicolor, Zea mays, Oryza sativa, Populus trichocarpa</italic>, and <italic>Arabidopsis thaliana</italic> in drought and saline environments were downloaded from the GXA. Several mapping files were used to normalize gene and trait identifiers. Rice gene identifiers were mapped to <italic>Oryza sativa</italic> v7.0 using the ID converter file from the Rice Annotation Project Database (Ouyang et al., <xref ref-type="bibr" rid="B25">2007</xref>; Sakai et al., <xref ref-type="bibr" rid="B31">2013</xref>). Maize gene identifiers were mapped to the Zm-B73-REFERENCE-NAM-5.0 assembly using a mapping file that includes all B73 assembly versions and includes the DAGchainer analysis, which was obtained from MaizeGDB (Portwood et al., <xref ref-type="bibr" rid="B28">2019</xref>; EMBL-EBI). Poplar gene identifiers were mapped to the reference genome using a mapping file from Gramene (Tello-Ruiz et al., <xref ref-type="bibr" rid="B37">2018</xref>). <italic>Sorghum</italic> gene names were normalized to <italic>Sorghum bicolor</italic> v3.1.1 (McCormick et al., <xref ref-type="bibr" rid="B19">2018</xref>). 
Plant traits and phenotypes were annotated with TO terms using a look-up dictionary file. Second, each of the data files was transformed into standardized nodes and edges in a tsv file using custom scripts. These scripts normalized gene and trait identifiers using ontologies and the provided mapping files and annotated every entity with a Biolink semantic type (<xref ref-type="table" rid="T1">Table 1</xref>), and relationships between entities were described using Biolink predicates (<xref ref-type="table" rid="T2">Table 2</xref>). The graph was assembled according to the Biolink model, which provides standard semantic types and relationships for biological entities (Unni et al., <xref ref-type="bibr" rid="B43">2022</xref>).</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Identifiers and Biolink semantic types assigned to elements of the graph.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Biological element</bold></th>
<th valign="top" align="left"><bold>Identifier</bold></th>
<th valign="top" align="left"><bold>Biolink type</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Plant part</td>
<td valign="top" align="left">PO</td>
<td valign="top" align="left">Anatomical entity</td>
</tr>
<tr>
<td valign="top" align="left">Growth stage</td>
<td valign="top" align="left">PO</td>
<td valign="top" align="left">Life stage</td>
</tr>
<tr>
<td valign="top" align="left">Plant trait</td>
<td valign="top" align="left">TO</td>
<td valign="top" align="left">Phenotypic feature</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Zea mays</italic> gene</td>
<td valign="top" align="left">Zm00001eb IDs</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Sorghum bicolor</italic> gene</td>
<td valign="top" align="left">Sobic IDs</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Oryza sativa</italic> gene</td>
<td valign="top" align="left">LOC_Os IDs</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Populus trichocarpa</italic> gene</td>
<td valign="top" align="left">POPTR IDs</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left">Experimental condition</td>
<td valign="top" align="left">PECO</td>
<td valign="top" align="left">Environmental exposure</td>
</tr>
<tr>
<td valign="top" align="left">QTL</td>
<td valign="top" align="left">Gramene IDs</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left">Cultivar</td>
<td valign="top" align="left">NCBITaxonomy</td>
<td valign="top" align="left">Organismal entity</td>
</tr>
<tr>
<td valign="top" align="left">Taxon</td>
<td valign="top" align="left">NCBITaxonomy</td>
<td valign="top" align="left">Organism taxon</td>
</tr>
<tr>
<td valign="top" align="left">Cellular component</td>
<td valign="top" align="left">GO</td>
<td valign="top" align="left">Cellular component</td>
</tr>
<tr>
<td valign="top" align="left">Molecular function</td>
<td valign="top" align="left">GO</td>
<td valign="top" align="left">Molecular function</td>
</tr>
<tr>
<td valign="top" align="left">Biological process</td>
<td valign="top" align="left">GO</td>
<td valign="top" align="left">Biological process</td>
</tr>
<tr>
<td valign="top" align="left">Germplasm</td>
<td valign="top" align="left">GRIN and IRIC IDs</td>
<td valign="top" align="left">Organismal entity</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Edges and their Biolink predicates.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Subject entity</bold></th>
<th valign="top" align="left"><bold>Predicate type</bold></th>
<th valign="top" align="left"><bold>Object entity</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">In taxon</td>
<td valign="top" align="left">Organism taxon</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Active in</td>
<td valign="top" align="left">Cellular component</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Regulates</td>
<td valign="top" align="left">Biological process</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Enables</td>
<td valign="top" align="left">Molecular function</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Expressed in</td>
<td valign="top" align="left">Anatomical entity</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Expressed in</td>
<td valign="top" align="left">Life stage</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Has phenotype</td>
<td valign="top" align="left">Phenotypic feature</td>
</tr>
<tr>
<td valign="top" align="left">Genomic entity</td>
<td valign="top" align="left">Orthologous to</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left">Organism taxon</td>
<td valign="top" align="left">Has phenotype</td>
<td valign="top" align="left">Phenotypic feature</td>
</tr>
<tr>
<td valign="top" align="left">Organismal entity</td>
<td valign="top" align="left">In taxon</td>
<td valign="top" align="left">Organism taxon</td>
</tr>
<tr>
<td valign="top" align="left">Organismal entity</td>
<td valign="top" align="left">Has phenotype</td>
<td valign="top" align="left">Phenotypic feature</td>
</tr>
<tr>
<td valign="top" align="left">Environmental exposure</td>
<td valign="top" align="left">Increases expression of</td>
<td valign="top" align="left">Genomic entity</td>
</tr>
<tr>
<td valign="top" align="left">Environmental exposure</td>
<td valign="top" align="left">Decreases expression of</td>
<td valign="top" align="left">Genomic entity</td>
</tr></tbody>
</table>
</table-wrap>
<p>There was not enough overlapping expression data to include <italic>O. sativa</italic> or saline environments in this analysis, but they were included in the graph.</p>
<p>The third and final step merged the transformed tsv files into a deduplicated list of nodes and edges in KGX format. The final graph consisted of over 400,000 nodes and over 5,000,000 edges and contained additional data from EOLTraitbank that was not used in this study (<xref ref-type="fig" rid="F1">Figure 1</xref>). Specific information about quantitative and qualitative plant phenotypes was represented as an edge property (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Structure of the knowledge graph. Data are transformed using ontologies and the Biolink model to form a graph. Nodes (gray boxes) are labeled with Biolink semantic type and edges (gray arrows) are labeled with Biolink predicate. Arrows indicate directionality.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1201002-g0001.tif"/>
</fig>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Phenotype data in edge properties. Detailed phenotype information was represented as a collection of edge properties that can accommodate quantitative and qualitative phenotypes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1201002-g0002.tif"/>
</fig>
</sec>
<sec>
<title>Querying the graph</title>
<p>The merged node file and edge file were uploaded into Neo4j for exploration and query. A Cypher query (<xref ref-type="boxed-text" rid="Box1">Box 1</xref>) was used to find all of the homologous genes that had been documented to have differential gene expression in either a drought or a saline environment (<xref ref-type="app" rid="A1">Supplementary material 1</xref>, <xref ref-type="app" rid="A1">2</xref>). The saline environment did not return overlapping data.</p>
<boxed-text id="Box1">
<label>Box 1</label>
<title>Cypher query.</title>
<p>MATCH (e {id:&#x00027;PECO:0007404&#x00027;})-[r]-&#x0003E;(g),(g)-[q:&#x00060;biolink:orthologous_to&#x00060;]-(h), (e {id:&#x00027;PECO:0007404&#x00027;})-[s]-&#x0003E;(h) RETURN &#x0002A;</p>
</boxed-text>
<p>Genes returned from the query for the drought environment were compared based on GO annotations (<xref ref-type="app" rid="A1">Supplementary material 3</xref>), but this also did not give enough data to make conclusions using PANTHER (<xref ref-type="app" rid="A1">Supplementary material 4</xref>).</p>
</sec>
<sec>
<title>Comparing promoter regions</title>
<p>We collected 5&#x02032;-regulatory regions of the identified genes (700&#x02013;900 bp) using BioMart in the Gramene database (Spooner et al., <xref ref-type="bibr" rid="B36">2012</xref>) and searched for potential transcription factor-binding sites using PlantPAN (Chow et al., <xref ref-type="bibr" rid="B6">2016</xref>). Using these data (<xref ref-type="app" rid="A1">Supplementary material 5</xref>), we created a matrix comparing the occurrence of each transcription factor in the binding site of each gene pair and made note of which were or were not held in common. We used ClustVis (Metsalu and Vilo, <xref ref-type="bibr" rid="B20">2015</xref>) to examine the similarity between the transcription factor-binding sites for each of the <italic>Populus</italic> and <italic>Arabidopsis</italic> gene pairs using PCA. A total of 12 transcription factor-binding sites (AT-Hook, bHLH, C2H2, Dehydrin, Dof, GATA, Homeodomain, Myb/SANT, NF-YB, TBP, Trihelix, and ZF-HD) were present in the promoter regions of all the genes studied and thus were removed from clustering analysis. The same data were fed into Morpheus (M&#x000FC;ller et al., <xref ref-type="bibr" rid="B21">2008</xref>) for hierarchical clustering performed with default parameters using One minus Pearson&#x00027;s correlation and complete linkage methods on the TF-binding site annotations. Additional similarity matrices were created using Pearson&#x00027;s correlation metric to separately examine the TF-binding site annotations for genes with similar and contrasting expression profiles. The correlation heatmap colors were adjusted for visualization purposes.</p>
</sec>
<sec>
<title>Data availability</title>
<p>The merged KG data are hosted on the CyVerse DataCommons (<ext-link ext-link-type="uri" xlink:href="https://datacommons.cyverse.org/browse/iplant/home/shared/genophenoenvo">https://datacommons.cyverse.org/browse/iplant/home/shared/genophenoenvo</ext-link>). The KG data are available for direct download or remote visualization via CyVerse WebDAV service (<ext-link ext-link-type="uri" xlink:href="https://data.cyverse.org/dav-anon/iplant/commons/community_released/genophenoenvo/kg/">https://data.cyverse.org/dav-anon/iplant/commons/community_released/genophenoenvo/kg/</ext-link>) using visualization software such as Neo4j. The Python code used to create the graphs is publicly hosted on GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/genophenoenvo/knowledge-graph">https://github.com/genophenoenvo/knowledge-graph</ext-link>). The final merged KG includes two tab-separated value (tsv) files which include the edges and nodes.</p></sec></sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>The graph query returned 62 pairs of homologous genes from <italic>Sorghum bicolor, Zea mays, Arabidopsis thaliana</italic>, and <italic>Populus trichocarpa</italic> (<xref ref-type="app" rid="A1">Supplementary material 6</xref>), but only 16 pairs between <italic>A. thaliana</italic> and <italic>P. trichocarpa</italic> had documented similar (8) and differential (8) expressions in drought conditions (<xref ref-type="table" rid="T3">Table 3</xref>). All of the genes with similarly expressed pairs had decreased expression. Expression data for the 16 homologous pairs of <italic>A. thaliana</italic> and <italic>P. trichocarpa</italic> came from two studies in GXA (de Simone et al., <xref ref-type="bibr" rid="B8">2017</xref>; Filichkin et al., <xref ref-type="bibr" rid="B11">2018</xref>).</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Gene expression in <italic>A. thaliana</italic> and <italic>P. trichocarpa</italic> homologous genes under drought conditions.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold><italic>A. thaliana</italic> Gene<xref ref-type="table-fn" rid="TN1"><sup>&#x0002A;</sup></xref></bold></th>
<th valign="top" align="left"><bold><italic>P. trichocarpa</italic> Gene<xref ref-type="table-fn" rid="TN1"><sup>&#x0002A;</sup></xref></bold></th>
<th valign="top" align="left"><bold>Gene function (from Planteome)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">AT3G49960 &#x02193;</td>
<td valign="top" align="left">POPTR_007G053400v3 &#x02191;</td>
<td valign="top" align="left">Peroxidase activity, response to oxidative stress, heme binding</td>
</tr>
<tr>
<td valign="top" align="left">AT1G70710 &#x02193;</td>
<td valign="top" align="left">POPTR_010G109200v3 &#x02193;</td>
<td valign="top" align="left">Catalytic activity, hydrolase activity, carbohydrate metabolic process</td>
</tr>
<tr>
<td valign="top" align="left">AT1G10550 &#x02193;</td>
<td valign="top" align="left">POPTR_014G115000v3 &#x02193;</td>
<td valign="top" align="left">Xyloglucan metabolism, hydrolase activity, carbohydrate metabolic process, cell wall biogenesis</td>
</tr>
<tr>
<td valign="top" align="left">AT5G67400 &#x02193;</td>
<td valign="top" align="left">POPTR_007G053400v3 &#x02191;</td>
<td valign="top" align="left">Peroxidase activity, response to oxidative stress, heme binding, hydrogen peroxide catabolic process</td>
</tr>
<tr>
<td valign="top" align="left">AT5G23210 &#x02193;</td>
<td valign="top" align="left">POPTR_005G091700v3 &#x02193;</td>
<td valign="top" align="left">Proteolysis, serine-type carboxypeptidase activity</td>
</tr>
<tr>
<td valign="top" align="left">AT1G67750 &#x02193;</td>
<td valign="top" align="left">POPTR_008G182200v3 &#x02193;</td>
<td valign="top" align="left">Pectate lyase activity, metal ion binding</td>
</tr>
<tr>
<td valign="top" align="left">AT2G39530 &#x02193;</td>
<td valign="top" align="left">POPTR_010G205300v3 &#x02191;</td>
<td valign="top" align="left">Iron/sulfur cluster binding</td>
</tr>
<tr>
<td valign="top" align="left">AT5G13140 &#x02193;</td>
<td valign="top" align="left">POPTR_003G167100v3 &#x02193;</td>
<td valign="top" align="left">Response to nematode, pectate lyase activity, metal ion binding</td>
</tr>
<tr>
<td valign="top" align="left">AT1G11580 &#x02193;</td>
<td valign="top" align="left">POPTR_011G025400v3 &#x02191;</td>
<td valign="top" align="left">Enzyme inhibitor activity, pectinesterase activity, cell wall modification, rRNA N-glycosylase activity, aspartyl esterase activity, toxin activity, defense response</td>
</tr>
<tr>
<td valign="top" align="left">AT3G27400 &#x02193;</td>
<td valign="top" align="left">POPTR_001G339500v3 &#x02191;</td>
<td valign="top" align="left">Response to nematode, pectate lyase activity, metal ion binding</td>
</tr>
<tr>
<td valign="top" align="left">AT4G02330 &#x02193;</td>
<td valign="top" align="left">POPTR_014G127000v3 &#x02191;</td>
<td valign="top" align="left">Enzyme inhibitor activity, pectinesterase activity, cell wall modification, response to stress, aspartyl esterase activity</td>
</tr>
<tr>
<td valign="top" align="left">AT5G20630 &#x02193;</td>
<td valign="top" align="left">POPTR_006G142600v3 &#x02193;</td>
<td valign="top" align="left">Manganese ion binding, nutrient reservoir activity</td>
</tr>
<tr>
<td valign="top" align="left">AT4G26260 &#x02191;</td>
<td valign="top" align="left">POPTR_018G069700v3 &#x02193;</td>
<td valign="top" align="left">Iron ion binding, inositol oxygenase activity, syncytium formation, L-ascorbic acid biosynthetic pathway</td>
</tr>
<tr>
<td valign="top" align="left">AT2G44990 &#x02191;</td>
<td valign="top" align="left">POPTR_014G056800v3 &#x02193;</td>
<td valign="top" align="left">Oxidoreductase activity, secondary shoot formation, carotene catabolic process, strigolactone biosynthetic process, xanthophyll catabolic process, metal ion binding</td>
</tr>
<tr>
<td valign="top" align="left">AT1G70710 &#x02193;</td>
<td valign="top" align="left">POPTR_010G109200v3 &#x02193;</td>
<td valign="top" align="left">Cellulase activity, cell wall modification, hydrolase activity</td>
</tr>
<tr>
<td valign="top" align="left">AT1G12940 &#x02191;</td>
<td valign="top" align="left">POPTR_015G081500v3 &#x02193;</td>
<td valign="top" align="left">Transmembrane transport</td>
</tr></tbody>
</table>
<table-wrap-foot>
<fn id="TN1">
<label>&#x0002A;</label>
<p>&#x02193; Indicates decreased expression and &#x02191; indicates increased expression.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Based on the predicted transcription factor-binding sites in the promoter regions, the <italic>Populus</italic> genes in the differentially expressed homolog pairs cluster separately from the other <italic>Populus</italic> and <italic>Arabidopsis</italic> genes (<xref ref-type="fig" rid="F3">Figure 3</xref>). This difference is driven by a group of 11 transcription factor-binding sites that are absent in the promoter regions of the subset of divergent <italic>Populus</italic> genes (RAV, MIKC, NAM, G2-like, CPP, ARR-B, tify, TALE, NF-YC, ERF, and NF-YA). The separation of these genes cannot be explained by the taxon or the study providing the data (which overlaps the taxon).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Clustering of <italic>Populus</italic> and <italic>Arabidopsis</italic> genes based on similarity of the transcription factor-binding sites in the promoter region - PCA. The <italic>Populus</italic> genes from the differentially expressed homolog pairs (blue circles) clustered away from the other <italic>Populus</italic> (blue) and <italic>Arabidopsis</italic> (red) genes. Differentially expressed genes are represented as circles and similarly expressed genes are represented as squares. Note that taxonomic differences (blue and red ovals) do not explain the differences in gene expression. No scaling is applied to rows; SVD with imputation is used to calculate principal components. X and Y axes show principal component 1 and principal component 2 that explain 25.1 and 9.5% of the total variance, respectively. <italic>N</italic> = 29 data points.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1201002-g0003.tif"/>
</fig>
<p>There were seven <italic>Populus</italic> genes that clustered away from the others. All but one (POPTR_014G056800v3 involved in strigolactone biosynthesis) were hypothetical proteins (according to Gramene). GO annotations for these genes clustered around transporter activity, catabolic activity, response to stress, binding, and catalytic activity. The 11 transcription factors absent in the binding sites of the <italic>Populus</italic> genes include proteins involved in plant stress response in <italic>Arabidopsis</italic> (according to UniProt).</p>
<p>A comparison of the promoter regions between homolog pairs showed that homologs that were expressed similarly had more similar promoter regions than pairs that were expressed differentially (<xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Similarity of the transcription factor-binding sites in the promoter region of <italic>Populus</italic> and <italic>Arabidopsis</italic> homologous gene pairs. Poplar (POPTR) and <italic>Arabidopsis</italic> (AT) genes were grouped into their homolog pairs and whether they had similar or contrasting gene expression when exposed to drought. This figure shows that the promoter regions of pairs with contrasting expressions were less similar (blue) and the promoter regions of pairs with similar expressions were more similar (red).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1201002-g0004.tif"/>
</fig>
<p>Separate comparisons of the promoter regions from gene pairs with similar and contrasting expression profiles also show that gene pairs with similar expression had more similar promoter regions (<xref ref-type="fig" rid="F5">Figure 5A</xref>) and gene pairs with contrasting expression had less similar promoter regions (<xref ref-type="fig" rid="F5">Figure 5B</xref>).</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Similarity of the transcription factor-binding sites in the promoter region of <italic>Populus</italic> and <italic>Arabidopsis</italic> genes grouped by their expression profile. Genes that were similarly expressed in a drought treatment <bold>(A)</bold> had more similar promoter regions (red) than genes that were differentially expressed <bold>(B)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1201002-g0005.tif"/>
</fig></sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>This study shows that one can use <italic>in silico</italic> experiments to predict gene expression in drought conditions using homologous gene families in some species pairs but not all. This study supports previous findings that in some cases, promoter regions evolve separately from the coding region of the genes they regulate (Tirosh et al., <xref ref-type="bibr" rid="B42">2008</xref>). Thus, we can translate knowledge about gene expression in one species to another, but we need to include these dynamics in the data infrastructures we use to make this translation, in this case, KGs. Many data structures link a gene to a phenotype, trait, or disease without specific expression information. The current representation of differential gene expression links exposure to a chemical or a drug to the increased or decreased expression of a specific gene in the context of toxicology and drug development (Fecho et al., <xref ref-type="bibr" rid="B10">2022</xref>; Unni et al., <xref ref-type="bibr" rid="B43">2022</xref>). Gene regulatory networks are represented as mini-networks of genes that influence other genes (The GO Consortium, <xref ref-type="bibr" rid="B38">2021</xref>), but many of these networks are still unknown in plants. In the short term, <italic>in silico</italic> KG experiments involving gene expression can be improved by including empirically validated gene expression patterns of homologs.</p>
<p>Gene regulatory networks in plants have been developed using a combination of experimental and computational approaches (Kulkarni and Vandepoele, <xref ref-type="bibr" rid="B16">2020</xref>). Methods combining high-throughput DNA sequencing (ChIP-seq) and expression data have successfully revealed the detailed regulatory networks controlling flowering (Chen et al., <xref ref-type="bibr" rid="B5">2018</xref>) but are difficult to scale. Methods such as ATAC-seq and DAP-seq are more scalable but only reveal a partial picture of the regulatory network (O&#x00027;Malley et al., <xref ref-type="bibr" rid="B24">2016</xref>; Maher et al., <xref ref-type="bibr" rid="B18">2018</xref>). KGs can be used to infer regulatory networks at scale, but the quality is highly dependent on the data used to build the KG. The advantage of applying a KG is the ability to integrate incredibly heterogeneous data in a single graph, thus modeling regulatory networks in their larger biological context. An example of this application is the relatively new field of &#x0201C;network medicine&#x0201D; that uses KGs to examine the progression of disease (Silverman et al., <xref ref-type="bibr" rid="B35">2020</xref>). The main disadvantage of KGs in this application is that large amounts of computable data and domain-specific knowledge models are needed to create a graph of this type. Many disciplines do not have these resources available. While KGs can infer gene regulatory networks, these networks should always be confirmed using established experimental and computational approaches.</p>
<p>These opposing gene expression patterns are not a concern for researchers who are only interested in finding a list of genes that are potentially important in a specific context. It is not until one needs to generate hypotheses about the impact of the environment on the biological function that more complex graph representations become needed. If we are to incorporate the effect of the environment, we need to know more than that Gene X has phenotype Y. We need to know if the environmental effect increases or decreases the expression of the gene and the biological consequences of that change in expression. In some cases, we may only know that an environment is linked to a specific phenotype without knowing the underlying mechanism. This information can still add useful knowledge to the graph. In some cases, the graph itself can be used to generate hypotheses about the interplay between genes, biological processes, molecular functions, cellular components, and an observed phenotype.</p>
<p>Despite having the graph available to quickly explore the data and locate genes of interest, the workflow for comparing the promoter regions required substantial manual intervention. In this instance, we only had 16 gene pairs to explore, but scaling up these types of analyses will require the ability to traverse data annotated with gene identifiers and gene coordinates. Future studies should include extending the graph model to include these data types.</p>
<p>The semantic representation of the effect of environmental exposure on gene expression is more straightforward for the effects of a chemical or a substance, such as phenol or rubber cement. Data can be collected in the laboratory using model organisms, and the results added to the graph for analysis and translational research. Everyday environmental exposures are rarely this simple and frequently involve exposure to many types of substances in different contexts, such as climate or socioeconomic status. Future studies may need to develop ontologies and semantic representations for these more complex exposures.</p>
<p>Our observations support our hypothesis and justify the extension of our KG to include TF-binding site annotations and the actual TF genes, which are either known empirically or are supported by co-expression network analysis. In future, an investigation of conservation vs. non-conservation of cis- and trans-regulatory regions of genes may improve the understanding of interspecies and intraspecies responses to stress and adaptation.</p></sec>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The merged KG data are hosted on the CyVerse DataCommons (<ext-link ext-link-type="uri" xlink:href="https://datacommons.cyverse.org/browse/iplant/home/shared/genophenoenvo">https://datacommons.cyverse.org/browse/iplant/home/shared/genophenoenvo</ext-link>). The KG data are available for direct download or for remote visualization via CyVerse WebDav service (<ext-link ext-link-type="uri" xlink:href="https://data.cyverse.org/dav-anon/iplant/commons/community_released/genophenoenvo/kg/">https://data.cyverse.org/dav-anon/iplant/commons/community_released/genophenoenvo/kg/</ext-link>) using visualization software such as Neo4J. The Python code used to create the graphs is publicly hosted on GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/genophenoenvo/knowledge-graph">https://github.com/genophenoenvo/knowledge-graph</ext-link>). The final merged KG includes two tab-separated value (tsv) files which include the edges and nodes.</p></sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>AT developed and framed research question(s), analyzed data, contributed to data analysis, developed software, contributed to writing and revising the paper, and project administration and management. HH contributed to data analysis. TS contributed to data analysis and contributed to writing and revising the paper. JR developed software, validated results or software, developed and framed research question(s), and contributed to writing and revising the paper. LC contributed to data analysis, project administration and management, and contributed to writing and revising the paper. PJ developed and framed research question(s), analyzed data, contributed to data analysis, and contributed to writing and revising the paper. JE contributed to data analysis, developed software, validated results or software, and contributed to writing and revising the paper. All authors contributed to the article and approved the submitted version.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>This study was supported by the National Science Foundation grant awards &#x00023;1940330, &#x00023;1939945, &#x00023;1940059, and &#x00023;1940062. CyVerse is based upon work supported by the National Science Foundation grant awards &#x00023;0735191, &#x00023;1265383, and &#x00023;1743442.</p>
</sec>
<ack><p>The authors would like to thank Sierra Moxon and Harry Caufield for their technical assistance and helpful conversation. The preprint for this paper can be found at <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.7764762">https://doi.org/10.5281/zenodo.7764762</ext-link>.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Arnaud</surname> <given-names>E.</given-names></name> <name><surname>Hazekamp</surname> <given-names>T.</given-names></name> <name><surname>Laporte</surname> <given-names>M. A.</given-names></name> <name><surname>Antezana</surname> <given-names>E.</given-names></name></person-group> (<year>2020</year>). <source>Crop Ontology Governance and Stewardship Framework</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://cgspace.cgiar.org/handle/10568/118001">https://cgspace.cgiar.org/handle/10568/118001</ext-link> (accessed May 31, 2023).</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barker</surname> <given-names>R.</given-names></name> <name><surname>Kruse</surname> <given-names>C. P. S.</given-names></name> <name><surname>Johnson</surname> <given-names>C.</given-names></name> <name><surname>Saravia-Butler</surname> <given-names>A.</given-names></name> <name><surname>Fogle</surname> <given-names>H.</given-names></name> <name><surname>Chang</surname> <given-names>H-S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Meta-analysis of the space flight and microgravity response of the <italic>Arabidopsis</italic> plant transcriptome</article-title>. <source>NPJ Microgr.</source> <volume>9</volume>, <fpage>21</fpage>. <pub-id pub-id-type="doi">10.1038/s41526-023-00247-6</pub-id><pub-id pub-id-type="pmid">36941263</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Caufield</surname> <given-names>J. H.</given-names></name> <name><surname>Putman</surname> <given-names>T.</given-names></name> <name><surname>Schaper</surname> <given-names>K.</given-names></name> <name><surname>Unni</surname> <given-names>D. R.</given-names></name> <name><surname>Hegde</surname> <given-names>H.</given-names></name> <name><surname>Callahan</surname> <given-names>T. J.</given-names></name> <etal/></person-group>. (<year>2023</year>). <source>KG-Hub &#x02013; building and exchanging biological knowledge graphs. arXiv</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2302.10800">http://arxiv.org/abs/2302.10800</ext-link></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chan</surname> <given-names>L. E.</given-names></name> <name><surname>Thessen</surname> <given-names>A. E.</given-names></name> <name><surname>Duncan</surname> <given-names>W. D.</given-names></name> <name><surname>Matentzoglu</surname> <given-names>N.</given-names></name> <name><surname>Schmitt</surname> <given-names>C.</given-names></name> <name><surname>Grondin</surname> <given-names>C. J.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>The Environmental Conditions, Treatments, and Exposures Ontology (ECTO): Connecting Toxicology and Exposure to Human Health and beyond</article-title>. <source>J. Biomed. Semantics.</source> <volume>14</volume>, <fpage>3</fpage>. <pub-id pub-id-type="doi">10.1186/s13326-023-00283-x</pub-id><pub-id pub-id-type="pmid">36823605</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Yan</surname> <given-names>W.</given-names></name> <name><surname>Fu</surname> <given-names>L. Y.</given-names></name> <name><surname>Kaufmann</surname> <given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>Architecture of gene regulatory networks controlling flower development in <italic>Arabidopsis thaliana</italic></article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>4534</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-06772-3</pub-id><pub-id pub-id-type="pmid">30382087</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chow</surname> <given-names>C. N.</given-names></name> <name><surname>Zheng</surname> <given-names>H. Q.</given-names></name> <name><surname>Wu</surname> <given-names>N. Y.</given-names></name> <name><surname>Chien</surname> <given-names>C. H.</given-names></name> <name><surname>Huang</surname> <given-names>H. D.</given-names></name> <name><surname>Lee</surname> <given-names>T. Y.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>PlantPAN 2.0: an update of plant promoter analysis navigator for reconstructing transcriptional regulatory networks in plants</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>D1154</fpage>&#x02013;<lpage>D1160</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1035</pub-id><pub-id pub-id-type="pmid">26476450</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cooper</surname> <given-names>L.</given-names></name> <name><surname>Meier</surname> <given-names>A.</given-names></name> <name><surname>Laporte</surname> <given-names>M-A.</given-names></name> <name><surname>Elser</surname> <given-names>J. L.</given-names></name> <name><surname>Mungall</surname> <given-names>C.</given-names></name> <name><surname>Sinn</surname> <given-names>B. T.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>The planteome database: an integrated resource for reference ontologies, plant genomics and phenomics</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>D1168</fpage>&#x02013;<lpage>D1180</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx1152</pub-id><pub-id pub-id-type="pmid">29186578</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Simone</surname> <given-names>A.</given-names></name> <name><surname>Hubbard</surname> <given-names>R.</given-names></name> <name><surname>Torre</surname> <given-names>N. V.</given-names></name> <name><surname>Velappan</surname> <given-names>Y.</given-names></name> <name><surname>Wilson</surname> <given-names>M.</given-names></name> <name><surname>Considine</surname> <given-names>M. J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Redox changes during the cell cycle in the embryonic root meristem of <italic>Arabidopsis thaliana</italic></article-title>. <source>Antioxid Redox Signal.</source> <volume>27</volume>, <fpage>1505</fpage>&#x02013;<lpage>1519</lpage>. <pub-id pub-id-type="doi">10.1089/ars.2016.6959</pub-id><pub-id pub-id-type="pmid">28457165</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="web"><person-group person-group-type="author"><collab>EMBL-EBI</collab></person-group>. n.d. <source>ENA Browser</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/browser/view/GCA_902167145.1">https://www.ebi.ac.uk/ena/browser/view/GCA_902167145.1</ext-link> (accessed March 10, 2023).</citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fecho</surname> <given-names>K.</given-names></name> <name><surname>Thessen</surname> <given-names>A. E.</given-names></name> <name><surname>Baranzini</surname> <given-names>S. E.</given-names></name> <name><surname>Bizon</surname> <given-names>C.</given-names></name> <name><surname>Hadlock</surname> <given-names>J. J.</given-names></name> <name><surname>Huang</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Progress toward a Universal Biomedical Data Translator</article-title>. <source>Clin. Transl. Sci.</source> <volume>15</volume>, <fpage>1838</fpage>&#x02013;<lpage>1847</lpage>. <pub-id pub-id-type="doi">10.1111/cts.13301</pub-id><pub-id pub-id-type="pmid">35611543</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Filichkin</surname> <given-names>S. A.</given-names></name> <name><surname>Hamilton</surname> <given-names>M.</given-names></name> <name><surname>Dharmawardhana</surname> <given-names>P. D.</given-names></name> <name><surname>Singh</surname> <given-names>S. K.</given-names></name> <name><surname>Sullivan</surname> <given-names>C.</given-names></name> <name><surname>Ben-Hur</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Abiotic stresses modulate landscape of poplar transcriptome via alternative splicing, differential intron retention, and isoform ratio switching</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>, <fpage>5</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2018.00005</pub-id><pub-id pub-id-type="pmid">29483921</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gkoutos</surname> <given-names>G. V.</given-names></name> <name><surname>Schofield</surname> <given-names>P. N.</given-names></name> <name><surname>Hoehndorf</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>The anatomy of phenotype ontologies: Principles, properties and applications</article-title>. <source>Brief. Bioinform.</source> <volume>19</volume>, <fpage>1008</fpage>&#x02013;<lpage>1021</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbx035</pub-id><pub-id pub-id-type="pmid">28387809</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gogleva</surname> <given-names>A.</given-names></name> <name><surname>Polychronopoulos</surname> <given-names>D.</given-names></name> <name><surname>Pfeifer</surname> <given-names>M.</given-names></name> <name><surname>Poroshin</surname> <given-names>V.</given-names></name> <name><surname>Ughetto</surname> <given-names>M.</given-names></name> <name><surname>Martin</surname> <given-names>M. J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Knowledge graph-based recommendation framework identifies drivers of resistance in EGFR mutant non-small cell lung cancer</article-title>. <source>Nat. Commun.</source> <volume>13</volume>, <fpage>1667</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-29292-7</pub-id><pub-id pub-id-type="pmid">35351890</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hassani-Pak</surname> <given-names>K.</given-names></name> <name><surname>Singh</surname> <given-names>A.</given-names></name> <name><surname>Brandizi</surname> <given-names>M.</given-names></name> <name><surname>Hearnshaw</surname> <given-names>J.</given-names></name> <name><surname>Parsons</surname> <given-names>J. D.</given-names></name> <name><surname>Amberkar</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>KnetMiner: A comprehensive approach for supporting evidence-based gene discovery and complex trait analysis across species</article-title>. <source>Plant Biotechnol. J.</source> <volume>19</volume>, <fpage>1670</fpage>&#x02013;<lpage>1678</lpage>. <pub-id pub-id-type="doi">10.1111/pbi.13583</pub-id><pub-id pub-id-type="pmid">33750020</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jackson</surname> <given-names>R. C.</given-names></name> <name><surname>Balhoff</surname> <given-names>J. P.</given-names></name> <name><surname>Douglass</surname> <given-names>E.</given-names></name> <name><surname>Harris</surname> <given-names>N. L.</given-names></name> <name><surname>Mungall</surname> <given-names>C. J.</given-names></name> <name><surname>Overton</surname> <given-names>J. A.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>ROBOT: A tool for automating ontology workflows</article-title>. <source>BMC Bioinformatics.</source> <volume>20</volume>, <fpage>407</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-3002-3</pub-id><pub-id pub-id-type="pmid">31357927</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kulkarni</surname> <given-names>S. R.</given-names></name> <name><surname>Vandepoele</surname> <given-names>K.</given-names></name></person-group> (<year>2020</year>). <article-title>Inference of plant gene regulatory networks using data driven methods: A practical overview</article-title>. <source>Biochim. Biophys. Acta Gene Regul. Mech.</source> <volume>1863</volume>, <fpage>194447</fpage>. <pub-id pub-id-type="doi">10.1016/j.bbagrm.2019.194447</pub-id><pub-id pub-id-type="pmid">31678628</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>White</surname> <given-names>M. J.</given-names></name> <name><surname>MacRae</surname> <given-names>T. H.</given-names></name></person-group> (<year>1999</year>). <article-title>Transcription factors and their genes in higher plants functional domains, evolution and regulation</article-title>. <source>Eur. J. Biochem.</source> <volume>262</volume>, <fpage>247</fpage>&#x02013;<lpage>257</lpage>. <pub-id pub-id-type="doi">10.1046/j.1432-1327.1999.00349.x</pub-id><pub-id pub-id-type="pmid">10336605</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maher</surname> <given-names>K. A.</given-names></name> <name><surname>Bajic</surname> <given-names>M.</given-names></name> <name><surname>Kajala</surname> <given-names>K.</given-names></name> <name><surname>Reynoso</surname> <given-names>M.</given-names></name> <name><surname>Pauluzzi</surname> <given-names>G.</given-names></name> <name><surname>West</surname> <given-names>D. A.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Profiling of accessible chromatin regions across multiple plant species and cell types reveals common gene regulatory principles and new control modules</article-title>. <source>Plant Cell.</source> <volume>30</volume>, <fpage>15</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1105/tpc.17.00581</pub-id><pub-id pub-id-type="pmid">29229750</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McCormick</surname> <given-names>R. F.</given-names></name> <name><surname>Truong</surname> <given-names>S. K.</given-names></name> <name><surname>Sreedasyam</surname> <given-names>A.</given-names></name> <name><surname>Jenkins</surname> <given-names>J.</given-names></name> <name><surname>Shu</surname> <given-names>S.</given-names></name> <name><surname>Sims</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>The Sorghum bicolor reference genome: improved assembly, gene annotations, a transcriptome atlas, and signatures of genome organization</article-title>. <source>Plant J.</source> <volume>93</volume>, <fpage>338</fpage>&#x02013;<lpage>354</lpage>. <pub-id pub-id-type="doi">10.1111/tpj.13781</pub-id><pub-id pub-id-type="pmid">29161754</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Metsalu</surname> <given-names>T.</given-names></name> <name><surname>Vilo</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>ClustVis: a web tool for visualizing clustering of multivariate data using principal component analysis and heatmap</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>W566</fpage>&#x02013;<lpage>W570</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv468</pub-id><pub-id pub-id-type="pmid">25969447</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>M&#x000FC;ller</surname> <given-names>E.</given-names></name> <name><surname>Assent</surname> <given-names>I.</given-names></name> <name><surname>Krieger</surname> <given-names>R.</given-names></name> <name><surname>Jansen</surname> <given-names>T.</given-names></name> <name><surname>Seidl</surname> <given-names>T.</given-names></name></person-group> (<year>2008</year>). <article-title>&#x0201C;Morpheus,&#x0201D;</article-title> in <source>Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>ACM</publisher-name>). <pub-id pub-id-type="doi">10.1145/1401890.1402026</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Myburg</surname> <given-names>A. A.</given-names></name> <name><surname>Grattapaglia</surname> <given-names>D.</given-names></name> <name><surname>Tuskan</surname> <given-names>G. A.</given-names></name> <name><surname>Hellsten</surname> <given-names>U.</given-names></name> <name><surname>Hayes</surname> <given-names>R. D.</given-names></name> <name><surname>Grimwood</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>The genome of Eucalyptus grandis</article-title>. <source>Nature.</source> <volume>510</volume>, <fpage>356</fpage>&#x02013;<lpage>362</lpage>. <pub-id pub-id-type="doi">10.1038/nature13308</pub-id><pub-id pub-id-type="pmid">24919147</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Naithani</surname> <given-names>S.</given-names></name> <name><surname>Gupta</surname> <given-names>P.</given-names></name> <name><surname>Preece</surname> <given-names>J.</given-names></name> <name><surname>D&#x00027;Eustachio</surname> <given-names>P.</given-names></name> <name><surname>Elser</surname> <given-names>J. L.</given-names></name> <name><surname>Garg</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Plant reactome: a knowledgebase and resource for comparative pathway analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D1093</fpage>&#x02013;<lpage>D1103</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz996</pub-id><pub-id pub-id-type="pmid">31680153</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>O&#x00027;Malley</surname> <given-names>R. C.</given-names></name> <name><surname>Huang</surname> <given-names>S. C.</given-names></name> <name><surname>Song</surname> <given-names>L.</given-names></name> <name><surname>Lewsey</surname> <given-names>M. G.</given-names></name> <name><surname>Bartlett</surname> <given-names>A.</given-names></name> <name><surname>Nery</surname> <given-names>J. R.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Cistrome and epicistrome features shape the regulatory DNA landscape</article-title>. <source>Cell.</source> <volume>166</volume>, <fpage>1598</fpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2016.08.063</pub-id><pub-id pub-id-type="pmid">27610578</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ouyang</surname> <given-names>S.</given-names></name> <name><surname>Zhu</surname> <given-names>W.</given-names></name> <name><surname>Hamilton</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>H.</given-names></name> <name><surname>Campbell</surname> <given-names>M.</given-names></name> <name><surname>Childs</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>The TIGR rice genome annotation resource: improvements and new features</article-title>. <source>Nucleic Acids Res.</source> <volume>35</volume>, <fpage>D883</fpage>&#x02013;<lpage>D887</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl976</pub-id><pub-id pub-id-type="pmid">17145706</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Papatheodorou</surname> <given-names>I.</given-names></name> <name><surname>Moreno</surname> <given-names>P.</given-names></name> <name><surname>Manning</surname> <given-names>J.</given-names></name> <name><surname>Mu&#x000F1;oz-Pomer Fuentes</surname> <given-names>A.</given-names></name> <name><surname>George</surname> <given-names>N.</given-names></name> <name><surname>Fexova</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Expression atlas update: from tissues to single cells</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D77</fpage>&#x02013;<lpage>D83</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz947</pub-id><pub-id pub-id-type="pmid">31665515</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paysan-Lafosse</surname> <given-names>T.</given-names></name> <name><surname>Blum</surname> <given-names>M.</given-names></name> <name><surname>Chuguransky</surname> <given-names>S.</given-names></name> <name><surname>Grego</surname> <given-names>T.</given-names></name> <name><surname>Pinto</surname> <given-names>B. L.</given-names></name> <name><surname>Salazar</surname> <given-names>G. A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>InterPro in 2022</article-title>. <source>Nucleic Acids Res.</source> <volume>51</volume>, <fpage>D418</fpage>&#x02013;<lpage>D427</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkac993</pub-id><pub-id pub-id-type="pmid">36350672</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Portwood</surname> <given-names>J. L.</given-names></name> <name><surname>Woodhouse</surname> <given-names>M. R.</given-names></name> <name><surname>Cannon</surname> <given-names>E. K.</given-names></name> <name><surname>Gardiner</surname> <given-names>J. M.</given-names></name> <name><surname>Harper</surname> <given-names>L. C.</given-names></name> <name><surname>Schaeffer</surname> <given-names>M. L.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>MaizeGDB 2018: the maize multi-genome genetics and genomics database</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>D1146</fpage>&#x02013;<lpage>D1154</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky1046</pub-id><pub-id pub-id-type="pmid">30407532</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reese</surname> <given-names>J. T.</given-names></name> <name><surname>Unni</surname> <given-names>D.</given-names></name> <name><surname>Callahan</surname> <given-names>T. J.</given-names></name> <name><surname>Cappelletti</surname> <given-names>L.</given-names></name> <name><surname>Ravanmehr</surname> <given-names>V.</given-names></name> <name><surname>Carbon</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>KG-COVID-19: A framework to produce customized knowledge graphs for COVID-19 response</article-title>. <source>Patterns.</source> <volume>2</volume>, <fpage>100155</fpage>. <pub-id pub-id-type="doi">10.1016/j.patter.2020.100155</pub-id><pub-id pub-id-type="pmid">33196056</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reiser</surname> <given-names>L.</given-names></name> <name><surname>Subramaniam</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>D.</given-names></name> <name><surname>Huala</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>Using the Arabidopsis Information Resource (TAIR) to find information about Arabidopsis genes</article-title>. <source>Current Protocols.</source> <volume>2</volume>, <fpage>e574</fpage>. <pub-id pub-id-type="doi">10.1002/cpz1.574</pub-id><pub-id pub-id-type="pmid">36200836</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakai</surname> <given-names>H.</given-names></name> <name><surname>Lee</surname> <given-names>S. S.</given-names></name> <name><surname>Tanaka</surname> <given-names>T.</given-names></name> <name><surname>Numa</surname> <given-names>H.</given-names></name> <name><surname>Kim</surname> <given-names>J.</given-names></name> <name><surname>Kawahara</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Rice annotation project database (RAP-DB): an integrative and interactive database for rice genomics</article-title>. <source>Plant Cell Physiol.</source> <volume>54</volume>, <fpage>e6</fpage>. <pub-id pub-id-type="doi">10.1093/pcp/pcs183</pub-id><pub-id pub-id-type="pmid">23299411</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sheth</surname> <given-names>A.</given-names></name> <name><surname>Padhee</surname> <given-names>S.</given-names></name> <name><surname>Gyrard</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Knowledge graphs and knowledge networks: the story in brief</article-title>. <source>IEEE Internet Comput.</source> <volume>23</volume>, <fpage>67</fpage>&#x02013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1109/MIC.2019.2928449</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shrestha</surname> <given-names>R.</given-names></name> <name><surname>Arnaud</surname> <given-names>E.</given-names></name> <name><surname>Mauleon</surname> <given-names>R.</given-names></name> <name><surname>Senger</surname> <given-names>M.</given-names></name> <name><surname>Davenport</surname> <given-names>G. F.</given-names></name> <name><surname>Hancock</surname> <given-names>D.</given-names></name></person-group> (<year>2010</year>). <article-title>Multifunctional crop trait ontology for breeders&#x00027; data: Field book, annotation, data discovery and semantic enrichment of the literature</article-title>. <source>AoB Plants</source> <volume>2010</volume>, <fpage>plq008</fpage>. <pub-id pub-id-type="doi">10.1093/aobpla/plq008</pub-id><pub-id pub-id-type="pmid">22476066</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shulaev</surname> <given-names>V.</given-names></name> <name><surname>Sargent</surname> <given-names>D. J.</given-names></name> <name><surname>Crowhurst</surname> <given-names>R. N.</given-names></name> <name><surname>Mockler</surname> <given-names>T. C.</given-names></name> <name><surname>Folkerts</surname> <given-names>O.</given-names></name> <name><surname>Delcher</surname> <given-names>A. L.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>The genome of woodland strawberry (Fragaria vesca)</article-title>. <source>Nat. Genet.</source> <volume>43</volume>, <fpage>109</fpage>&#x02013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1038/ng.740</pub-id><pub-id pub-id-type="pmid">21186353</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Silverman</surname> <given-names>E. K.</given-names></name> <name><surname>Schmidt</surname> <given-names>H. H. H. W.</given-names></name> <name><surname>Anastasiadou</surname> <given-names>E.</given-names></name> <name><surname>Altucci</surname> <given-names>L.</given-names></name> <name><surname>Angelini</surname> <given-names>M.</given-names></name> <name><surname>Badimon</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Molecular networks in Network Medicine: Development and applications</article-title>. <source>Wiley Interdiscip. Rev. Syst. Biol. Med.</source> <volume>12</volume>, <fpage>e1489</fpage>. <pub-id pub-id-type="doi">10.1002/wsbm.1489</pub-id><pub-id pub-id-type="pmid">32307915</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Spooner</surname> <given-names>W.</given-names></name> <name><surname>Youens-Clark</surname> <given-names>K.</given-names></name> <name><surname>Staines</surname> <given-names>D.</given-names></name> <name><surname>Ware</surname> <given-names>D.</given-names></name></person-group> (<year>2012</year>). <article-title>GrameneMart: The BioMart data portal for the gramene project</article-title>. <source>Database</source> <volume>2012</volume>, <fpage>bar056</fpage>. <pub-id pub-id-type="doi">10.1093/database/bar056</pub-id><pub-id pub-id-type="pmid">22374386</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tello-Ruiz</surname> <given-names>M. K.</given-names></name> <name><surname>Naithani</surname> <given-names>S.</given-names></name> <name><surname>Stein</surname> <given-names>J. C.</given-names></name> <name><surname>Gupta</surname> <given-names>P.</given-names></name> <name><surname>Campbell</surname> <given-names>M.</given-names></name> <name><surname>Olson</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Gramene 2018: unifying comparative genomics and pathway resources for plant research</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>D1181</fpage>&#x02013;<lpage>D1189</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx1111</pub-id><pub-id pub-id-type="pmid">29165610</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><collab>The GO Consortium</collab></person-group> (<year>2021</year>). <article-title>The gene ontology resource: enriching a GOld mine</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>D325</fpage>&#x02013;<lpage>D334</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa1113</pub-id><pub-id pub-id-type="pmid">33290552</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thudi</surname> <given-names>M.</given-names></name> <name><surname>Palakurthi</surname> <given-names>R.</given-names></name> <name><surname>Schnable</surname> <given-names>J. C.</given-names></name> <name><surname>Chitikineni</surname> <given-names>A.</given-names></name> <name><surname>Dreisigacker</surname> <given-names>S.</given-names></name> <name><surname>Mace</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Genomic resources in plant breeding for sustainable agriculture</article-title>. <source>J. Plant Physiol.</source> <volume>257</volume>, <fpage>153351</fpage>. <pub-id pub-id-type="doi">10.1016/j.jplph.2020.153351</pub-id><pub-id pub-id-type="pmid">33412425</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tian</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>J. W.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Han</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <article-title>Designing future crops: challenges and strategies for sustainable agriculture</article-title>. <source>Plant J.</source> <volume>105</volume>, <fpage>1165</fpage>&#x02013;<lpage>1178</lpage>. <pub-id pub-id-type="doi">10.1111/tpj.15107</pub-id><pub-id pub-id-type="pmid">33258137</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tiddi</surname> <given-names>I.</given-names></name> <name><surname>Balliet</surname> <given-names>D.</given-names></name> <name><surname>ten Teije</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Fostering scientific meta-analysis with knowledge graphs: A case study,&#x0201D;</article-title> in <source>The Semantic Web ESWC 2020. Lecture Notes in Computer Science</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>287</fpage>&#x02013;<lpage>303</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-49461-2_17</pub-id></citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tirosh</surname> <given-names>I.</given-names></name> <name><surname>Weinberger</surname> <given-names>A.</given-names></name> <name><surname>Bezalel</surname> <given-names>D.</given-names></name> <name><surname>Kaganovich</surname> <given-names>M.</given-names></name> <name><surname>Barkai</surname> <given-names>N.</given-names></name></person-group> (<year>2008</year>). <article-title>On the relation between promoter divergence and gene expression evolution</article-title>. <source>Mol. Syst. Biol.</source> <volume>4</volume>, <fpage>159</fpage>. <pub-id pub-id-type="doi">10.1038/msb4100198</pub-id><pub-id pub-id-type="pmid">18197176</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Unni</surname> <given-names>D. R.</given-names></name> <name><surname>Moxon</surname> <given-names>S. A. T.</given-names></name> <name><surname>Bada</surname> <given-names>M.</given-names></name> <name><surname>Brush</surname> <given-names>M.</given-names></name> <name><surname>Bruskiewich</surname> <given-names>R.</given-names></name> <name><surname>Caufield</surname> <given-names>J. H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Biolink model: a universal schema for knowledge graphs in clinical, biomedical, and translational science</article-title>. <source>Clin. Transl. Sci.</source> <volume>15</volume>, <fpage>1848</fpage>&#x02013;<lpage>1888</lpage>. <pub-id pub-id-type="doi">10.1111/cts.13302</pub-id><pub-id pub-id-type="pmid">36125173</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walls</surname> <given-names>R. L.</given-names></name> <name><surname>Cooper</surname> <given-names>L.</given-names></name> <name><surname>Elser</surname> <given-names>J.</given-names></name> <name><surname>Gandolfo</surname> <given-names>M. A.</given-names></name> <name><surname>Mungall</surname> <given-names>C. J.</given-names></name> <name><surname>Smith</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>The plant ontology facilitates comparisons of plant development stages across species</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <fpage>631</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2019.00631</pub-id><pub-id pub-id-type="pmid">31214208</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zemojtel</surname> <given-names>T.</given-names></name> <name><surname>K&#x000F6;hler</surname> <given-names>S.</given-names></name> <name><surname>Mackenroth</surname> <given-names>L.</given-names></name> <name><surname>J&#x000E4;ger</surname> <given-names>M.</given-names></name> <name><surname>Hecht</surname> <given-names>J.</given-names></name> <name><surname>Krawitz</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Effective diagnosis of genetic disease by computational phenotype analysis of the disease-associated genome</article-title>. <source>Sci. Transl. Med.</source> <volume>6</volume>, <fpage>252ra123</fpage>. <pub-id pub-id-type="doi">10.1126/scitranslmed.3009262</pub-id><pub-id pub-id-type="pmid">25186178</pub-id></citation></ref>
</ref-list>
<app-group>
<app id="A1">
<title>Appendix</title>
<p>All supplementary files can be accessed on GitHub under a CC0 license (<ext-link ext-link-type="uri" xlink:href="https://github.com/diatomsRcool/supplementary_material/tree/main/promoter_region">https://github.com/diatomsRcool/supplementary_material/tree/main/promoter_region</ext-link>).</p>
<list list-type="order">
<list-item><p>drought_expression.tsv</p></list-item>
<list-item><p>drought_genes.tsv</p></list-item>
<list-item><p>GO_annotations.tsv</p></list-item>
<list-item><p>panther_results folder</p></list-item>
<list-item><p>promoter_region_clustvis_data0.tsv</p></list-item>
<list-item><p>orthologous_genes.tsv</p></list-item>
</list>
<p>The ClustVis analysis is available at <ext-link ext-link-type="uri" xlink:href="https://biit.cs.ut.ee/clustvis/?s=IWJNurmUtGWZoMt">https://biit.cs.ut.ee/clustvis/?s=IWJNurmUtGWZoMt</ext-link>.</p>
</app>
</app-group>
</back>
</article>