<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1104303</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Analysis of Arabidopsis non-reference accessions reveals high diversity of metabolic gene clusters and discovers new candidate cluster members</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Marszalek-Zenczak</surname>
<given-names>Malgorzata</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2115035"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Satyr</surname>
<given-names>Anastasiia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wojciechowski</surname>
<given-names>Pawel</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1094709"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zenczak</surname>
<given-names>Michal</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2147692"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sobieszczanska</surname>
<given-names>Paula</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brzezinski</surname>
<given-names>Krzysztof</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2140886"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Iefimenko</surname>
<given-names>Tetiana</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Figlerowicz</surname>
<given-names>Marek</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/698389"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zmienko</surname>
<given-names>Agnieszka</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/279724"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Institute of Bioorganic Chemistry, Polish Academy of Sciences</institution>, <addr-line>Poznan</addr-line>, <country>Poland</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Computing Science, Faculty of Computing and Telecommunications, Poznan University of Technology</institution>, <addr-line>Poznan</addr-line>, <country>Poland</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Biology, National University of Kyiv-Mohyla Academy</institution>, <addr-line>Kyiv</addr-line>, <country>Ukraine</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Li Wang, Agricultural Genomics Institute at Shenzhen, Chinese Academy of Agricultural Sciences, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Mei Yang, Wuhan Botanical Garden, Chinese Academy of Sciences (CAS), China; Aalt-Jan Van Dijk, Wageningen University and Research, Netherlands; Ancheng Huang, Southern University of Science and Technology, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Agnieszka Zmienko, <email xlink:href="mailto:akisiel@ibch.poznan.pl">akisiel@ibch.poznan.pl</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Plant Metabolism and Chemodiversity, a section of the journal Frontiers in Plant Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1104303</elocation-id>
<history>
<date date-type="received">
<day>21</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>01</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Marszalek-Zenczak, Satyr, Wojciechowski, Zenczak, Sobieszczanska, Brzezinski, Iefimenko, Figlerowicz and Zmienko</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Marszalek-Zenczak, Satyr, Wojciechowski, Zenczak, Sobieszczanska, Brzezinski, Iefimenko, Figlerowicz and Zmienko</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Metabolic gene clusters (MGCs) are groups of genes involved in a common biosynthetic pathway. They are frequently formed in dynamic chromosomal regions, which may lead to intraspecies variation and cause phenotypic diversity. We examined copy number variations (CNVs) in four <italic>Arabidopsis thaliana</italic> MGCs in over one thousand accessions with experimental and bioinformatic approaches. Tirucalladienol and marneral gene clusters showed little variation, and the latter was fixed in the population. Thalianol and especially arabidiol/baruol gene clusters displayed substantial diversity. The compact version of the thalianol gene cluster was predominant and more conserved than the noncontiguous version. In the arabidiol/baruol cluster, we found a large genomic insertion containing divergent duplicates of the <italic>CYP705A2</italic> and <italic>BARS1</italic> genes. The <italic>BARS1</italic> paralog, which we named <italic>BARS2</italic>, encoded a novel oxidosqualene synthase. The expression of the entire arabidiol/baruol gene cluster was altered in the accessions with the duplication. Moreover, they presented different root growth dynamics and were associated with warmer climates compared to the reference-like accessions. In the entire genome, paired genes encoding terpene synthases and cytochrome P450 oxidases were more variable than their nonpaired counterparts. Our study highlights the role of dynamically evolving MGCs in plant adaptation and phenotypic diversity.</p>
</abstract>
<kwd-group>
<kwd>copy number variation</kwd>
<kwd>biosynthetic gene cluster</kwd>
<kwd>secondary metabolism</kwd>
<kwd>oxidosqualene cyclase</kwd>
<kwd>triterpenes</kwd>
<kwd>cytochrome P450</kwd>
</kwd-group>
<contract-num rid="cn001">2014/13/B/NZ2/03837, 2017/26/D/NZ2/01079</contract-num>
<contract-sponsor id="cn001">Narodowe Centrum Nauki<named-content content-type="fundref-id">10.13039/501100004281</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Polska Akademia Nauk<named-content content-type="fundref-id">10.13039/501100004382</named-content>
</contract-sponsor>
<counts>
<fig-count count="5"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="83"/>
<page-count count="16"/>
<word-count count="11125"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>    <p>Plants are able to produce a variety of low molecular weight organic compounds, which enhance their ability to compete and survive in nature. Secondary metabolites are not essential for plant growth and development. However, they are often multifunctional and may act both as plant growth regulators and be engaged in primary metabolism or plant protection (<xref ref-type="bibr" rid="B29">Isah, 2019</xref>; <xref ref-type="bibr" rid="B19">Erb and Kliebenstein, 2020</xref>). The ability to produce particular types of compounds is usually restricted to individual species or genera. Therefore, these compounds are enormously diverse and have a wide range of biological activities. In plants, genes involved in a common metabolic pathway are typically dispersed across the genome. In contrast, functionally related genes that encode the enzymes involved in specialized metabolite biosynthesis in bacteria and fungi are frequently coexpressed and organized in so-called operons (<xref ref-type="bibr" rid="B9">Boycheva et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B57">N&#xfc;tzmann et&#xa0;al., 2018</xref>). Similar gene organization units called biosynthetic gene clusters or metabolic gene clusters (MGCs) have recently been found in numerous plant species. MGCs have typically been defined as a group of three or more genes that i) encode a minimum of three different types of biosynthetic enzymes, ii) are involved in the consecutive steps of a specific metabolic pathway and iii) are localized in adjacent positions in the genome or are interspersed by a limited number of intervening (i.e., not functionally related) genes (<xref ref-type="bibr" rid="B56">N&#xfc;tzmann and Osbourn, 2014</xref>; <xref ref-type="bibr" rid="B35">Kautsar et&#xa0;al., 2017</xref>). A typical MGC contains a &#x201c;signature&#x201d; enzyme gene involved in the major (usually first) step of a biosynthetic pathway. 
In this step, the metabolite scaffold is generated that determines the class of the pathway products (e.g., terpenes or alkaloids). This scaffold is further modified by &#x201c;tailoring&#x201d; enzymes encoded by other clustered genes, e.g., cytochrome P450 oxidases (CYPs), acyltransferases or alcohol dehydrogenases. The contribution of other enzymes encoded by peripheral genes (i.e., located outside the MGC), and the connection network between different metabolite biosynthesis pathways may result in additional diversification of the biosynthetic products (<xref ref-type="bibr" rid="B27">Huang et&#xa0;al., 2019</xref>). Currently, there are over 30 known MGCs in plants from various phylogenetic clades, and new MGCs are being discovered. Their sizes range from 35 kb to several hundred kb. However, clusters of functionally related nonhomologous genes are still considered unusual in plant genomes.</p>
<p>In <italic>Arabidopsis thaliana</italic> (hereafter Arabidopsis), four MGCs have been discovered thus far (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S1</bold>
</xref>). They are involved in the metabolism of specialized triterpenes: thalianol, marneral, tirucalladienol, arabidiol and baruol. Triterpenes constitute a large and diverse group of natural compounds derived from 2,3-oxidosqualene cyclization in a reaction catalyzed by oxidosqualene cyclases (OSCs) (<xref ref-type="bibr" rid="B69">Thimmappa et&#xa0;al., 2014</xref>). Out of 13 OSC genes known in the Arabidopsis genome, five (<italic>THAS1</italic>, <italic>MRN1</italic>, <italic>PEN3</italic>, <italic>PEN1</italic>, <italic>BARS1</italic>) are located within MGCs and encode the &#x201c;signature&#x201d; enzymes of the MGCs (<xref ref-type="bibr" rid="B24">Field and Osbourn, 2008</xref>; <xref ref-type="bibr" rid="B23">Field et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>). The thalianol gene cluster contains five members involved in thalianol production and in its conversion to another triterpene, thalianin (<xref ref-type="bibr" rid="B22">Fazio et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B24">Field and Osbourn, 2008</xref>; <xref ref-type="bibr" rid="B27">Huang et&#xa0;al., 2019</xref>). In the reference genome, this MGC is ~45 kb in size. The thalianol synthase gene <italic>THAS1</italic> as well as <italic>CYP708A2</italic>, <italic>CYP705A5</italic> and <italic>AT5G47980</italic> (BAHD acyltransferase) genes are tightly clustered together, with only one noncoding transcribed locus (<italic>AT5G07035</italic>) between them. The fifth member, acyltransferase <italic>AT5G47950</italic>, is separated from the rest of the cluster by <italic>RABA4C</italic> and <italic>AT5G47970</italic> intervening genes. The marneral gene cluster is ~35 kb in size and is the most compact plant MGC described to date. 
It is made up of three members: the marneral synthase gene <italic>MRN1</italic>, the marneral oxidase gene <italic>CYP71A16</italic> and the gene <italic>CYP705A12</italic>, whose function is unknown (<xref ref-type="bibr" rid="B78">Xiong et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B23">Field et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B26">Go et&#xa0;al., 2012</xref>). Additionally, there are three noncoding transcribed loci (<italic>AT5G00580</italic>, <italic>AT5G06325</italic> and <italic>AT5G06335</italic>) located between <italic>CYP71A16</italic> and <italic>MRN1</italic>. The tirucalladienol gene cluster is ~47 kb in size and includes five members: tirucalla-7,24-dien-3&#x3b2;-ol synthase gene <italic>PEN3</italic>, an uncharacterized acyltransferase gene <italic>SCPL1</italic>, which was identified based on its coexpression with <italic>PEN3</italic>, <italic>CYP716A1</italic>, which is involved in the hydroxylation of tirucalla-7,24-dien-3&#x3b2;-ol, as well as <italic>AT5G36130</italic> and <italic>CYP716A2</italic> (<xref ref-type="bibr" rid="B52">Morlacchi et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B76">Wisecaver et&#xa0;al., 2017</xref>). The contiguity of this MGC is interrupted by four intervening genes (<italic>CCB3</italic>, <italic>AT5G36125</italic>, <italic>HCF109</italic> and <italic>AT5G36160</italic>) and the noncoding locus <italic>AT5G05325</italic>. The arabidiol/baruol gene cluster is the most complex and has an estimated size of 83 kb. It encompasses two closely located OSCs, <italic>PEN1</italic> and <italic>BARS1</italic>, sharing 91% similarity at the amino acid level. <italic>BARS1</italic> encodes a multifunctional cyclase that produces baruol as its main product (<xref ref-type="bibr" rid="B46">Lodeiro et&#xa0;al., 2007</xref>). 
<italic>PEN1</italic> encodes arabidiol synthase and is adjacent to <italic>CYP705A1</italic>, which is involved in arabidiol degradation upon jasmonic acid treatment (<xref ref-type="bibr" rid="B77">Xiang et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B11">Castillo et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B67">Sohrabi et&#xa0;al., 2015</xref>). The role of the remaining genes in the arabidiol/baruol gene cluster (<italic>CYP702A2</italic>, <italic>CYP702A3</italic>, <italic>CYP705A2</italic>, <italic>CYP705A3</italic>, <italic>CYP705A4</italic>, <italic>CYP702A5</italic>, <italic>CYP702A6</italic> as well as acyltransferases <italic>AT4G15390</italic> and <italic>BIA1</italic>) has not been determined; however, they displayed coexpression with either <italic>PEN1</italic> or <italic>BARS1</italic> (<xref ref-type="bibr" rid="B73">Wada et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B76">Wisecaver et&#xa0;al., 2017</xref>). There are a few intervening loci in the arabidiol/baruol gene cluster, including a protein-coding gene <italic>CSLB06</italic>, two pseudogenes <italic>CYP702A4P</italic> and <italic>CYP702A7P</italic> and one novel transcribed region <italic>AT4G06325</italic>.</p>
<p>Plant MGCs are thought to have arisen by duplication and subsequent neo- or subfunctionalization of genes involved in primary metabolism, which might have been followed by the recruitment of additional genes to the newly forming biosynthetic pathway (<xref ref-type="bibr" rid="B56">N&#xfc;tzmann and Osbourn, 2014</xref>). MGCs are frequently located within dynamic chromosomal regions, e.g., subtelomeric regions, centromeric regions or regions rich in transposable elements (TEs), where the possibility of bringing together the beneficial sets of genes by structural rearrangements may be higher than in the rest of the genome, thus promoting MGC formation (<xref ref-type="bibr" rid="B23">Field et&#xa0;al., 2011</xref>). However, the same factors may also contribute to further genetic modifications and alteration of the plant metabolic profile, thus making such MGCs &#x201c;evolutionary hotspots&#x201d;. To verify this scenario, we evaluated the intraspecific diversity of Arabidopsis MGCs and examined whether this diversity is associated with trait variation. Here, we present a detailed picture of MGC copy number variations (CNVs), describe the discovery of novel, nonreference genes in the arabidiol/baruol gene cluster and reveal the links between the variation in MGC structure and plant adaptation to different natural environments.</p>
</sec>
<sec id="s2" sec-type="results">
<title>Results</title>
<sec id="s2_1">
<title>MGCs differ in levels of copy number polymorphism</title>
<p>We started our analysis by aligning each MGC with the common CNVs in the Arabidopsis genome, which were identified previously (<xref ref-type="bibr" rid="B83">Zmienko et&#xa0;al., 2020</xref>). As expected, each MGC had a substantial overlap with the variable regions: 100% for the thalianol gene cluster, 79.6% for the tirucalladienol gene cluster, 53.1% for the arabidiol/baruol gene cluster, and 52.8% for the marneral gene cluster (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>). However, the potential impact of CNVs on the clustered genes differed among the MGCs (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S1</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S2</bold>
</xref>). In the thalianol gene cluster, most CNVs were grouped in the region spanning <italic>AT5G47980</italic>, <italic>CYP705A5</italic>, <italic>CYP708A2</italic> and <italic>THAS1</italic>, while <italic>AT5G47950</italic> was covered only by the largest variant CNV_18592 (241 kb in size), which encompassed the entire cluster. In the arabidiol/baruol gene cluster, the CNVs (0.6 kb to 21 kb in size) were grouped into three distinct regions separated by invariable segments. The first variable region overlapped with <italic>CYP702A2</italic> and <italic>CYP702A3</italic>. The second variable region overlapped with <italic>CYP705A2</italic>, <italic>CYP705A3</italic> and <italic>BARS1</italic>. The CNVs in the third variable region were mostly intergenic and overlapped with only two genes, <italic>CYP702A5</italic> and <italic>CYP702A6</italic>. <italic>CYP705A1, PEN1</italic>, <italic>CYP705A4</italic>, <italic>AT4G15390</italic> and <italic>BIA1</italic> were not covered by any common CNV. In the tirucalladienol gene cluster, the CNVs accumulated in the 5&#x2019; part of the cluster, and none of them overlapped with <italic>SCPL1</italic>. Notably, upstream of the tirucalladienol gene cluster, a region genetically divergent from the surrounding genomic segments, called a hotspot of rearrangements, was previously described (<xref ref-type="bibr" rid="B30">Jiao and Schneeberger, 2020</xref>). Smaller hotspots of rearrangements were also found between <italic>CYP716A1</italic> and <italic>AT5G36130</italic> in the same MGC as well as in one variable segment of the arabidiol/baruol gene cluster. It was demonstrated that the hotspots of rearrangements are highly variable in the Arabidopsis population, which was in agreement with the observed increased CNV rate in these genomic regions. 
The CNV arrangement in the marneral gene cluster was strikingly different from that in any other MGC in that all variants were intergenic and did not overlap with the marneral cluster genes.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Copy number variation of Arabidopsis metabolic gene clusters. <bold>(A)</bold> MGC overlap with CNV regions. Colored arrows with white filling denote CYPs. Arrows with dark color filling denote OSCs. Arrows with light color filling denote other types of MGC genes. Intervening genes are in light grey. Noncoding genes are in dark grey. Grey boxes indicate overlap with CNV regions. HR &#x2013; hotspot of rearrangements; <bold>(B)</bold> Number and overlap among the accessions with detected gene copy numbers in each of four MGCs; <bold>(C)</bold> Patterns of gene copy number variation in each MGC. Red &#x2013; gain; blue &#x2013; loss, grey &#x2013; no assignment. Names of the genes considered as MGC members are in black; names of the intervening genes are in grey. Source data for histograms are in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S6</bold>
</xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1104303-g001.tif"/>
</fig>
<p>For each MGC, there were CNVs that overlapped only part of the cluster. This indicated that in some accessions, gene deletions/duplications might have altered MGC composition and consequently affected the entire biosynthetic pathway. To evaluate this possibility, we retrieved copy number data for 31 genes (clustered and intervening genes in all MGCs), each from 1,056 accessions (RD dataset; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S3</bold>
</xref>), and supplemented them with multiplex ligation-dependent probe amplification assays for 232 accessions (MLPA dataset; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S4</bold>
</xref>) and droplet digital PCR-based genotyping assays for 20 accessions (ddPCR dataset; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S5</bold>
</xref>). We defined the thresholds for detecting duplications and deletions for each data type. Next, we assigned the copy number status of each gene in each accession (&#x201c;REF&#x201d;, &#x201c;LOSS&#x201d; or &#x201c;GAIN&#x201d;) by combining all three datasets (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S6</bold>
</xref>). Out of the genotypes assigned with two or three approaches, 98.8% were fully concordant, and most of the remaining discrepancies could be resolved manually (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figures S2-S4</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S7</bold>
</xref>). The combined genotyping data for 1,152 accessions were further used to assess and compare MGC variation at the gene level.</p>
<p>Only 28.6% of the assayed accessions had no gene gains or losses in any MGC (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>). This included 65% of accessions from the German genetic group and 39% of accessions from the Central Europe group. In contrast, the vast majority (at least 90%) of accessions from groups known to be genetically distant from the reference genome (North Sweden, Spain, Italy-Balkan-Caucasus, and Relict groups) displayed gene CNV in at least one MGC. We note that the real number of invariable accessions could be even lower since for 96 accessions, some MGC genes were not genotyped. Altogether, 19 genes were affected: four in the thalianol cluster, one in the marneral cluster, three in the tirucalladienol cluster and 11 in the arabidiol/baruol cluster (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>). The latter was also the most variable in terms of the number of accessions carrying CNVs and the diversity of CNV patterns. For two genes, we detected only copy gains, and for 11, we detected only losses, while six genes were multiallelic (with both gains and losses). As expected, these genes resided in the previously defined variable regions. Remarkably, we did not observe complete loss or gain of the entire MGC in any accession. In the next step, we inspected in more detail the level of diversity of each MGC.</p>
</sec>
<sec id="s2_2">
<title>The compact version of the thalianol gene cluster is predominant and more conserved than the reference-like noncontiguous version</title>
<p>A survey with a combination of RD, MLPA and ddPCR approaches revealed 54 accessions with copy number changes in the thalianol gene cluster, which followed five distinct patterns, and <italic>AT5G47950</italic> was the only invariant gene in all accessions (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). The most common (variant A) was the deletion of a region encompassing <italic>AT5G47980</italic> and <italic>CYP705A5</italic>, combined with the deletion of <italic>THAS1</italic>. We detected this variant in 37 accessions from six countries: Sweden (13), Italy (8), Germany (6), Spain (5), Bulgaria (3) and Portugal (2). We also confirmed the existence of two previously reported rare variants (<xref ref-type="bibr" rid="B43">Liu et&#xa0;al., 2020a</xref>). One of them (variant B) was a large deletion spanning <italic>AT5G47980, CYP705A5</italic> and <italic>CYP708A2</italic>. We found this variant in two accessions from Germany (Bch-1, Sp-0), in one from Italy (Etna-2) and in one from Spain (IP-Mon-5). The other one (variant C) was a deletion of a single gene, <italic>CYP708A2</italic>, which we found in five accessions, mainly Relicts, originating from Spain (Can-0, Ped-0, IP-Her-12 and Nac-0) and Portugal (IP-Mos-1). We also found a new type of deletion (variant D) in two Spanish Relicts (IP-Rel-0 and Con-0) and one non-Relict (IP-All-0). The deletion spanned <italic>CYP705A5</italic>, <italic>CYP708A2</italic> and <italic>THAS1</italic> (
<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S5</bold>
</xref>). The last variant (variant E) was a duplication of the acyltransferase gene <italic>AT5G47980</italic>, which was found in four accessions from Italy (Mitterberg-1-179, Mitterberg-1-180, Mitterberg-1-183, Mitterberg-2-185) and one from Greece (Olympia-2). The presence of a tandem duplication ~3 kb in size in Mitterberg-2-185 was confirmed by sequence analysis of its <italic>de novo</italic> genomic assembly (
<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S6</bold>
</xref>). The duplication spanned the entire <italic>AT5G47980</italic> and its flanks (0.5 kb upstream and 0.7 kb downstream) and differed from its copy only by two mismatches and a 1-bp gap. The predicted protein products of both gene copies were identical and shorter than the reference acyltransferase (404 aa versus 443 aa), but they possessed complete transferase domains (pfam02458).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Structural variation of thalianol gene cluster. <bold>(A)</bold> Five types of CNVs that change the number of thalianol cluster genes. The position of intervening genes is ignored and they are not shown. Gene orientation is disregarded. <bold>(B)</bold> Two versions of thalianol gene cluster organization. Clustered genes are in black; intervening genes are in white. <bold>(C)</bold> The frequency of the two thalianol gene cluster versions (discontiguous and compact) among the genetic groups. <bold>(D)</bold> Rate of copy number polymorphism within discontiguous and compact clusters. <bold>(E)</bold> Frequency of variants presented in <bold>(A)</bold> among the accessions with different cluster organizations. The number of presented accessions in panels is 1,152 for <bold>(A)</bold> &#x2013; genotyping, 997 for <bold>(B, C)</bold> &#x2013; inversion detection and 992 for <bold>(D, E)</bold> &#x2013; the intersection of the above.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1104303-g002.tif"/>
</fig>
<p>In the Mitterberg-2-185 assembly, we also detected a chromosomal inversion (with respect to the reference genome orientation) spanning <italic>AT5G47950</italic> and two intervening genes, <italic>RABA4C</italic> and <italic>AT5G47970</italic>. This resulted in a more compact cluster organization compared to the reference (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>). Similar inversions were previously detected in 17 other accessions (out of 22 analyzed), which indicated that the compact version of the thalianol gene cluster might be predominant in Arabidopsis (<xref ref-type="bibr" rid="B43">Liu et&#xa0;al., 2020a</xref>). To verify this possibility, we set up a bioinformatic pipeline for detecting genomic inversions based on paired-end genomic read analysis in 997 accessions. We correctly detected inversions in 12 out of 15 previously analyzed accessions, which indicated the good sensitivity of our method. Altogether, we found inversions, 12.8 kb to 15.4 kb in size, spanning the <italic>AT5G47950</italic>, <italic>RABA4C</italic> and <italic>AT5G47970</italic> genes in 649 accessions (65%), which fully confirmed our predictions (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S8</bold>
</xref>). The compact version of the thalianol gene cluster was dominant in the South and North Sweden genetic groups as well as in the Asia group (83.6% to 88.9%), while the discontiguous version was mainly observed among the U.S.A. accessions and was also slightly more abundant in the Spain genetic group (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). There was a similar frequency of discontiguous and compact versions among the Relicts (12 and 10 accessions, respectively). Interestingly, the CNV frequency substantially differed between the accessions with different cluster organization (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2D, E</bold>
</xref>
). The compact cluster was more conserved; copy number changes (variants B and E) affected only 1.1% of the accessions in this group. The remaining variants, including deletions spanning the <italic>THAS1</italic> signature gene, were found exclusively among the accessions with the reference-like cluster type. Altogether, 12.7% of accessions with discontiguous clusters were affected by CNVs.</p>
</sec>
<sec id="s2_3">
<title>Marneral and tirucalladienol gene clusters display little structural variation</title>
<p>Analysis of RD and MLPA data confirmed exceptionally low variability of marneral cluster genes. One private variant, which we detected in Mir-0 and confirmed by Sanger sequencing, was 1.2 kb in size and spanned the first exon of the <italic>CYP705A12</italic> gene, which resulted in the truncation of its predicted protein product (
<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S7</bold>
</xref>). Apart from that, we did not detect any common gene duplications or deletions within this MGC. Likewise, we observed low variation in the tirucalladienol gene cluster. In 15 accessions (1.4%), deletions or duplications occurred in the region spanning the <italic>AT5G36130</italic>, <italic>CYP716A2</italic> and <italic>PEN3</italic> genes and affected one, two or all of them. Differences between the countries indicated that these structural variants were of local origin (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S8</bold>
</xref>). Sequence analysis of <italic>de novo</italic> genomic assemblies for Ty-1 and Dolna-1-40 confirmed the predicted deletion patterns in these accessions. It should be noted that, according to a recent study, <italic>AT5G36130</italic> and <italic>CYP716A2</italic> gene models are misannotated, and they jointly encode a single protein of the CYP716A subfamily with cytochrome oxidase activity (<xref ref-type="bibr" rid="B79">Yasumoto et&#xa0;al., 2016</xref>) (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S9</bold>
</xref>). Therefore, a full-length gene was absent from all 15 accessions with CNVs in the tirucalladienol gene cluster (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>).</p>
</sec>
<sec id="s2_4">
<title>Intraspecies variation in the arabidiol/baruol gene cluster reveals a novel OSC gene</title>
<p>The arabidiol/baruol gene cluster was the most heterogeneous of all the MGCs. Consistent with the segmental CNV coverage, there were apparent differences in the variation frequency between the genes. At the cluster&#x2019;s 5&#x2019; end, <italic>CYP702A2</italic> was duplicated in 50 accessions, and <italic>CYP702A3</italic> was deleted in 564 accessions, including approximately 70% of all analyzed accessions from Sweden and Spain. In contrast, genes located at the 3&#x2019; end of the cluster showed little variation. There were <italic>CYP702A5</italic> deletions in 35 accessions, <italic>CYP705A4</italic> deletions in two accessions, and <italic>BIA1</italic> deletion in one accession, while <italic>CYP702A6</italic> and <italic>AT4G15390</italic> were invariable in copy number.</p>
<p>The two OSCs, <italic>PEN1</italic> and <italic>BARS1</italic>, were located in segments with opposite variation levels. <italic>PEN1</italic> and the neighboring gene <italic>CYP705A1</italic>, both implicated in the arabidiol biosynthesis pathway, were stable in copy number, except for three accessions with full or partial gene deletions: the Qui-0 and IP-Deh-1 accessions from Spain and the Kyoto accession from Japan. In the latter, we confirmed partial deletion of both genes by analysis of its <italic>de novo</italic> genomic assembly (<xref ref-type="bibr" rid="B30">Jiao and Schneeberger, 2020</xref>). In contrast, <italic>BARS1</italic>, <italic>CYP705A2</italic> and <italic>CYP705A3</italic> were all deleted in several accessions originating from Sweden. We also observed smaller deletions or duplications in this genomic segment, of which the most remarkable was the duplication of <italic>CYP705A2</italic>, detected in 433 (37.6%) accessions. Since the genotypic data for <italic>CYP705A2</italic> and <italic>BARS1</italic> were noisy and indicated more variation than could be revealed by our standard genotyping, we manually inspected short read genomic data that mapped in this region (examples are presented in <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S10</bold>
</xref>). In most accessions, <italic>BARS1</italic> lacked the largest intron, where the <italic>ATREP11</italic> TE (RC/Helitron superfamily) is annotated, which might explain the lower RD values for <italic>BARS1</italic> compared to other genes (see <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S3</bold>
</xref>). Surprisingly, we also observed a mix of reads mapping to <italic>CYP705A2</italic> and <italic>BARS1</italic> loci with and without mismatches in a large number of accessions. Thus, we called SNPs in the coding sequences of both genes to obtain more information on their diversity. Numerous heterozygous SNPs were called in both genes in the above accessions. Because Arabidopsis is a self-pollinating species and therefore highly homozygous, we hypothesized that the reads with mismatches originated from duplicated loci, which showed similarity to <italic>CYP705A2</italic> and <italic>BARS1</italic> and mapped to the reference gene models, resulting in heterozygous SNP calls. In support of this hypothesis, we detected heterozygous SNPs at the <italic>CYP705A2</italic> locus in 90.6% of accessions with this gene&#x2019;s duplication but only in 10.7% of accessions without changes in its copy number (Wilcoxon rank sum test with continuity correction, <italic>p</italic> value &lt;2.2&#xd7;10<sup>-16</sup>; <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S11A</bold>
</xref>). Additionally, heterozygous SNPs at the <italic>BARS1</italic> locus were present in the same accessions (Pearson&#x2019;s correlation coefficient r = 0.86; <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S11B</bold>
</xref>), although we found only one duplication of <italic>BARS1</italic> with our genotyping methods. We concluded that the sequence differences between <italic>BARS1</italic> and its duplicate prevented its detection by RD or MLPA assays. We also observed low but nonzero read coverage and homozygous SNPs at both loci in some accessions with intermediate RD values for <italic>CYP705A2</italic> (RD<sub>mean</sub> = 1.5) and <italic>BARS1</italic> (RD<sub>mean</sub> = 0.6) and with the clear loss of <italic>CYP705A3</italic> (RD<sub>mean</sub> = 0). In agreement with the gene duplication scenario, this could be explained by the presence of <italic>CYP705A2</italic> and <italic>BARS1</italic> duplicates but absence of the entire region spanning the reference genes <italic>CYP705A2</italic>, <italic>CYP705A3</italic> and <italic>BARS1</italic>.</p>
<p>To identify the cryptic <italic>BARS1</italic> duplication, we analyzed genomic assemblies of seven accessions: An-1, Cvi-0, Kyoto, Ler-0, C24, Eri-1 and Sha (<xref ref-type="bibr" rid="B30">Jiao and Schneeberger, 2020</xref>), four of which were also genotyped in our study (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>). We reannotated the entire arabidiol/baruol cluster region in each accession and compared it with the reference (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S9</bold>
</xref>). In six accessions, <italic>BARS1</italic> lacked the largest intron, as indicated earlier by short read data (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S12</bold>
</xref>). In the Cvi-0, Eri-1 and Ler-0 accessions, we identified a nonreference gene encoding a protein with ~91% identity to baruol synthase 1 (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S13</bold>
</xref>). In C24, it was also present but interrupted by ATCOPIA52 retrotransposon insertion, resulting in two shorter ORFs. Based on phylogenetic analysis, we concluded that the identified gene was indeed a <italic>BARS1</italic> duplicate, and we named it <italic>BARS2</italic> (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>). The differences in the exons of the <italic>BARS1</italic> and <italic>BARS2</italic> sequences matched the heterozygous SNP positions very well (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S14</bold>
</xref>). Their introns were much more divergent, which likely affected RD genotyping. Likewise, the probe targeting the <italic>BARS1</italic> locus was located in a highly divergent region, which prevented us from detecting this duplication with MLPA.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>
<italic>BARS2</italic> is a <italic>BARS1</italic> duplicate absent from the reference genome and encodes an oxidosqualene cyclase. <bold>(A)</bold> Organization of arabidiol/baruol gene cluster in Col-0 and seven nonreference accessions. The genomic insertion including <italic>CYP705A2a</italic> and <italic>BARS2</italic> genes is marked with a triangle above the reference cluster. <bold>(B)</bold> Phylogeny of amino acid sequences of clade II OSCs residing in clusters. BARS1 ortholog from <italic>A. lyrata</italic> (LOC9306317) is included. The maximum likelihood tree was generated using the MEGA11 package with Jones-Taylor-Thornton (JTT) substitution matrix and uniform rates among sites. Values along branches are frequencies obtained from 1000 bootstrap replications. <bold>(C)</bold> Conserved protein domains encoded in <italic>BARS1</italic> (Col-0) and <italic>BARS2</italic> (Cvi-0, Eri-1, Ler-0) genes. SQHop_cyclase_N - squalene-hopene cyclase N-terminal domain (Pfam 13249). SQHop_cyclase_C - squalene-hopene cyclase C-terminal domain (Pfam 13243). <bold>(D)</bold> 3D models of baruol synthase proteins encoded by <italic>BARS1</italic> and <italic>BARS2</italic>, predicted by ColabFold software, superposed with the crystal structure of human oxidosqualene cyclase in a complex with lanosterol (LAN). The enlargement box highlights the positions of the catalytic aspartate residue in the predicted models. Colors mark superposed models: green (Col-0 BARS1 isoform NP_193272.1), red (Col-0 BARS1 isoform NP_001329547.1), purple (Cvi-0 BARS1 ATCVI-4G38020), grey (Cvi-0 BARS2 ATCVI-4G38110) and yellow (human OSC PDB ID: 1W6K).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1104303-g003.tif"/>
</fig>
<p>The proteins encoded by <italic>BARS2</italic> in Cvi-0, Eri-1 and Ler-0 possessed both N-terminal and C-terminal squalene-hopene cyclase domains, typical for OSCs (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref>). We performed three-dimensional (3D) modeling of two reference (Col-0) isoforms of baruol synthase 1 (the product of <italic>BARS1</italic>) and its counterpart from Relict Cvi-0 as well as putative baruol synthase 2 (the product of <italic>BARS2</italic>) from Cvi-0 using ColabFold software. Next, we superposed these models with the experimental crystal structure of human OSC, available in a complex with its reaction product lanosterol (<xref ref-type="bibr" rid="B70">Thoma et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B31">Jumper et&#xa0;al., 2021</xref>) (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Information</bold>
</xref>). All structures were highly similar, and we were able to identify potential substrate-binding cavities in the plant enzymes (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3D</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S10</bold>
</xref>). Notably, the catalytic aspartate residue D455 present in the human cyclase had its counterparts in the plant OSCs: D493 in the reference isoform NP_193272.1 and D490 in the remaining proteins (<xref ref-type="supplementary-material" rid="SF3">
<bold>Supplemental Data 1-5</bold>
</xref>). Together, our data indicated that <italic>BARS2</italic> encoded a novel, thus far uncharacterized OSC. As expected, we also found <italic>CYP705A2</italic> duplication in the C24, Cvi-0, Eri-1 and Ler-0 assemblies, and we named it <italic>CYP705A2a</italic>. It had 84% identity with <italic>CYP705A2</italic> at the nucleotide level and 88% similarity at the protein level (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S15</bold>
</xref>). <italic>CYP705A2a</italic> and <italic>BARS2</italic> were adjacent to each other and located on the minus strand of the large genomic sequence insertion between <italic>CYP702A6</italic> and <italic>BIA1</italic> genes (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>), next to an ~5 kb long interspersed nuclear element 1 (LINE-1) retrotransposon and some shorter, undefined ORFs. The presence of the insertion increased the size of the entire arabidiol/baruol gene cluster by 21-27 kb.</p>
</sec>
<sec id="s2_5">
<title>Structural diversity of the arabidiol/baruol gene cluster is associated with the climatic gradient and root growth variation</title>
<p>In the next step, we used the results from the SNP analysis to evaluate the presence/absence variation of both reference (<italic>CYP705A2</italic> + <italic>BARS1</italic>) and nonreference (<italic>CYP705A2a</italic> + <italic>BARS2</italic>) gene pairs in the Arabidopsis population (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S11</bold>
</xref>). The group with only the reference gene pair present was the largest (PP-AA; 628 accessions). Nearly one-third of the population had both gene pairs (PP-PP; 326 accessions). We also separated two smaller groups with the local range of occurrence. The first one, with only the nonreference gene pair, was found in Azerbaijan, Spain, Bulgaria, Russia, Serbia and the U.S.A. (AA-PP; 14 accessions). The last group, where we did not detect any of these genes, was mostly observed at the Bothnian Bay coast collection site in North Sweden (AA-AA; 15 accessions). For 73 accessions, the data were inconclusive. The accuracy of group assignments was validated by sequence analysis of <italic>de novo</italic> genomic assemblies for An-1, Kyoto, Mitterberg-2-185 and Kn-0 (PP-AA group) as well as Cvi-0, Ler-0, Dolna-1-40 and Ty-1 (PP-PP group). Additionally, the results of PCR amplification with gene-specific primers and genomic DNA template for a subset of 36 accessions from all four groups confirmed the differences between them (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S16</bold>
</xref>). We could not detect <italic>BARS2</italic>-specific products in many samples from the AA-PP group; however, we did detect the band for <italic>CYP705A2a</italic>. We suppose that the <italic>BARS2</italic> sequence might further diverge in this minor group.</p>
<p>The accessions with the nonreference gene pair (AA-PP; PP-PP) dominated among Relicts (81%) and among the Spain (60%) and Italy/Balkan/Caucasus (89.6%) genetic groups but constituted the minority at the northern and eastern margins of the species range (North Sweden 18.6%, South Sweden 16%, Asia 9.4%; <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>). They were also mostly absent among U.S.A. accessions. The widespread presence of <italic>CYP705A2a</italic> and <italic>BARS2</italic> genes in Relicts suggested that the duplication event preceded the recent massive species migration, which took place in the postglacial period and shaped the current Arabidopsis population structure (<xref ref-type="bibr" rid="B39">Lee et&#xa0;al., 2017</xref>). We next visualized the four groups in principal component analysis (PCA) plots generated with genome-wide biallelic SNPs (<xref ref-type="bibr" rid="B1">1001 Genomes Consortium, 2016</xref>; <xref ref-type="bibr" rid="B83">Zmienko et&#xa0;al., 2020</xref>). At a low linkage disequilibrium parameter, where the contribution of the ancestral alleles to PCA was highest, there was a clear convergence of the PC1 and PC2 components with the presence/absence of gene duplication (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S17</bold>
</xref>). This suggested that the presence/absence of the genomic insertion containing <italic>CYP705A2a</italic> and <italic>BARS2</italic> genes had some impact on the current geographic distribution of the Arabidopsis accessions. We then evaluated the accessions&#x2019; latitudes of origin and found that accessions with the nonreference gene pair originated from significantly lower latitudes compared to the remaining accessions (one-way rank-based analysis of variance, ANOVA, <italic>p</italic> value&lt;0.001, followed by Dunn&#x2019;s test with BH correction, <italic>p</italic> value&lt;0.001) (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4D</bold>
</xref>). This difference was noticeable even within individual countries and was significant for Germany, Spain and Italy (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S18</bold>
</xref>). We observed the reverse trend in Russia, where PP-AA accessions were in great excess (88%), and in France; however, we also noticed that PP-AA accessions outnumbered PP-PP accessions in the Pyrenees, Alps and Tian Shan mountain ranges (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Information</bold>
</xref>). This result suggested that there was an association between arabidiol/baruol gene cluster variation and environmental conditions; therefore, we decided to investigate this in the next step. Since climate is a substantial selection factor, we also checked for phenotypic variability between the most abundant PP-AA and PP-PP groups. To this end, we performed two-group comparisons of 516 phenotypic and climatic variables retrieved from the Arapheno database (<xref ref-type="bibr" rid="B64">Seren et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B71">Togninalli et&#xa0;al., 2020</xref>) and focused on those that significantly differed between both groups (Wilcoxon rank sum test with continuity correction, <italic>p</italic> value &lt;0.05) (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S12</bold>
</xref>). Notably, we observed differences in 88 climatic variables (<xref ref-type="bibr" rid="B20">Exposito-Alonso et&#xa0;al., 2019</xref>), especially maximal and minimal temperature conditions, precipitation and evapotranspiration (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>). Apart from the climate data, 40 diverse phenotypes varied significantly between both groups. Although some of these differences, e.g., flowering-related phenotypes, might be influenced by another genetic factor, independent from the arabidiol/baruol gene cluster structure (<xref ref-type="bibr" rid="B40">Li et&#xa0;al., 2010</xref>), we paid special attention to root growth-related phenotypes, since all Arabidopsis MGCs are considered to have root-specific expression (<xref ref-type="bibr" rid="B27">Huang et&#xa0;al., 2019</xref>). We observed significant differences between the PP-AA and PP-PP groups in root growth dynamics, which was analyzed during the first week after germination by <xref ref-type="bibr" rid="B7">Bouain et&#xa0;al. (2018)</xref>. More specifically, the roots of PP-PP accessions elongated slower than those of PP-AA accessions (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>). Additionally, PP-PP accessions showed a significantly lower rate of root organogenesis from explants under one of three growth conditions tested in another study (<xref ref-type="bibr" rid="B38">Lardon et&#xa0;al., 2020</xref>) (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5C</bold>
</xref>). We next applied a linear mixed model in a genome-wide association study on the same 516 phenotypes to independently evaluate the significance of our observations after correction for the population structure and multiple testing. We used a genome-wide matrix of over 250 thousand biallelic SNPs supplemented with SNP-like encoded information about the gene duplication status (only PP-AA and PP-PP groups were analyzed). Although the association of <italic>CYP705A2a</italic> and <italic>BARS2</italic> presence/absence variation was not statistically significant for any variable we tested, we again obtained the lowest <italic>p</italic> values for the climatic data and root organogenesis phenotypes (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5D</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S12</bold>
</xref>). We then checked for the genetic interactions between the thalianol and arabidiol/baruol clusters to exclude the possibility that they affected our results, since the distribution of discontiguous and compact versions of the thalianol gene cluster was also strongly associated with latitude (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S19</bold>
</xref>). However, structural variation of the arabidiol/baruol gene cluster better explained the geographical distribution of the accessions. Moreover, variation in thalianol gene cluster organization did not affect the expression of the thalianol biosynthesis genes and had little impact on root growth phenotypic variation (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S20</bold>
</xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Population-scale diversity of <italic>CYP705A2</italic> and <italic>BARS1</italic> duplication status. <bold>(A)</bold> The sizes of four groups differing by the presence (PP)/absence (AA) of <italic>CYP705A2</italic>-<italic>BARS1</italic> and <italic>CYP705A2a</italic>-<italic>BARS2</italic> gene pairs. <bold>(B)</bold> Group distribution among the genetic groups. U.S. accessions from the German group were separated from the remaining accessions. <bold>(C)</bold> Principal component analysis (PCA) plots, generated at linkage disequilibrium LD = 0.3. The first two components are presented. Accessions are colored according to their genetic group (left) or CYP-BARS status (right). U.S. accessions were not included in the analysis, in order to better visualize the remaining groups. PCA plots with other LD parameters are in <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S17</bold>
</xref>. <bold>(D)</bold> Latitudes of accessions&#x2019; sites of origin, grouped by CYP-BARS status. One-way rank-based analysis of variance ANOVA, <italic>p</italic> value&lt;0.001, followed by Dunn&#x2019;s test with BH correction, **<italic>p</italic> value&lt;0.05 (PP-PP vs AA-PP); ***<italic>p</italic> value &lt; 0.001 (all the other pairwise comparisons).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1104303-g004.tif"/>
</fig>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Phenotypic variation of PP-AA and PP-PP groups. <bold>(A-C)</bold> Two-group comparisons of climatic <bold>(A)</bold>, root growth dynamics <bold>(B)</bold> and root organogenesis <bold>(C)</bold> data between PP-AA (green) and PP-PP (orange) accessions. Stars denote the significance of Wilcoxon rank sum test with continuity correction, *p.value&lt;0.1, **p.value&lt;0.05, ***p.value&lt;0.001. <bold>(D)</bold> Results of a genome-wide association study for PP-AA/PP-PP allelic variation. Study with climatic data is in the grey box. <bold>(E)</bold> Tissue specificity of arabidiol/baruol gene cluster expression in Col-0 and Cvi-0. <bold>(F)</bold> Population-level differences in gene expression in leaves among the PP-AA, PP-PP and AA-PP groups. Expression levels are shown as log<sub>2</sub>(TPM+1). Stars denote the significance of one-way rank-based analysis of variance ANOVA, p.value&lt;0.001, followed by Dunn&#x2019;s test with BH correction, **p.value&lt;0.05, ***p.value&lt;0.001. Source data are available in the Arapheno database (plots <bold>A-C</bold>), <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S12</bold>
</xref> (plot <bold>D</bold>) and <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S13</bold>
</xref> (plots <bold>E-F</bold>).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1104303-g005.tif"/>
</fig>
<p>In the reference accession Col-0, all genes in the arabidiol/baruol cluster were expressed at low levels and were active almost exclusively in roots (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S21</bold>
</xref>). In search of the possible links between arabidiol/baruol gene cluster structure and phenotypic variation, we investigated <italic>CYP705A2</italic>, <italic>BARS1</italic>, <italic>CYP705A2a</italic> and <italic>BARS2</italic> expression profiles in Col-0 and Cvi-0. We used RNA-Seq data from roots, shoots and leaves, which we retrieved from the studies where these accessions were grown in parallel under standard conditions (<xref ref-type="bibr" rid="B36">Kawakatsu et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B72">van Veen et&#xa0;al., 2016</xref>). We mapped the data to the respective (Col-0 or Cvi-0) annotated genome and compared the gene expression profiles (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5E</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S13</bold>
</xref>). In both accessions, the arabidiol/baruol gene cluster was silenced in shoots, except for the low activity of acyltransferase gene <italic>AT4G15390</italic>, detected in Cvi-0. Additionally, in both accessions, the clusters were active in roots, and the expression of <italic>AT4G15390</italic> was much stronger than that of the remaining genes. In Cvi-0, genes located in the genomic insertion (<italic>CYP705A2a</italic>, <italic>BARS2</italic> and <italic>ATCVI-4G38100</italic>, the latter encoding a protein with partial similarity to acyltransferase) were also expressed, although at a lower level, compared to the rest of the cluster. Surprisingly, in leaves of Cvi-0, but not Col-0, we also detected transcriptional activity within the arabidiol/baruol gene cluster. Most clustered genes were expressed at lower levels than in Cvi-0 roots, and the transcripts of <italic>CYP705A2</italic>, <italic>CYP705A3</italic> and <italic>BARS1</italic> were barely detectable. However, <italic>ATCVI-4G38100</italic>, <italic>CYP705A2a</italic> and <italic>BARS2</italic> had similar expression in leaves and roots. Taking these observations into account, it cannot be excluded that the metabolic products of arabidiol/baruol gene cluster activity in the roots and leaves of the Cvi-0 accession are not identical.</p>
<p>Since the PP-PP group represented a substantial fraction of the Arabidopsis population, we wanted to check whether the gene expression profile, which we observed in leaves of Cvi-0, was ubiquitous among the accessions from this group. To this end, we analyzed RNA-Seq data for 552 accessions mapped against the reference genome (<xref ref-type="bibr" rid="B36">Kawakatsu et&#xa0;al., 2016</xref>), and we compared the <italic>BARS1</italic> expression level between the AA-PP, PP-PP and PP-AA groups. It was significantly higher in accessions with the <italic>CYP705A2a</italic> + <italic>BARS2</italic> gene pair than in the PP-AA group (one-way rank-based analysis of variance, ANOVA, <italic>p</italic> value&lt;0.001, followed by Dunn&#x2019;s test with BH correction, <italic>p</italic> value &lt;0.05) (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5F</bold>
</xref>), in agreement with our predictions that <italic>BARS2</italic> was expressed in the leaves of these accessions and that reads from <italic>BARS2</italic> transcripts mapped to the <italic>BARS1</italic> locus, elevating its measured expression level. We also remapped the raw RNA-Seq reads from the Ty-1 and Cdm-1 accessions (PP-PP group), as well as from the Kn-0 and Sha (PP-AA group) accessions to their respective genomic assemblies and separately measured the expression levels of <italic>BARS1</italic> and <italic>BARS2</italic>. As expected, <italic>BARS2</italic> was expressed in the leaves of PP-PP accessions, while <italic>BARS1</italic> was not (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S21</bold>
</xref>).</p>
</sec>
<sec id="s2_6">
<title>Paired terpenoid synthase and cytochrome P450 genes are more variable than nonpaired genes</title>
<p>In many plant genomes, genes encoding terpenoid synthases (TSs, including the OSCs analyzed in our study) are positioned in the vicinity of CYPs more often than expected by chance. Therefore, they frequently exist as TS-CYP pairs (<xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>). TS-CYP pairs located in MGCs had similar (either high or low) copy number diversity and were frequently duplicated or deleted together. We wanted to check whether this observation could be extended to other TS-CYP pairs in the Arabidopsis genome. Therefore, we created a comprehensive list of 48 TSs and 242 CYPs based on trusted sources (<xref ref-type="bibr" rid="B58">Paquette et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B4">Bak et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B54">Nelson and Werck-Reichhart, 2011</xref>; <xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>). We then retrieved information about each gene&#x2019;s copy number diversity among 1,056 accessions (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Tables S14-S15</bold>
</xref>). For 13 TSs, including <italic>THAS1</italic> and <italic>BARS1</italic>, we observed gains or losses in at least 1% of accessions. Only 33 CYPs showed such variability, and they represented three clans: CYP71 (26 variable genes out of 151), CYP85 (6 variable genes out of 29) and CYP72 (1 variable gene out of 19). The remaining clans showed very low variability. Next, for each TS, we selected all CYPs within +/- 30-kb distance, which produced 38 pairs between 18 TSs and 27 CYPs, including pairs in thalianol, marneral, tirucalladienol and arabidiol/baruol gene clusters, as well as other putative secondary metabolism clusters, listed in the plantiSMASH resource (<xref ref-type="bibr" rid="B35">Kautsar et&#xa0;al., 2017</xref>). Subsequent group comparisons revealed that TSs and CYPs occurring in pairs were more variable than their nonpaired counterparts (Wilcoxon rank sum test with continuity correction, <italic>p</italic> value&lt;0.01 for TSs, <italic>p</italic> value&lt;0.001 for CYPs).</p>
</sec>
</sec>
<sec id="s3" sec-type="discussion">
<title>Discussion</title>
<p>According to our current understanding of the MGC formation phenomenon, nonrandom gene clustering in eukaryotes is linked with highly dynamic chromosomal regions. Numerous studies have highlighted that structural variations are the main genetic drivers of metabolic profile diversity and MGC evolution in plants (<xref ref-type="bibr" rid="B21">Fan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B42">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B43">Liu et&#xa0;al., 2020a</xref>; <xref ref-type="bibr" rid="B44">Liu et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B82">Zhan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B34">Katz et&#xa0;al., 2021</xref>). These studies suggested that plant MGCs are dynamically evolving and that the genetic mechanisms that originally led to their formation may be captured at the intraspecies genetic variation level. Similar conclusions were drawn from a previous study of the filamentous fungus <italic>Aspergillus fumigatus</italic>, in which secondary metabolic pathway genes were commonly organized into clusters (<xref ref-type="bibr" rid="B41">Lind et&#xa0;al., 2017</xref>). During evolution, new biochemical pathways are tuned and tested by many rounds of natural selection. The analysis of intraspecies MGC variants, which are more recent than the variants found in interspecies comparisons, may provide important insight into the formation of clustered gene architectures and plant metabolic diversity in a small evolutionary time frame. Accordingly, in our study we established that the mechanisms driving gene duplications and deletions contributed to the formation of Arabidopsis MGCs in their present form and that they are still involved in shaping their structures. The dynamics of these mechanisms is exemplified by the observed extensive variation of the thalianol gene cluster and the arabidiol/baruol gene cluster.</p>
<p>The four MGCs in Arabidopsis are implicated in the biosynthesis of structurally diverse triterpenes and are dated after the &#x3b1; whole-genome duplication event, which occurred in the Brassicaceae lineage ~23-43 Mya (<xref ref-type="bibr" rid="B23">Field et&#xa0;al., 2011</xref>). These MGCs are assembled around the gene(s) encoding clade II OSCs. It has been shown that in various Brassicaceae genomes, clade II OSCs are often colocalized with genes from the CYP705, CYP708 and CYP702 clans and with genes from the acyltransferase IIIa subfamily (<xref ref-type="bibr" rid="B44">Liu et&#xa0;al., 2020b</xref>). Bioinformatic studies have also revealed that TSs and CYPs are paired in plant genomes more frequently than expected (<xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>). We found that in Arabidopsis, the physical proximity of CYPs and TSs was associated with increased CNV rates for these genes compared to the nonpaired ones. This might suggest that the occurrence of such a specific gene mix, combined with the structural instability of its genomic neighborhood, boosted the potential to produce novel metabolic pathways. The four Arabidopsis MGCs had different levels of variation (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>), which generally reflected the phylogeny of clade II OSCs contained in these clusters (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref>). Of them, MRN1 is the most divergent in amino acid sequence. It is also mono-functional, i.e., it catalyzes the formation of one specific product &#x2013; marneral (<xref ref-type="bibr" rid="B78">Xiong et&#xa0;al., 2006</xref>). Functional studies have indicated a critical role of marneral synthase in Arabidopsis development (<xref ref-type="bibr" rid="B26">Go et&#xa0;al., 2012</xref>). Consistent with these findings, <italic>MRN1</italic> was the only clustered OSC gene that was not affected by deletions or duplications in any accession. Additionally, the neighboring CYPs were stable in copy number. Our results indicate that the marneral gene cluster is fixed in the Arabidopsis genome.</p>
<p>The arabidiol/baruol gene cluster was the most variable MGC. It comprises few gene subfamilies but is significantly expanded compared to the sister species <italic>A. lyrata</italic>, which is suggestive of recent duplications. For example, <italic>PEN1</italic> and <italic>BARS1</italic> have only one ortholog in <italic>A. lyrata</italic>, <italic>LOC9306317</italic>. Accordingly, we observed an exceptionally high rate of intraspecific gene gains and losses within this MGC. The segmentation of the arabidiol/baruol gene cluster into variable and invariable gene blocks may result from the ongoing process of selection-driven fixation of the arabidiol subcluster. The products of <italic>PEN1</italic> and <italic>CYP705A1</italic> are involved in the response to jasmonic acid treatment and infection with the root-rot pathogen <italic>Pythium irregulare</italic> (<xref ref-type="bibr" rid="B67">Sohrabi et&#xa0;al., 2015</xref>). Moreover, arabidiol may be further converted to arabidin in the pathway involving acyltransferase encoded by <italic>AT5G47950</italic>, which is located in the thalianol gene cluster (<xref ref-type="bibr" rid="B27">Huang et&#xa0;al., 2019</xref>) and which was also invariable in copy number in the present study. The fixation of genes involved in arabidin biosynthesis may indicate the biological significance of this pathway. CRISPR mutants with a disrupted <italic>AT5G47950</italic> gene have been shown to have significantly shorter roots than wild-type plants, and arabidin did not accumulate in these roots (<xref ref-type="bibr" rid="B3">Bai et&#xa0;al., 2021</xref>). Interestingly, <italic>A. lyrata</italic> is able to convert apo-arabidiol (the product of arabidiol degradation) into downstream compounds, despite the lack of arabidiol synthase (<xref ref-type="bibr" rid="B66">Sohrabi et&#xa0;al., 2017</xref>). This indicates that there may be modularity of the biosynthetic pathways in plants. 
This modularity might facilitate the assembly of a biosynthesis network and lead to an increase in the repertoire of secondary metabolites produced by the plant. Understanding the complexity of this network may be supported by in-depth analysis of MGC intraspecies variation.</p>
<p>The initial diversity of 2,3-oxidosqualene cyclization products generated by the plant is determined by OSC diversity. Here, we report the discovery of the <italic>BARS2</italic> gene, which was found in numerous accessions but was absent from Col-0; hence, it was absent from the reference genome (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>). Our data indicated that <italic>BARS2</italic> encodes a functional clade II OSC (<xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3C, D</bold>
</xref>). Notably, baruol synthase 1 encoded by its closest paralog, <italic>BARS1</italic>, has the lowest product specificity among plant OSCs (<xref ref-type="bibr" rid="B46">Lodeiro et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B25">Ghosh, 2016</xref>). Why some OSCs are highly multifunctional is not well understood. It has been suggested that they are undergoing evolution toward increased product specificity. It has been demonstrated that only two amino acid changes in cycloartenol synthase lead to its conversion into an accurate lanosterol synthase (<xref ref-type="bibr" rid="B45">Lodeiro et&#xa0;al., 2005</xref>). Biochemical characterization of baruol synthase 2 and its comparison with baruol synthase 1 may help reveal the role of particular amino acids in acquiring specificity for given products.</p>
<p>According to our data, the <italic>BARS2</italic> and <italic>CYP705A2a</italic> gene pair may be present in nearly one-third of the Arabidopsis population (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>), and their presence/absence variation is associated with the climatic gradient and root growth dynamics (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5A-D</bold>
</xref>). In Col-0, MGCs are embedded in local hotspots of three-dimensional chromatin interactions. Their activation in roots and repression in leaves is combined with the distinct chromatin condensation states and nuclear repositioning of MGC regions between these tissues (<xref ref-type="bibr" rid="B55">N&#xfc;tzmann et&#xa0;al., 2020</xref>). Loss of the histone mark H3K27me3 in the <italic>clf/swn</italic> mutant resulted in the loss of interactive domains associated with the thalianol, marneral and arabidiol/baruol cluster regions, indicating that different transcriptional states of these MGCs are strictly regulated by the switches in their conformation. Curiously, in accessions with <italic>CYP705A2a</italic> and <italic>BARS2</italic>, we observed some transcriptional activity of arabidiol/baruol cluster genes in leaves (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5E, F</bold>
</xref>). The presence of an ~25-kb insertion in the arabidiol/baruol gene cluster may alter its structure and affect the epigenetic regulation of its activity. Thus, variation at the epigenetic and transcriptional level might lead to phenotypic differences, which could in turn contribute to local adaptation and eventually affect the global distribution of Arabidopsis accessions. However, additional studies are needed to assess whether the association between <italic>BARS2</italic> and <italic>CYP705A2a</italic> presence/absence variation and the global distribution of Arabidopsis accessions may be linked to the expression of these two genes or to the differences in transcriptional activity of the entire cluster (<xref ref-type="bibr" rid="B74">Wegel et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B81">Yu et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B61">Roul&#xe9; et&#xa0;al., 2022</xref>).</p>
<p>The thalianol gene cluster was the second most variable MGC in our analysis. The first evidence for its structural diversity comes from the study of <xref ref-type="bibr" rid="B43">Liu et&#xa0;al. (2020a)</xref>, who found large deletions affecting thalianol biosynthesis genes in ~2% of the studied accessions. Since our approach was specifically focused on CNV analysis and was duplication-aware, we were able to detect over two times more CNVs in a similar population (4.7%), with 49 accessions carrying gene deletions and five accessions with gene duplications (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). Apart from the identification of two new variants &#x2013; one large deletion and a duplication &#x2013; we validated earlier assumptions that the nonreference compact version of the thalianol gene cluster is predominant in Arabidopsis (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>). Moreover, it is also better conserved than the discontiguous version (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2D, E</bold>
</xref>). It remains to be investigated whether tighter clustering of the thalianol gene cluster may be advantageous in certain environmental conditions or whether it is just less prone to structural variation due to physical constraints.</p>
<p>Triterpenes are high-molecular-weight nonvolatile compounds that are likely to act locally. However, they may be further processed and generate various breakdown products, both volatile and nonvolatile, which may be biologically active (<xref ref-type="bibr" rid="B67">Sohrabi et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B66">Sohrabi et&#xa0;al., 2017</xref>). Compounds of plant origin may also be metabolized by plant-associated microbiota. A recent study demonstrated that various combinations of thalianin, thalianyl fatty acid esters and arabidin attracted or repelled various microbial communities present in the soil and participated in the plant&#x2019;s active selection of root microbiota (<xref ref-type="bibr" rid="B27">Huang et&#xa0;al., 2019</xref>). In fact, a small but significant effect of Arabidopsis genotype on the root microbiome has been demonstrated previously (<xref ref-type="bibr" rid="B10">Bulgarelli et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B47">Lundberg et&#xa0;al., 2012</xref>). In a recent study by <xref ref-type="bibr" rid="B33">Karasov et&#xa0;al. (2022)</xref>, bacterial communities that colonized the leaves of 267 local Arabidopsis populations, assessed at various localizations in Europe, formed two distinct groups strongly associated with the latitude. Specifically, a significant latitudinal cline was observed for the strains of the <italic>Sphingomonas</italic> genus, which is commonly associated with Arabidopsis (<xref ref-type="bibr" rid="B6">Bodenhausen et&#xa0;al., 2013</xref>). Various <italic>Sphingomonas</italic> species possess a range of biodegradative and biosynthetic capabilities (<xref ref-type="bibr" rid="B51">Mohn et&#xa0;al., 1999</xref>; <xref ref-type="bibr" rid="B2">Asaf et&#xa0;al., 2020</xref>). 
<italic>Sphingomonas</italic> is implicated in promoting Arabidopsis growth, increasing drought resistance and protecting plants against the leaf-pathogenic <italic>Pseudomonas syringae</italic> (<xref ref-type="bibr" rid="B28">Innerebner et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B48">Luo et&#xa0;al., 2019</xref>). Notably, in the study by <xref ref-type="bibr" rid="B33">Karasov et&#xa0;al. (2022)</xref>, the host plant genotype alone could explain 52% to 68% of the observed variance in the phyllosphere microbiota. Moreover, the microbiome type was strongly associated with the dryness index of the local environment based on recent precipitation and temperature data. We propose that the genetic diversity of terpenoid metabolism pathways in Arabidopsis may be interdependent on the diversity of soil bacterial communities present in various environments, and this relationship might play a role in Arabidopsis adaptation to climate-driven selective pressures. Further exploration of MGC diversity may help us understand these biotic interactions.</p>
<p>Currently, the bioinformatic identification of new MGC candidates is mainly based on the combination of physical gene grouping and coexpression analyses. The accuracy and sensitivity of such approaches strongly depend on the abundance of data from various tissues, time points, and environmental conditions (<xref ref-type="bibr" rid="B76">Wisecaver et&#xa0;al., 2017</xref>). We suggest that the analysis of intraspecies genetic and transcriptomic variation may provide a valuable addition to MGC studies. The genome of one individual may not be representative enough to reveal the entire complexity of a given pathway, not to mention the metabolic diversity of the entire species (<xref ref-type="bibr" rid="B36">Kawakatsu et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B65">Shirai et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B83">Zmienko et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B34">Katz et&#xa0;al., 2021</xref>). With the rapid increase in the number of near-to-complete assemblies of individuals&#x2019; genomes facilitated by the development of third-generation sequencing technologies, we are now entering the era of intense exploration of the impressive plasticity of plant metabolic pathways.</p>
</sec>
<sec id="s4" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="s4_1">
<title>Plant material and DNA samples</title>
<p>Arabidopsis seeds were obtained from The Nottingham Arabidopsis Stock Centre. The seeds were surface-sterilized, vernalized for 3 days, and grown on Jiffy pellets in ARASYSTEM containers (BETATECH) in a growth chamber (Percival Scientific). A light intensity of 175 &#x3bc;mol m<sup>-2</sup> s<sup>-1</sup> with proportional blue, red, and far-red light was provided by a combination of fluorescent lamps (Philips) and GroLEDs red/far red LED Strips (CLF PlantClimatics). Plants were grown for 3 weeks under a 16-h light (22&#xb0;C)/8-h dark (18&#xb0;C) cycle, at 70% RH, nourished with half-strength Murashige &amp; Skoog medium (Serva). Genomic DNA for MLPA and ddPCR assays was extracted from 100 mg leaves with a DNeasy Plant Mini Kit (Qiagen), according to the manufacturer&#x2019;s protocol, which included an RNase A treatment step.</p>
</sec>
<sec id="s4_2">
<title>RD assays</title>
<p>To determine the boundaries of each MGC, the relevant literature and gene coexpression datasets were surveyed (<xref ref-type="bibr" rid="B22">Fazio et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B78">Xiong et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B77">Xiang et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B46">Lodeiro et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B24">Field and Osbourn, 2008</xref>; <xref ref-type="bibr" rid="B52">Morlacchi et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B23">Field et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B26">Go et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B69">Thimmappa et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B67">Sohrabi et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B79">Yasumoto et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B76">Wisecaver et&#xa0;al., 2017</xref>). The TAIR10 genome version and Araport 11 annotations (<xref ref-type="bibr" rid="B13">Cheng et&#xa0;al., 2017</xref>) were used as a reference in all analyses. Short read sequencing data from the Arabidopsis 1001 Genomes Project (<xref ref-type="bibr" rid="B1">1001 Genomes Consortium, 2016</xref>) were downloaded from the National Center for Biotechnology Information Sequence Read Archive repository (PRJNA273563), processed and mapped to the reference genome as described previously (<xref ref-type="bibr" rid="B83">Zmienko et&#xa0;al., 2020</xref>). The gene copy number estimates based on read-depth analysis of short reads (RD dataset) were generated previously and are available at <uri xlink:href="http://athcnv.ibch.poznan.pl">http://athcnv.ibch.poznan.pl</uri>. Accessions BRR57 (ID 504), KBS-Mac-68 (ID 1739), KBS-Mac-74 (ID 1741) and Ull2-5 (ID 6974), which we previously identified as harboring an unusually high level of duplications, were removed from the analysis.</p>
</sec>
<sec id="s4_3">
<title>MLPA assays</title>
<p>MLPA probes were designed according to a procedure designed previously and presented in detail in (<xref ref-type="bibr" rid="B62">Samelak-Czajka et&#xa0;al., 2017</xref>). Probe genomic target coordinates are listed in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S16</bold>
</xref>. The MLPA assays were performed using 5 ng of DNA template with the SALSA MLPA reagent kit FAM (MRC-Holland). The MLPA products were separated by capillary electrophoresis in an ABI Prism 3130XL analyzer at the Molecular Biology Techniques Facility in the Department of Biology at Adam Mickiewicz University, Poznan, Poland. Raw electropherograms were quality-checked and quantified with GeneMarker v.2.4.2 (SoftGenetics), with peak intensity and internal control probe normalization options enabled. Data were further processed in Excel (Microsoft). To allow easy comparison of RD and MLPA values, the MLPA results were normalized to a median of all samples&#x2019; intensities and then multiplied by 2, separately for each gene/MLPA probe.</p>
</sec>
<sec id="s4_4">
<title>ddPCR assays</title>
<p>Genomic DNA samples were digested with XbaI (Promega). DNA template (2.5 ng) was mixed with 1&#xd7; EvaGreen ddPCR Supermix (Bio-Rad), 200 nM gene-specific primers (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S17</bold>
</xref>) and 70 &#x3bc;l of Droplet Generation Oil (Bio-Rad), then partitioned into approximately 18,000 droplets in a QX200 Droplet Generator (Bio-Rad), and amplified in a C1000 Touch Thermal Cycler (Bio-Rad), with the following cycling conditions: 1&#xd7; (95&#xb0;C for 5&#xa0;min), 40&#xd7; (95&#xb0;C for 30 s, 57&#xb0;C for 30 s, 72&#xb0;C for 45 s), 1&#xd7; (4&#xb0;C for 5&#xa0;min, 90&#xb0;C for 5&#xa0;min), with 2&#xb0;C/s ramp rate. Immediately following end-point amplification, the fluorescence intensity of the individual droplets was measured using the QX200 Droplet Reader (Bio-Rad). Positive and negative droplet populations were automatically detected by QuantaSoft droplet reader software (Bio-Rad). For each accession and each gene, the template CNs [copies/&#x3bc;l PCR] were calculated using Poisson statistics, background-corrected based on the no-template control sample and normalized against the data for previously verified non-variable control gene <italic>DCL1</italic>.</p>
</sec>
<sec id="s4_5">
<title>PCR assays</title>
<p>Genomic DNA samples (5 ng) were used as templates in 20 &#x3bc;l reactions performed with PrimeSTAR GXL DNA Polymerase (TaKaRa), according to the manufacturer&#x2019;s instructions, in a three-step PCR. Amplicons (10 &#x3bc;l) were analyzed on 1% agarose with 1kb Gene Ruler DNA ladder (Fermentas). Primer sequences are listed in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S17</bold>
</xref>. Primer pairs for <italic>BARS1-BARS2</italic> and <italic>CYP705A2-CYP705A2a</italic> were designed in the corresponding genomic regions that assured primer divergence between the paralogs. However, primers designed for <italic>CYP705A2</italic> produced nonspecific bands of ~5kb in many samples. Therefore, this gene was excluded from the analysis.</p>
</sec>
<sec id="s4_6">
<title>Genotype assignments</title>
<p>For MLPA dataset, genotypes were assigned to each gene and each accession based on normalized MLPA values of &#x2264;1 for LOSS genotype and &gt;3 for GAIN genotype. The remaining cases were assigned REF genotype. For RD dataset, the respective RD thresholds were &#x2264;1 for LOSS genotype and &gt;3.4 for GAIN genotype, except for <italic>BARS1</italic>, for which both thresholds were lowered by 0.2. The remaining cases were assigned REF genotype. For ddPCR, genes with normalized CN=0 were assigned LOSS genotype and genes with normalized CN=2 were assigned REF genotype. The RD, MLPA and ddPCR datasets were then combined using the following procedure. For genes and accessions covered by multiple datasets, the final genotype was assigned based on all data. Discordant genotype assignments (21 out of 1,784 covered by multiple datasets) were manually investigated and 19 of them were resolved (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure S4</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S7</bold>
</xref>). Out of the remaining 32,000, which were assayed with one method only, the genotype was manually corrected in 13 cases with values very close to the arbitrary threshold, based on population data distribution. Final genotype assignments for each gene and each accession are listed in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Table S6</bold>
</xref>.</p>
</sec>
<sec id="s4_7">
<title>Sanger sequencing</title>
<p>The genomic DNA of Mir-0 accession (ID 8337) was used as a template (2 ng) for amplification using PrimeSTAR<sup>&#xae;</sup> GXL DNA Polymerase (TaKaRa), in a 40-&#xb5;l PCR reaction with 0.3 &#xb5;M primers OP009 and OP010, according to general manufacturer instructions. The amplified product, of ~8 kb in length, was purified with DNA Clean &amp; Concentrator (ZYMO Research) and checked by gel electrophoresis and analysis on a NanoDrop&#x2122; 2000 Spectrophotometer. The purified product (110 ng) was mixed with 1 &#xb5;l of sequencing primer Mar02_R and sequenced on an ABI Prism 3130XL analyzer at the Molecular Biology Techniques Facility in the Department of Biology at Adam Mickiewicz University, Poznan, Poland. Sequencing files were analyzed with Chromas Lite v. 2.6.6 (Technelysium) software.</p>
</sec>
<sec id="s4_8">
<title>
<italic>De novo</italic> genomic assemblies generation, annotation and analysis</title>
<p>Genomic DNA of Mitterberg-2-185 and Dolna-1-40 was extracted, sequenced on 1 MinION flowcell (<italic>Oxford Nanopore Technologies</italic>) each and assembled <italic>de novo</italic> with Canu. Genomic sequences of interest (corresponding to thalianol gene cluster for Mitterberg-2-185 and tirucalladienol gene cluster for Dolna-1-40) were then retrieved with megablast (blast-2.10.0+ package) using TAIR10 reference genomic sequence as a query. The remaining <italic>de novo</italic> assemblies were retrieved from the following public databases. The PacBio-based genomic assemblies, gene annotations and orthogroups for An-1, C24, Cvi-0, Eri-1, Kyoto, Ler-0 and Sha accessions, as well as the reference genome coordinates of the hotspots of rearrangements, were downloaded from Arabidopsis 1001 Genomes Project Data Center (MPIPZJiao2020) or retrieved from the corresponding paper (<xref ref-type="bibr" rid="B30">Jiao and Schneeberger, 2020</xref>). Assembled genomic sequences of Ty-1 (PRJEB37258), Cdm-0 (PRJEB40125) and Kn-0 (PRJEB37260) accessions were retrieved from NCBI/Assembly database (<xref ref-type="bibr" rid="B63">Sayers et&#xa0;al., 2022</xref>). Gene prediction was performed with Augustus v.3.3.3 (<xref ref-type="bibr" rid="B68">Stanke and Morgenstern, 2005</xref>) with the following settings: &#x201c;Species <italic>Arabidopsis thaliana</italic>&#x201d;, &#x201c;both strands&#x201d;, &#x201c;few alternative transcripts&#x201d; or &#x201c;none alternative transcripts&#x201d;, &#x201c;predict only complete genes&#x201d;. These parameters were first optimized by gene prediction in the corresponding TAIR10 genomic sequence and comparison with Araport 11 annotation. For previously annotated assemblies, we added information about the newly predicted genes to existing annotations. 
The protein sequences of <italic>de novo</italic> predicted genes and the information about their best blast hit in the reference genome are available in <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Information</bold>
</xref>. The search for conserved domain organization was performed with the online NCBI search tool against Pfam v.33.1 databases. Protein sequence alignment was done with Multalin or EMBL online tools (<xref ref-type="bibr" rid="B15">Corpet, 1988</xref>; <xref ref-type="bibr" rid="B49">Madeira et&#xa0;al., 2019</xref>). TEs were annotated with RepeatMasker software version 4.1.2 (<uri xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</uri>), using homology-based method with TAIR10-transposable-elements reference library.</p>
</sec>
<sec id="s4_9">
<title>Identification of chromosomal inversions</title>
<p>The BreakDancerMax program from the BreakDancer package v.1.3.6 (<xref ref-type="bibr" rid="B14">Chen et&#xa0;al., 2009</xref>) was used to detect inversions in each of 997 samples with paired-end data and unimodal insert size distribution. Variants were called separately for each accession and each chromosome. Only calls with lengths within the range 0.5 kbp &#x2013; 50 kbp and with the Confidence Score &gt;35 were retained. Since BreakDancerMax output included numerous overlapping calls for individual accessions, we first minimized its redundancy. From the overlapping regions, we kept one variant with i) the highest Confidence Score, and ii) the highest number of supporting reads. If two or more overlapping variants had the same score and the same number of supporting reads, the maximized coordinates of these variants were used. This step was carried out in two iterations, considering the 50% reciprocal overlap of the variants. Then, the inversions that overlapped with the thalianol gene cluster were selected from each genome-wide dataset.</p>
</sec>
<sec id="s4_10">
<title>SNP calling at <italic>CYP705A2</italic> and <italic>BARS1</italic> genes</title>
<p>Variants (SNPs and short indels) were called with DeepVariant v.1.3.0 in WGS mode and merged with GLnexus (<xref ref-type="bibr" rid="B80">Yun et&#xa0;al., 2021</xref>). Analysis was performed for <italic>CYP705A2</italic> and <italic>BARS1</italic> genomic loci. The results were further filtered to include only biallelic variants, that were located in the exons of each gene (for <italic>BARS1</italic>, exon intersections from two transcript models were used). The number of heterozygous positions was then calculated for each accession and each gene. The same procedure was repeated by taking into account only biallelic variants with at least 1% frequency, which resulted in nearly identical results. Both types of analysis led to the selection of the same set of accessions with duplication at both loci.</p>
</sec>
<sec id="s4_11">
<title>Genome-wide SNP analysis</title>
<p>Variants for 983 accessions with known <italic>CYP705A2</italic> + <italic>BARS1</italic> and <italic>CYP705A2a</italic> + <italic>BARS2</italic> pair status were downloaded from the 1001 Genomes Project Data Center (1001genomes_snp-short-indel_only_ACGTN_v3.1.vcf.snpeff file) (<xref ref-type="bibr" rid="B1">1001 Genomes Consortium, 2016</xref>). Data preprocessing was performed using PLINK v.1.90b3w (<uri xlink:href="https://www.cog-genomics.org/plink/1.9/">https://www.cog-genomics.org/plink/1.9/</uri>; <xref ref-type="bibr" rid="B12">Chang et&#xa0;al., 2015</xref>). Variants with missing call rates exceeding value 0.5 and variants with minor allele frequency below 3% were filtered out. The LD parameter for linkage disequilibrium-based filtration was set as follows: indep-pairwise 200&#x2019;kb&#x2019; 25 0.3. For PCA with EIGENSOFT v.7.2.1 (<xref ref-type="bibr" rid="B60">Price et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B59">Patterson et&#xa0;al., 2006</xref>), at least 130,000 SNPs were used. PCA for a wide LD range between 0.3 - 0.9 was then calculated in a similar manner. U.S.A. accessions, which only recently separated geographically from the rest of the population (<xref ref-type="bibr" rid="B39">Lee et&#xa0;al., 2017</xref>), were excluded to ensure better visibility of the remaining accessions. The ggplot2 package was used for data visualization in R v4.0.4 (<uri xlink:href="https://www.r-project.org">https://www.r-project.org</uri>; <xref ref-type="bibr" rid="B75">Wickham, 2016</xref>).</p>
</sec>
<sec id="s4_12">
<title>Genome-wide association study and phenotype analysis</title>
<p>The entire set of 516 phenotypes from 26 studies was downloaded from the Arapheno database on 26 April 2022 (<xref ref-type="bibr" rid="B64">Seren et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B71">Togninalli et&#xa0;al., 2020</xref>). The above genome-wide SNP dataset, to which we added a biallelic variant representing PP-AA or PP-PP group assignment, was used. The IBS kinship matrix was calculated on 954 accessions. Association analysis was performed for each phenotype using a mixed model correcting for population structure using Efficient Mixed-Model Association eXpedited, version emmax-beta-07Mar2010 (<xref ref-type="bibr" rid="B32">Kang et&#xa0;al., 2008</xref>). Input file generation and analysis of the results were performed with PLINK v.1.90b3w and R v4.0.4.</p>
</sec>
<sec id="s4_13">
<title>Analysis of RNA-Seq data</title>
<p>Processed RNA-Seq data from leaves for 728 accessions (552 in common with our study) mapped to the reference transcriptome (<xref ref-type="bibr" rid="B36">Kawakatsu et&#xa0;al., 2016</xref>) were downloaded from NCBI/SRA (PRJNA319904), normalized and used to compare <italic>BARS1</italic> expression levels between PP-AA, PP-PP and AA-PP groups. Additionally, raw RNA-Seq reads from leaves were downloaded from the same source for accession-specific mapping and analysis of Cdm-0, Col-0, Cvi-0, Kn-0, Ty-1 and Sha accessions. Raw RNA-Seq reads from roots and shoots of Col-0 and Cvi-0 accessions were retrieved from BioProject PRJEB14092 (<xref ref-type="bibr" rid="B72">van Veen et&#xa0;al., 2016</xref>). SRA Toolkit v2.8.2 (<uri xlink:href="https://github.com/ncbi/sra-tools">https://github.com/ncbi/sra-tools</uri>) and FastQC v0.11.4 (<uri xlink:href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/">https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</uri>) were used for downloading the raw reads and for the quality analysis. For Cdm-0, Kn-0 and Ty-1 genomes, .gtf files that included the annotations for the genes of interest were generated based on Augustus results (provided as <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Information</bold>
</xref>). Raw reads were mapped to the respective genomes using the STAR aligner version 2.7.8a (<xref ref-type="bibr" rid="B16">Dobin et&#xa0;al., 2013</xref>). STAR indices were generated with parameters: &#x201c;--runThreadN 24 --sjdbOverhang 99 --genomeSAindexNbases 12&#x201d;. The following parameters were used for the mapping step: &#x201c;--runThreadN 24 --quantMode GeneCounts --outFilterMultimapNmax 1 --outSAMtype BAM SortedByCoordinate --outSAMunmapped Within&#x201d;. Bioinfokit v1.0.8 (<uri xlink:href="https://zenodo.org/record/3964972#.Yyw6oRzP1hE">https://zenodo.org/record/3964972#.Yyw6oRzP1hE</uri>) was used to convert .gff3 to .gtf files. Transcripts per million (TPM) values and fragments per kilobase exon per million reads (FPKM) with total exon length for each gene were computed in R v4.0.4.</p>
</sec>
<sec id="s4_14">
<title>Analysis of TS-CYP pairs</title>
<p>A list of Arabidopsis CYP genes was created by collecting information from previous studies and acknowledged website resources (Arabidopsis Cytochromes P450; <xref ref-type="bibr" rid="B58">Paquette et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B17">Ehlting et&#xa0;al., 2008</xref>; <xref ref-type="bibr" rid="B53">Nelson, 2009</xref>; <xref ref-type="bibr" rid="B4">Bak et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B54">Nelson and Werck-Reichhart, 2011</xref>; <xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>) (<uri xlink:href="http://www.p450.kvl.dk/p450.shtml">http://www.p450.kvl.dk/p450.shtml</uri>). Genes marked in Araport 11 as pseudogenes were excluded from further analysis. Genes were assigned to clans and families according to the information from the above resources. A list of TS genes was created based on a previous study (<xref ref-type="bibr" rid="B8">Boutanaev et&#xa0;al., 2015</xref>) and restricted to genes with valid Araport 11 locus. Genotypes were assigned based on criteria defined for RD dataset: (CN &#x2264; 1 as losses, CN &#x2265; 3.4 as gains, the remaining genotypes were classified as unchanged). Genes from thalianol, tirucalladienol, arabidiol/baruol and marneral gene clusters were already genotyped. Gene coordinates were downloaded from Araport 11. All CYP genes positioned at a distance +/- 30 kb from TS gene borders were classified as paired with a given TS gene. Information about predicted secondary metabolism clusters was retrieved from plantiSMASH resource (<xref ref-type="bibr" rid="B35">Kautsar et&#xa0;al., 2017</xref>).</p>
</sec>
<sec id="s4_15">
<title>Prediction and analysis of BARS1 and BARS2 3D protein structures</title>
<p>The three-dimensional structures of the reference baruol synthase 1 proteins NP_193272.1, NP_001329547.1, as well as Cvi-0 proteins encoded by <italic>ATCVI-4G38020</italic> (<italic>BARS1</italic>) and <italic>ATCVI-4G38110</italic> (<italic>BARS2</italic>), were predicted from their amino acid sequences using the AlphaFold2 code through the ColabFold software (<xref ref-type="bibr" rid="B31">Jumper et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B50">Mirdita et&#xa0;al., 2022</xref>). The modeling studies were performed for a single amino acid chain. A crystal structure of human OSC in a complex with lanosterol (ID 1W6K) was retrieved from the Protein Data Bank (<xref ref-type="bibr" rid="B70">Thoma et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B5">Berman et&#xa0;al., 2007</xref>). The SSM algorithm implemented in COOT was used for superpositions of protein models (<xref ref-type="bibr" rid="B37">Krissinel and Henrick, 2004</xref>; <xref ref-type="bibr" rid="B18">Emsley et&#xa0;al., 2010</xref>) (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Information</bold>
</xref>).</p>
</sec>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. Sequence data can be found at the National Center for Biotechnology Information (<uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJNA273563/">https://www.ncbi.nlm.nih.gov/bioproject/PRJNA273563/</uri>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJEB31147/">https://www.ncbi.nlm.nih.gov/bioproject/PRJEB31147/</uri>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJEB37258/">https://www.ncbi.nlm.nih.gov/bioproject/PRJEB37258/</uri>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJEB40125/">https://www.ncbi.nlm.nih.gov/bioproject/PRJEB40125/</uri>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJEB37260/">https://www.ncbi.nlm.nih.gov/bioproject/PRJEB37260/</uri>; <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJNA319904/">https://www.ncbi.nlm.nih.gov/bioproject/PRJNA319904/</uri>; and <uri xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJEB14092/">https://www.ncbi.nlm.nih.gov/bioproject/PRJEB14092/</uri>). Genomic variants can be found in the 1,001 Genomes Project resources (<uri xlink:href="https://1001genomes.org/data/GMI-MPI/releases/v3.1/1001genomes_snpeff_v3.1/">https://1001genomes.org/data/GMI-MPI/releases/v3.1/1001genomes_snpeff_v3.1/</uri>). All phenotyping data and the associated metadata can be found in the AraPheno database (<uri xlink:href="https://arapheno.1001genomes.org/static/database.zip">https://arapheno.1001genomes.org/static/database.zip</uri>). Individual phenotypes with their DOI identifiers can be additionally accessed and downloaded from <uri xlink:href="https://arapheno.1001genomes.org/phenotypes/">https://arapheno.1001genomes.org/phenotypes/</uri>. The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="s10">
<bold>Supplementary Material</bold>
</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>Conceptualization: AZ. Methodology: MM-Z, PW, and AZ. Investigation: MM-Z, AS, PW, PS, KB, and TI. Software: MM-Z, AS, PW, and MZ. Visualization: MM-Z, KB, and AZ. Formal analysis: MM-Z. Writing &#x2013; original draft: MM-Z and AZ. Writing &#x2013; review and editing: MM-Z, KB, MF, MZ, and AZ. Supervision: MF and AZ. Project administration: AZ. Funding acquisition: MF and AZ. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by the National Science Centre (Poland) grants 2014/13/B/NZ2/03837 to MF and 2017/26/D/NZ2/01079 to AZ. TI obtained funding from the support program for Ukrainian researchers under the Agreement between the Polish Academy of Sciences and the U.S. National Academy of Sciences. The funding agencies had no role in the design of the study and collection, analysis, and interpretation of data and in writing the manuscript.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We thank Piotr Koz&#x142;owski for fruitful discussions and comments on the manuscript. Computations were supported in part by PLGrid Infrastructure.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2023.1104303/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2023.1104303/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table_1.xlsx" id="SF1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
<label>Supplemental File 1</label>
<caption>
<p>Supplemental Tables S1-S17.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="DataSheet_2.pdf" id="SF2" mimetype="application/pdf">
<label>Supplemental File 2</label>
<caption>
<p>Supplemental information and Supplemental Figures S1-S21.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="DataSheet_1.zip" id="SF3" mimetype="application/zip">
<label>Supplemental File 3</label>
<caption>
<p>Superposed 3D models of BARS1, BARS2 and human oxidosqualene cyclase proteins.</p>
</caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>1001 Genomes Consortium</collab>
</person-group> (<year>2016</year>). <article-title>1,135 genomes reveal the global pattern of polymorphism in <italic>Arabidopsis thaliana</italic>
</article-title>. <source>Cell</source> <volume>166</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cell.2016.05.063</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asaf</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Numan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Khan</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Al-Harrasi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>
<italic>Sphingomonas</italic>: from diversity and genomics to functional role in environmental remediation and plant growth</article-title>. <source>Crit. Rev. Biotechnol.</source> <volume>40</volume>, <fpage>138</fpage>&#x2013;<lpage>152</lpage>. doi: <pub-id pub-id-type="doi">10.1080/07388551.2019.1709793</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Calvo</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ritter</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Morales-Herrera</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bicalho</surname> <given-names>K. U.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Modulation of arabidopsis root growth by specialized triterpenes</article-title>. <source>New Phytol.</source> <volume>230</volume>, <fpage>228</fpage>&#x2013;<lpage>243</lpage>. doi: <pub-id pub-id-type="doi">10.1111/nph.17144</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bak</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Beisson</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bishop</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Hamberger</surname> <given-names>B.</given-names>
</name>
<name>
<surname>H&#xf6;fer</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Paquette</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Cytochromes p450</article-title>. <source>Arabidopsis Book</source> <volume>9</volume>, <elocation-id>e0144</elocation-id>. doi: <pub-id pub-id-type="doi">10.1199/tab.0144</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berman</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Henrick</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Nakamura</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Markley</surname> <given-names>J. L.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The worldwide protein data bank (wwPDB): ensuring a single, uniform archive of PDB data</article-title>. <source>Nucleic Acids Res.</source> <volume>35</volume>, <fpage>D301</fpage>&#x2013;<lpage>D303</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkl971</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bodenhausen</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Horton</surname> <given-names>M. W.</given-names>
</name>
<name>
<surname>Bergelson</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Bacterial communities associated with the leaves and the roots of <italic>Arabidopsis thaliana</italic>
</article-title>. <source>PloS One</source> <volume>8</volume>, <elocation-id>e56329</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0056329</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouain</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Satbhai</surname> <given-names>S. B.</given-names>
</name>
<name>
<surname>Korte</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Saenchai</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Desbrosses</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Berthomieu</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Natural allelic variation of the AZI1 gene controls root growth under zinc-limiting condition</article-title>. <source>PloS Genet.</source> <volume>14</volume>, <elocation-id>e1007304</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.1007304</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boutanaev</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Moses</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Nelson</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Mugford</surname> <given-names>S. T.</given-names>
</name>
<name>
<surname>Peters</surname> <given-names>R. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>Investigation of terpene diversification across multiple sequenced plant genomes</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>112</volume>, <fpage>E81</fpage>&#x2013;<lpage>E88</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1419547112</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boycheva</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Daviet</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wolfender</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname> <given-names>T. B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The rise of operon-like gene clusters in plants</article-title>. <source>Trends Plant Sci.</source> <volume>19</volume>, <fpage>447</fpage>&#x2013;<lpage>459</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.tplants.2014.01.013</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bulgarelli</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Rott</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Schlaeppi</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Ver Loren van Themaat</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Ahmadinejad</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Assenza</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Revealing structure and assembly cues for arabidopsis root-inhabiting bacterial microbiota</article-title>. <source>Nature</source> <volume>488</volume>, <fpage>91</fpage>&#x2013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature11336</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castillo</surname> <given-names>D. A.</given-names>
</name>
<name>
<surname>Kolesnikova</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>S. P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>An effective strategy for exploring unknown metabolic pathways by genome mining</article-title>. <source>J. Am. Chem. Soc</source> <volume>135</volume>, <fpage>5885</fpage>&#x2013;<lpage>5894</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ja401535g</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Chow</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Tellier</surname> <given-names>L. C.</given-names>
</name>
<name>
<surname>Vattikuti</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Purcell</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Second-generation PLINK: rising to the challenge of larger and richer datasets</article-title>. <source>Gigascience</source> <volume>4</volume>, <fpage>7</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13742-015-0047-8</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>C. Y.</given-names>
</name>
<name>
<surname>Krishnakumar</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>A. P.</given-names>
</name>
<name>
<surname>Thibaud-Nissen</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Schobel</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Town</surname> <given-names>C. D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Araport11: a complete reannotation of the <italic>Arabidopsis thaliana</italic> reference genome</article-title>. <source>Plant J.</source> <volume>89</volume>, <fpage>789</fpage>&#x2013;<lpage>804</lpage>. doi: <pub-id pub-id-type="doi">10.1111/tpj.13415</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wallis</surname> <given-names>J. W.</given-names>
</name>
<name>
<surname>McLellan</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Larson</surname> <given-names>D. E.</given-names>
</name>
<name>
<surname>Kalicki</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Pohl</surname> <given-names>C. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2009</year>). <article-title>BreakDancer: an algorithm for high-resolution mapping of genomic structural variation</article-title>. <source>Nat. Methods</source> <volume>6</volume>, <fpage>677</fpage>&#x2013;<lpage>681</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.1363</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Corpet</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>Multiple sequence alignment with hierarchical clustering</article-title>. <source>Nucleic Acids Res.</source> <volume>16</volume>, <fpage>10881</fpage>&#x2013;<lpage>10890</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/16.22.10881</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dobin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>C. A.</given-names>
</name>
<name>
<surname>Schlesinger</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Drenkow</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zaleski</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jha</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>STAR: ultrafast universal RNA-seq aligner</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>15</fpage>&#x2013;<lpage>21</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bts635</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ehlting</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sauveplane</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Olry</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ginglinger</surname> <given-names>J. F.</given-names>
</name>
<name>
<surname>Provart</surname> <given-names>N. J.</given-names>
</name>
<name>
<surname>Werck-Reichhart</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>An extensive (co-)expression analysis tool for the cytochrome P450 superfamily in <italic>Arabidopsis thaliana</italic>
</article-title>. <source>BMC Plant Biol.</source> <volume>8</volume>, <fpage>47</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2229-8-47</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Emsley</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Lohkamp</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Scott</surname> <given-names>W. G.</given-names>
</name>
<name>
<surname>Cowtan</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Features and development of coot</article-title>. <source>Acta Crystallogr. D. Biol. Crystallogr.</source> <volume>66</volume>, <fpage>486</fpage>&#x2013;<lpage>501</lpage>. doi: <pub-id pub-id-type="doi">10.1107/S0907444910007493</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Erb</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kliebenstein</surname> <given-names>D. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Plant secondary metabolites as defenses, regulators, and primary metabolites: The blurred functional trichotomy</article-title>. <source>Plant Physiol.</source> <volume>184</volume>, <fpage>39</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1104/pp.20.00433</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Exposito-Alonso</surname> <given-names>M.</given-names>
</name>
<collab>500 Genomes Field Experiment Team</collab>
<name>
<surname>Burbano</surname> <given-names>H. A.</given-names>
</name>
<name>
<surname>Bossdorf</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Nielsen</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Weigel</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Natural selection on the <italic>Arabidopsis thaliana</italic> genome in present and future climates</article-title>. <source>Nature</source> <volume>573</volume>, <fpage>126</fpage>&#x2013;<lpage>129</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-019-1520-9</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Lou</surname> <given-names>Y. R.</given-names>
</name>
<name>
<surname>Leong</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Moore</surname> <given-names>B. M.</given-names>
</name>
<name>
<surname>Schenck</surname> <given-names>C. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Evolution of a plant gene cluster in <italic>Solanaceae</italic> and emergence of metabolic diversity</article-title>. <source>Elife</source> <volume>9</volume>, <elocation-id>e56717</elocation-id>. doi: <pub-id pub-id-type="doi">10.7554/eLife.56717.sa2</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fazio</surname> <given-names>G. C.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>S. P. T.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Genome mining to identify new plant triterpenoids</article-title>. <source>J. Am. Chem. Soc</source> <volume>126</volume>, <fpage>5678</fpage>&#x2013;<lpage>5679</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ja0318784</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Field</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Fiston-Lavier</surname> <given-names>A. S.</given-names>
</name>
<name>
<surname>Kemen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Geisler</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Quesneville</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A. E.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Formation of plant metabolic gene clusters within dynamic chromosomal regions</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>108</volume>, <fpage>16116</fpage>&#x2013;<lpage>16121</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1109273108</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Field</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A. E.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Metabolic diversification&#x2013;independent assembly of operon-like gene clusters in different plants</article-title>. <source>Science</source> <volume>320</volume>, <fpage>543</fpage>&#x2013;<lpage>547</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1154990</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghosh</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Biosynthesis of structurally diverse triterpenes in plants: the role of oxidosqualene cyclase</article-title>. <source>Proc. Indian Natl. Sci. Acad.</source> <volume>82</volume>, <fpage>1189</fpage>&#x2013;<lpage>1210</lpage>. doi: <pub-id pub-id-type="doi">10.16943/ptinsa/2016/48578</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Go</surname> <given-names>Y. S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S. B.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>H. J.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>H. Y.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>J. K.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Identification of marneral synthase, which is critical for growth and development in arabidopsis</article-title>. <source>Plant J.</source> <volume>72</volume>, <fpage>791</fpage>&#x2013;<lpage>804</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1365-313X.2012.05120.x</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y. X.</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>Y. C. Y.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Qu</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A specialized metabolic network selectively modulates arabidopsis root microbiota</article-title>. <source>Science</source> <volume>364</volume>, <elocation-id>eaau6389</elocation-id>. doi: <pub-id pub-id-type="doi">10.1126/science.aau6389</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Innerebner</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Knief</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Vorholt</surname> <given-names>J. A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Protection of <italic>Arabidopsis thaliana</italic> against leaf-pathogenic <italic>Pseudomonas syringae</italic> by <italic>Sphingomonas</italic> strains in a controlled model system</article-title>. <source>Appl. Environ. Microbiol.</source> <volume>77</volume>, <fpage>3202</fpage>&#x2013;<lpage>3210</lpage>. doi: <pub-id pub-id-type="doi">10.1128/AEM.00133-11</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Isah</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Stress and defense responses in plant secondary metabolites production</article-title>. <source>Biol. Res.</source> <volume>52</volume>, <fpage>39</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40659-019-0246-3</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiao</surname> <given-names>W. B.</given-names>
</name>
<name>
<surname>Schneeberger</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Chromosome-level assemblies of multiple arabidopsis genomes reveal hotspots of rearrangements with altered evolutionary dynamics</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>989</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-14779-y</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jumper</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Evans</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Pritzel</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Green</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Figurnov</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ronneberger</surname> <given-names>O.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Highly accurate protein structure prediction with AlphaFold</article-title>. <source>Nature</source> <volume>596</volume>, <fpage>583</fpage>&#x2013;<lpage>589</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-021-03819-2</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kang</surname> <given-names>H. M.</given-names>
</name>
<name>
<surname>Zaitlen</surname> <given-names>N. A.</given-names>
</name>
<name>
<surname>Wade</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Heckerman</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Daly</surname> <given-names>M. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2008</year>). <article-title>Efficient control of population structure in model organism association mapping</article-title>. <source>Genetics</source> <volume>178</volume>, <fpage>1709</fpage>&#x2013;<lpage>1723</lpage>. doi: <pub-id pub-id-type="doi">10.1534/genetics.107.080101</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Karasov</surname> <given-names>T. L.</given-names>
</name>
<name>
<surname>Neumann</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shirsekar</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Monroe</surname> <given-names>G.</given-names>
</name>
<collab>PATHODOPSIS Team</collab>
<name>
<surname>Weigel</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>) (Accessed <access-date>November 2, 2022</access-date>).</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katz</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Jaegle</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ashkenazy</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Abrahams</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Bagaza</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Genetic variation, environment and demography intersect to shape Arabidopsis defense metabolite variation across Europe</article-title>. <source>eLife</source> <volume>10</volume>, <elocation-id>e67784</elocation-id>. doi: <pub-id pub-id-type="doi">10.7554/eLife.67784</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kautsar</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Suarez Duran</surname> <given-names>H. G.</given-names>
</name>
<name>
<surname>Blin</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Medema</surname> <given-names>M. H.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>plantiSMASH: automated identification, annotation and expression analysis of plant biosynthetic gene clusters</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>W55</fpage>&#x2013;<lpage>W63</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkx305</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kawakatsu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>S. S. C.</given-names>
</name>
<name>
<surname>Jupe</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sasaki</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Schmitz</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Urich</surname> <given-names>M. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Epigenomic diversity in a global collection of <italic>Arabidopsis thaliana</italic> accessions</article-title>. <source>Cell</source> <volume>166</volume>, <fpage>492</fpage>&#x2013;<lpage>505</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2016.06.044</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krissinel</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Henrick</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Secondary-structure matching (SSM), a new tool for fast protein structure alignment in three dimensions</article-title>. <source>Acta Crystallogr. D. Biol. Crystallogr.</source> <volume>60</volume>, <fpage>2256</fpage>&#x2013;<lpage>2268</lpage>. doi: <pub-id pub-id-type="doi">10.1107/S0907444904026460</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lardon</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wijnker</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Keurentjes</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Geelen</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The genetic framework of shoot regeneration in Arabidopsis comprises master regulators and conditional fine-tuning factors</article-title>. <source>Commun. Biol.</source> <volume>3</volume>, <fpage>549</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s42003-020-01274-9</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>C. R.</given-names>
</name>
<name>
<surname>Svardal</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Farlow</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Exposito-Alonso</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Novikova</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>On the post-glacial spread of human commensal <italic>Arabidopsis thaliana</italic>
</article-title>. <source>Nat. Commun.</source> <volume>8</volume>, <fpage>14458</fpage>. doi: <pub-id pub-id-type="doi">10.1038/ncomms14458</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bergelson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Nordborg</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Borevitz</surname> <given-names>J. O.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Association mapping of local climate-sensitive quantitative trait loci in <italic>Arabidopsis thaliana</italic>
</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>107</volume>, <fpage>21199</fpage>&#x2013;<lpage>21204</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1007431107</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lind</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Wisecaver</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Lameiras</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wiemann</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Palmer</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Keller</surname> <given-names>N. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Drivers of genetic diversity in secondary metabolic gene clusters within a fungal species</article-title>. <source>PLoS Biol.</source> <volume>15</volume>, <elocation-id>e2003583</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pbio.2003583</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Ramasamy</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Hagel</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Dunemann</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Gene clustering and copy number variation in alkaloid metabolic pathways of opium poppy</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>1190</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-15040-2</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Cheema</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Vigouroux</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hill</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Paajanen</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>a). <article-title>Formation and diversification of a paradigm biosynthetic gene cluster in plants</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>5354</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-19153-6</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Suarez Duran</surname> <given-names>H. G.</given-names>
</name>
<name>
<surname>Harnvanichvech</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Stephenson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Schranz</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Nelson</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>b). <article-title>Drivers of metabolic diversification: how dynamic genomic neighbourhoods generate new biosynthetic pathways in the Brassicaceae</article-title>. <source>New Phytol.</source> <volume>227</volume>, <fpage>1109</fpage>&#x2013;<lpage>1123</lpage>. doi: <pub-id pub-id-type="doi">10.1111/nph.16338</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lodeiro</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Schulz-Gasch</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>S. P. T.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Enzyme redesign: two mutations cooperate to convert cycloartenol synthase into an accurate lanosterol synthase</article-title>. <source>J. Am. Chem. Soc.</source> <volume>127</volume>, <fpage>14132</fpage>&#x2013;<lpage>14133</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ja053791j</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lodeiro</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wilson</surname> <given-names>W. K.</given-names>
</name>
<name>
<surname>Kolesnikova</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Onak</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>S. P. T.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>An oxidosqualene cyclase makes numerous products by diverse mechanisms: a challenge to prevailing concepts of triterpene biosynthesis</article-title>. <source>J. Am. Chem. Soc.</source> <volume>129</volume>, <fpage>11213</fpage>&#x2013;<lpage>11222</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ja073133u</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname> <given-names>D. S.</given-names>
</name>
<name>
<surname>Lebeis</surname> <given-names>S. L.</given-names>
</name>
<name>
<surname>Paredes</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Yourstone</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gehring</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Malfatti</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Defining the core <italic>Arabidopsis thaliana</italic> root microbiome</article-title>. <source>Nature</source> <volume>488</volume>, <fpage>86</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature11237</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>
<italic>Sphingomonas</italic> sp. Cra20 increases plant growth rate and alters rhizosphere microbial community structure of <italic>Arabidopsis thaliana</italic> under drought stress</article-title>. <source>Front. Microbiol.</source> <volume>10</volume>, <elocation-id>1221</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmicb.2019.01221</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Madeira</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>Y. M.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Buso</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Gur</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Madhusoodanan</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>The EMBL-EBI search and sequence analysis tools APIs in 2019</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>W636</fpage>&#x2013;<lpage>W641</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkz268</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirdita</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xfc;tze</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Moriwaki</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Heo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ovchinnikov</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Steinegger</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>ColabFold: making protein folding accessible to all</article-title>. <source>Nat. Methods</source> <volume>19</volume>, <fpage>679</fpage>&#x2013;<lpage>682</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-022-01488-1</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohn</surname> <given-names>W. W.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Moore</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Muttray</surname> <given-names>A. F.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Lessons learned from <italic>Sphingomonas</italic> species that degrade abietane triterpenoids</article-title>. <source>J. Ind. Microbiol. Biotechnol.</source> <volume>23</volume>, <fpage>374</fpage>&#x2013;<lpage>379</lpage>. doi: <pub-id pub-id-type="doi">10.1038/sj.jim.2900731</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morlacchi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Wilson</surname> <given-names>W. K.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Bhaduri</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sttivend</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kolesnikova</surname> <given-names>M. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2009</year>). <article-title>Product profile of PEN3: The last unexamined oxidosqualene cyclase in <italic>Arabidopsis thaliana</italic>
</article-title>. <source>Org. Lett.</source> <volume>11</volume>, <fpage>2627</fpage>&#x2013;<lpage>2630</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ol9005745</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelson</surname> <given-names>D. R.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The cytochrome P450 homepage</article-title>. <source>Hum. Genomics</source> <volume>4</volume>, <fpage>59</fpage>&#x2013;<lpage>65</lpage>. doi: <pub-id pub-id-type="doi">10.1186/1479-7364-4-1-59</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelson</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Werck-Reichhart</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A P450-centric view of plant evolution</article-title>. <source>Plant J.</source> <volume>66</volume>, <fpage>194</fpage>&#x2013;<lpage>211</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1365-313X.2011.04529.x</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>N&#xfc;tzmann</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>Doerr</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ram&#xed;rez-Colmenero</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sotelo-Fonseca</surname> <given-names>J. E.</given-names>
</name>
<name>
<surname>Wegel</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Di Stefano</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Active and repressed biosynthetic gene clusters have spatially distinct chromosome states</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>117</volume>, <fpage>13800</fpage>&#x2013;<lpage>13809</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1920474117</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>N&#xfc;tzmann</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Gene clustering in plant specialized metabolism</article-title>. <source>Curr. Opin. Biotechnol.</source> <volume>26</volume>, <fpage>91</fpage>&#x2013;<lpage>99</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.copbio.2013.10.009</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>N&#xfc;tzmann</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>Scazzocchio</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Metabolic gene clusters in eukaryotes</article-title>. <source>Annu. Rev. Genet.</source> <volume>52</volume>, <fpage>159</fpage>&#x2013;<lpage>183</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev-genet-120417-031237</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paquette</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Bak</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Feyereisen</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Intron-exon organization and phylogeny in a large superfamily, the paralogous cytochrome P450 genes of <italic>Arabidopsis thaliana</italic>
</article-title>. <source>DNA Cell Biol.</source> <volume>19</volume>, <fpage>307</fpage>&#x2013;<lpage>317</lpage>. doi: <pub-id pub-id-type="doi">10.1089/10445490050021221</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patterson</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Price</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Reich</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Population structure and eigenanalysis</article-title>. <source>PLoS Genet.</source> <volume>2</volume>, <elocation-id>e190</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.0020190</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Price</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Patterson</surname> <given-names>N. J.</given-names>
</name>
<name>
<surname>Plenge</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Weinblatt</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Shadick</surname> <given-names>N. A.</given-names>
</name>
<name>
<surname>Reich</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Principal components analysis corrects for stratification in genome-wide association studies</article-title>. <source>Nat. Genet.</source> <volume>38</volume>, <fpage>904</fpage>&#x2013;<lpage>909</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ng1847</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roul&#xe9;</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Christ</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Hussain</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hartmann</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Benhamed</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>The lncRNA MARS modulates the epigenetic reprogramming of the marneral cluster in response to ABA</article-title>. <source>Mol. Plant</source> <volume>15</volume>, <fpage>840</fpage>&#x2013;<lpage>856</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molp.2022.02.007</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Samelak-Czajka</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Marszalek-Zenczak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Marcinkowska-Swojak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kozlowski</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Figlerowicz</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zmienko</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MLPA-based analysis of copy number variation in plant populations</article-title>. <source>Front. Plant Sci.</source> <volume>8</volume>, <elocation-id>222</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2017.00222</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sayers</surname> <given-names>E. W.</given-names>
</name>
<name>
<surname>Bolton</surname> <given-names>E. E.</given-names>
</name>
<name>
<surname>Brister</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Canese</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Comeau</surname> <given-names>D. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Database resources of the National Center for Biotechnology Information</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume>, <fpage>D20</fpage>&#x2013;<lpage>D26</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkab1112</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Seren</surname> <given-names>&#xdc;.</given-names>
</name>
<name>
<surname>Grimm</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Fitz</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Weigel</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Nordborg</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Borgwardt</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>AraPheno: a public database for <italic>Arabidopsis thaliana</italic> phenotypes</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>D1054</fpage>&#x2013;<lpage>D1059</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkw986</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shirai</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Nakabayashi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Okamoto</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Tanaka</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fujimoto</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>A highly specific genome-wide association study integrated with transcriptome data reveals the contribution of copy number variations to specialized metabolites in <italic>Arabidopsis thaliana</italic> accessions</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume>, <fpage>3111</fpage>&#x2013;<lpage>3122</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msx234</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sohrabi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Harinantenaina Rakotondraibe</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Tholl</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Formation and exudation of non-volatile products of the arabidiol triterpenoid degradation pathway in Arabidopsis roots</article-title>. <source>Plant Signal. Behav.</source> <volume>12</volume>, <elocation-id>e1265722</elocation-id>. doi: <pub-id pub-id-type="doi">10.1080/15592324.2016.1265722</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sohrabi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Huh</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Badieyan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rakotondraibe</surname> <given-names>L. H.</given-names>
</name>
<name>
<surname>Kliebenstein</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Sobrado</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>In planta variation of volatile biosynthesis: an alternative biosynthetic route to the formation of the pathogen-induced volatile homoterpene DMNT <italic>via</italic> triterpene degradation in Arabidopsis roots</article-title>. <source>Plant Cell</source> <volume>27</volume>, <fpage>874</fpage>&#x2013;<lpage>890</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.114.132209</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stanke</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Morgenstern</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>AUGUSTUS: a web server for gene prediction in eukaryotes that allows user-defined constraints</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>W465</fpage>&#x2013;<lpage>W467</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gki458</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thimmappa</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Geisler</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Louveau</surname> <given-names>T.</given-names>
</name>
<name>
<surname>O&#x2019;Maille</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Triterpene biosynthesis in plants</article-title>. <source>Annu. Rev. Plant Biol.</source> <volume>65</volume>, <fpage>225</fpage>&#x2013;<lpage>257</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev-arplant-050312-120229</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thoma</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Schulz-Gasch</surname> <given-names>T.</given-names>
</name>
<name>
<surname>D&#x2019;Arcy</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Benz</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Aebi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Dehmlow</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2004</year>). <article-title>Insight into steroid scaffold formation from the structure of human oxidosqualene cyclase</article-title>. <source>Nature</source> <volume>432</volume>, <fpage>118</fpage>&#x2013;<lpage>122</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature02993</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Togninalli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Seren</surname> <given-names>&#xdc;.</given-names>
</name>
<name>
<surname>Freudenthal</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Monroe</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Nordborg</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>AraPheno and the AraGWAS catalog 2020: a major database update including RNA-seq and knockout mutation data for <italic>Arabidopsis thaliana</italic>
</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D1063</fpage>&#x2013;<lpage>D1068</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkz925</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Veen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Vashisht</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Akman</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Girke</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Mustroph</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Reinen</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Transcriptomes of eight <italic>Arabidopsis thaliana</italic> accessions reveal core conserved, genotype- and organ-specific responses to flooding stress</article-title>. <source>Plant Physiol.</source> <volume>172</volume>, <fpage>668</fpage>&#x2013;<lpage>689</lpage>. doi: <pub-id pub-id-type="doi">10.1104/pp.16.00472</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wada</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Takahashi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Altaf-Ul-Amin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Nakamura</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Hirai</surname> <given-names>M. Y.</given-names>
</name>
<name>
<surname>Ohta</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Prediction of operon-like gene clusters in the <italic>Arabidopsis thaliana</italic> genome based on co-expression analysis of neighboring genes</article-title>. <source>Gene</source> <volume>503</volume>, <fpage>56</fpage>&#x2013;<lpage>64</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.gene.2012.04.043</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wegel</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Koumproglou</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Shaw</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Osbourn</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Cell type-specific chromatin decondensation of a metabolic gene cluster in oats</article-title>. <source>Plant Cell</source> <volume>21</volume>, <fpage>3926</fpage>&#x2013;<lpage>3936</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.109.072124</pub-id>
</citation>
</ref>
<ref id="B75">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wickham</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <source>ggplot2</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>). 2nd ed. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-319-24277-4</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wisecaver</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Borowsky</surname> <given-names>A. T.</given-names>
</name>
<name>
<surname>Tzin</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Jander</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Kliebenstein</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Rokas</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A global coexpression network approach for connecting genes to specialized metabolic pathways in plants</article-title>. <source>Plant Cell</source> <volume>29</volume>, <fpage>944</fpage>&#x2013;<lpage>959</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.17.00009</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Shibuya</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Katsube</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tsutsumi</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Otsuka</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2006</year>). <article-title>A new triterpene synthase from <italic>Arabidopsis thaliana</italic> produces a tricyclic triterpene with two hydroxyl groups</article-title>. <source>Org. Lett.</source> <volume>8</volume>, <fpage>2835</fpage>&#x2013;<lpage>2838</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ol060973p</pub-id>
</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wilson</surname> <given-names>W. K.</given-names>
</name>
<name>
<surname>Matsuda</surname> <given-names>S. P. T.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>An Arabidopsis oxidosqualene cyclase catalyzes iridal skeleton formation by Grob fragmentation</article-title>. <source>Angew. Chem. Int. Ed. Engl.</source> <volume>45</volume>, <fpage>1285</fpage>&#x2013;<lpage>1288</lpage>. doi: <pub-id pub-id-type="doi">10.1002/anie.200503420</pub-id>
</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasumoto</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fukushima</surname> <given-names>E. O.</given-names>
</name>
<name>
<surname>Seki</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Muranaka</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Novel triterpene oxidizing activity of <italic>Arabidopsis thaliana</italic> CYP716A subfamily enzymes</article-title>. <source>FEBS Lett.</source> <volume>590</volume>, <fpage>533</fpage>&#x2013;<lpage>540</lpage>. doi: <pub-id pub-id-type="doi">10.1002/1873-3468.12074</pub-id>
</citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yun</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>P. C.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Carroll</surname> <given-names>A.</given-names>
</name>
<name>
<surname>McLean</surname> <given-names>C. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Accurate, scalable cohort variant calls using DeepVariant and GLnexus</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>5582</fpage>&#x2013;<lpage>5589</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1081</pub-id>
</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>N.</given-names>
</name>
<name>
<surname>N&#xfc;tzmann</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>MacDonald</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Moore</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Field</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Berriri</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Delineation of metabolic gene clusters in plant genomes by chromatin signatures</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>2255</fpage>&#x2013;<lpage>2265</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkw100</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Selection of a subspecies-specific diterpene gene cluster implicated in rice disease resistance</article-title>. <source>Nat. Plants</source> <volume>6</volume>, <fpage>1447</fpage>&#x2013;<lpage>1454</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41477-020-00816-7</pub-id>
</citation>
</ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zmienko</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Marszalek-Zenczak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wojciechowski</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Samelak-Czajka</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Luczak</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kozlowski</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>AthCNV: A map of DNA copy number variations in the Arabidopsis genome</article-title>. <source>Plant Cell</source> <volume>32</volume>, <fpage>1797</fpage>&#x2013;<lpage>1819</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.19.00640</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>